Home > Blockchain >  Python code to download list of csv files from Azure Blob Storage using SAS token
Python code to download list of csv files from Azure Blob Storage using SAS token

Time:04-28

I am trying to download a list of CSV files from Azure Blob Storage using a shared SAS token, but I am getting all sorts of errors.

I tried looking this up and tried multiple code samples from contributors on Stack Overflow and from the Azure documentation. Here is the final state of the code sample I constructed from those sources. It tries to download the list of CSV files in a pooled manner (the blob storage contains 200 CSV files):

NB: I left commented-out code snippets in place to show the different approaches I tried. Sorry if they are confusing!

from itertools import tee
from multiprocessing import Process
from multiprocessing.pool import ThreadPool
import os
from azure.storage.blob import BlobServiceClient, BlobClient
from azure.storage.blob import ContentSettings, ContainerClient
#from azure.storage.blob import BlockBlobService

# Account-level blob endpoint (no container name, no SAS query string).
STORAGEACCOUNTURL = "https://myaccount.blob.core.windows.net"
# NOTE: despite its name this value is a SAS token (an sv/si/sr/sig query
# string), not a storage-account key.
STORAGEACCOUNTKEY = "sv=2020-08-04&si=blobpolicyXYZ&sr=c&sig=xxxxxxxxxxxxxxxxxxxxxxxxxxxx"
CONTAINERNAME = "mycontainer"
##BLOBNAME = "??"

# Container URL followed by the SAS query string.  The path must stop at the
# container: ContainerClient.from_container_url() takes the LAST path segment
# as the container name, so a trailing "/mydir" would be read as the container.
sas_url = 'https://myaccount.blob.core.windows.net/mycontainer?sv=2020-08-04&si=blobpolicyXYZ&sr=c&sig=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
# Virtual directory plus file-name prefix of the wanted blobs; meant to be used
# as the name prefix when listing blobs instead of being part of the URL path.
BLOB_NAME_PREFIX = "mydir/part-"
LOCAL_BLOB_PATH = "./downloads"

class AzureBlobFileDownloader:
    """Download the blobs of one Azure Blob Storage container to a local folder.

    Authentication relies on the SAS token embedded in the container URL's
    query string, so no separate credential is passed.  The URL must address
    the container itself
    (``https://<account>.blob.core.windows.net/<container>?<sas>``); a virtual
    directory is selected with ``name_starts_with`` when listing, not by
    appending it to the URL path.
    """

    def __init__(self, container_url=None, download_dir=None, max_workers=3):
        """Create the container client.

        Args:
            container_url: Container URL including its SAS query string.
                Defaults to the module-level ``sas_url``.
            download_dir: Local folder that receives the files.  Defaults to
                the module-level ``LOCAL_BLOB_PATH``.
            max_workers: Number of blobs downloaded concurrently.
        """
        print("Intializing AzureBlobFileDownloader")

        # Resolve the defaults at call time so the no-argument constructor
        # keeps using the module-level settings.
        self.download_dir = LOCAL_BLOB_PATH if download_dir is None else download_dir
        self.max_workers = max_workers

        # from_container_url is a classmethod factory: it has to be *called*
        # to obtain a client bound to the container.
        self.blob_service_client_instance = ContainerClient.from_container_url(
            sas_url if container_url is None else container_url
        )

    def save_blob(self, file_name, file_content):
        """Write ``file_content`` (bytes) to ``file_name`` below the download folder.

        Args:
            file_name: Blob name; may contain "/" for nested blobs.
            file_content: Raw bytes to store.
        """
        download_file_path = os.path.join(self.download_dir, file_name)

        # Nested blob names need their local parent folders; skip the call
        # when there is no parent component because os.makedirs("") raises.
        parent_dir = os.path.dirname(download_file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(download_file_path, "wb") as file:
            file.write(file_content)

    def download_all_blobs_in_container(self, name_starts_with=None):
        """List the container's blobs and download them through the thread pool.

        Args:
            name_starts_with: Optional blob-name prefix (for example
                ``"mydir/part-"``).  ``None`` lists every blob.

        Returns:
            The list of downloaded blob names, in listing order.
        """
        # list_blobs() yields results lazily; materialize the listing once so
        # it can be printed and then handed to the pool without being consumed.
        my_blobs = list(
            self.blob_service_client_instance.list_blobs(
                name_starts_with=name_starts_with
            )
        )
        for blob in my_blobs:
            print(blob.name)

        result = self.run(my_blobs)
        print(result)
        return result

    def run(self, blobs):
        """Download ``blobs`` concurrently and return their names in input order."""
        with ThreadPool(processes=int(self.max_workers)) as pool:
            return pool.map(self.save_blob_locally, blobs)

    def save_blob_locally(self, blob):
        """Download one listed blob and store it locally.

        Args:
            blob: Item from the blob listing; only its ``name`` is used.

        Returns:
            The blob name that was downloaded.
        """
        file_name = blob.name
        print(file_name)

        # The client is already bound to the container, so get_blob_client
        # takes the blob name only.
        content = (
            self.blob_service_client_instance.get_blob_client(file_name)
            .download_blob()
            .readall()
        )

        self.save_blob(file_name, content)
        return file_name

# Initialize class and download files
# NOTE: both statements sit at module level, so the download starts as soon as
# this file is executed (or imported).  The constructor is called without
# arguments and the download method is called without arguments.
azure_blob_file_downloader = AzureBlobFileDownloader()
azure_blob_file_downloader.download_all_blobs_in_container()

Could someone help me achieve this task in Python:

  • get a list of all files in the blob storage whose names are prefixed with part-
  • download them to a folder locally

thanks

CodePudding user response:

Could someone help me achieve this task in Python:

  • get a list of all files in the blob storage whose names are prefixed with part-

To list all the blobs whose names start with "part-", you can use blob_service.list_blobs(<Container Name>, prefix="<Your Prefix>"). Below is the code that lists those blobs.

# List every blob in the container whose name starts with "part-".
print("\nList blobs in the container")
generator = blob_service.list_blobs(CONTAINER_NAME, prefix="part-")
for blob in generator:
    # The "+" concatenation operator is required between the label and the name.
    print("\t Blob name: " + blob.name)
  • download them to a folder locally

To download the blob you can use blob_client = blob_service.get_blob_to_path(<Container Name>,<Blob Name>,<File Path>). Below is the code to download the blob as per your requirement.

# Arguments are (container name, blob name, local file path).
# NOTE: `blob` and `fname` are not defined in this one-line fragment; in the
# complete code they come from the loop over the listed blobs.
blob_client = blob_service.get_blob_to_path(CONTAINER_NAME,blob.name,fname)

Below is the complete code that worked for us which achieves your requirement.

import os
from azure.storage.blob import BlockBlobService

ACCOUNT_NAME = "<Your_ACCOUNT_NAME>"
ACCOUNT_KEY = "<YOUR_ACCOUNT_KEY>"
CONTAINER_NAME = "<YOUR_CONTAINER_NAME>"
LOCAL_BLOB_PATH = "C:\\<YOUR_PATH>\\downloadedfiles"

# BlockBlobService belongs to the legacy (pre-v12) azure-storage-blob SDK.
blob_service = BlockBlobService(ACCOUNT_NAME, ACCOUNT_KEY)

# List all blobs whose name has the prefix "part-".
print("\nList blobs in the container")
generator = blob_service.list_blobs(CONTAINER_NAME, prefix="part-")

# Materialize the listing once so the print loop and the download loop both
# see every blob, even if the listing object cannot be iterated twice in full.
blobs = list(generator)
for blob in blobs:
    print("\t Blob name: " + blob.name)

# Download each listed blob into the local folder.
for blob in blobs:

    # Local target path: download folder + blob name.
    fname = os.path.join(LOCAL_BLOB_PATH, blob.name)
    print(f'Downloading {blob.name} to {fname}')

    # Blob names may contain "/" (virtual directories); make sure the
    # matching local folder exists before the file is written.
    os.makedirs(os.path.dirname(fname), exist_ok=True)

    # Download the blob into the file.
    blob_client = blob_service.get_blob_to_path(CONTAINER_NAME, blob.name, fname)

RESULT :

enter image description here

Files in my Storage Account

enter image description here

Files in my Local Folder

enter image description here

Updated Answer

# Authenticate with a SAS token instead of the account key.
blob_service = BlockBlobService(account_name=ACCOUNT_NAME,account_key=None,sas_token=SAS_TOKEN)

# List all blobs inside "directory1" whose file name has the prefix "part-".
print("\nList blobs in the container")
generator = blob_service.list_blobs(CONTAINER_NAME, prefix="directory1/part-")

# Materialize the listing once so the print loop and the download loop both
# see every blob, even if the listing object cannot be iterated twice in full.
blobs = list(generator)
for blob in blobs:
    print("\t Blob name: " + blob.name)

# Download each listed blob into the local folder.
for blob in blobs:

    # Local target path: download folder + blob name.
    fname = os.path.join(LOCAL_BLOB_PATH, blob.name)
    print(f'Downloading {blob.name} to {fname}')

    # The blob names carry the "directory1/" prefix, so the matching local
    # sub-folder has to exist before the file is written.
    os.makedirs(os.path.dirname(fname), exist_ok=True)

    # Download the blob into the file.
    blob_client = blob_service.get_blob_to_path(CONTAINER_NAME, blob.name, fname)

enter image description here

  • Related