I'm working as a Python developer in an Azure Blob/File Storage environment. These are my credentials:
Storage Type: Storage account
Access rule: Shared access signature
Available Credentials:
- Connection string
- SAS token
- Blob service SAS URL
- File service SAS URL
Could you please help me find a Python routine to establish a connection, list the files, and download them?
Thanks in advance!
CodePudding user response:
> find a python routine to establish a connection, list and download files?
You can use the connection string to establish the connection to the Azure storage account and then download the files from Azure Blob Storage.
Use the Azure Storage SDK for Python (the `azure.storage.blob` module) to download all blobs in a storage container to a specified local folder.
The sample code below also creates local folders for blobs that use virtual folder names (names containing slashes):
# Python program to bulk download blob files from azure storage
# Uses the latest Python SDK for Azure Blob Storage (azure.storage.blob)
# Requires python 3.6 or above
import os
from azure.storage.blob import BlobServiceClient, BlobClient
from azure.storage.blob import ContentSettings, ContainerClient
# IMPORTANT: Replace this placeholder with your storage account connection string.
# It usually starts with "DefaultEndpointsProtocol=https;..."
MY_CONNECTION_STRING = "REPLACE_THIS"
# Name of the blob container to download from; replace with your own container
MY_BLOB_CONTAINER = "myimages"
# Local folder the blobs are downloaded into; replace with your own path
LOCAL_BLOB_PATH = "REPLACE_THIS"
class AzureBlobFileDownloader:
    """Download every blob of one Azure Blob Storage container to a local folder.

    Blob names containing slashes (virtual folders) are mirrored as nested
    directories below the download folder.
    """

    def __init__(self, connection_string=None, container_name=None, local_path=None):
        """Connect to the storage account and select the container.

        Args:
            connection_string: Storage account connection string. Defaults to
                the module-level ``MY_CONNECTION_STRING``.
            container_name: Name of the blob container. Defaults to the
                module-level ``MY_BLOB_CONTAINER``.
            local_path: Folder the blobs are written to. Defaults to the
                module-level ``LOCAL_BLOB_PATH``.
        """
        print("Intializing AzureBlobFileDownloader")
        # Resolve the defaults at call time so the original no-argument
        # construction keeps using the module constants.
        if connection_string is None:
            connection_string = MY_CONNECTION_STRING
        if container_name is None:
            container_name = MY_BLOB_CONTAINER
        if local_path is None:
            local_path = LOCAL_BLOB_PATH
        self.local_path = local_path
        # Initialize the connection to Azure storage account
        self.blob_service_client = BlobServiceClient.from_connection_string(connection_string)
        self.my_container = self.blob_service_client.get_container_client(container_name)

    def save_blob(self, file_name, file_content):
        """Write ``file_content`` (bytes) to ``file_name`` below the download folder.

        Args:
            file_name: Blob name, used as the path relative to the download
                folder; may contain slashes.
            file_content: Raw bytes to write.
        """
        # Get full path to the file
        download_file_path = os.path.join(self.local_path, file_name)
        # for nested blobs, create local path as well!
        parent_dir = os.path.dirname(download_file_path)
        # os.makedirs("") raises, so only create a parent when there is one
        # (empty download folder combined with a blob name without slashes).
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(download_file_path, "wb") as file:
            file.write(file_content)

    def download_all_blobs_in_container(self):
        """Download each blob listed in the container and save it locally."""
        for blob in self.my_container.list_blobs():
            print(blob.name)
            # Named "content" rather than "bytes" to avoid shadowing the builtin
            content = self.my_container.get_blob_client(blob).download_blob().readall()
            self.save_blob(blob.name, content)
# Instantiate the downloader and download all blobs in the container
azure_blob_file_downloader = AzureBlobFileDownloader()
azure_blob_file_downloader.download_all_blobs_in_container()
Note: Replace MY_CONNECTION_STRING, LOCAL_BLOB_PATH and MY_BLOB_CONTAINER variables with your values.
Update:
# Reuse the imports and the MY_CONNECTION_STRING, LOCAL_BLOB_PATH and MY_BLOB_CONTAINER definitions from the code above here (ThreadPool comes from multiprocessing.pool)
def download_all_blobs_in_container(self):
    """List the container's blobs, hand them to ``run`` and print what it returns."""
    # get a list of blobs
    blob_listing = self.my_container.list_blobs()
    print(self.run(blob_listing))
def run(self, blobs):
    """Process the given blobs concurrently with ``save_blob_locally``.

    Args:
        blobs: Iterable of blob items; each one is passed to
            ``self.save_blob_locally``.

    Returns:
        List of the values returned by ``save_blob_locally``, in input order.
    """
    # Imported locally: no import of ThreadPool is visible in this file.
    from multiprocessing.pool import ThreadPool

    # Download 10 files at a time!
    with ThreadPool(processes=10) as pool:
        return pool.map(self.save_blob_locally, blobs)
def save_blob_locally(self, blob):
    """Download one blob and write it below ``LOCAL_BLOB_PATH``.

    Args:
        blob: Blob item from the container listing; its ``name`` is used as
            the path relative to ``LOCAL_BLOB_PATH``.

    Returns:
        The blob's name.
    """
    file_name = blob.name
    print(file_name)
    # Named "content" rather than "bytes" to avoid shadowing the builtin
    content = self.my_container.get_blob_client(blob).download_blob().readall()
    # Get full path to the file
    download_file_path = os.path.join(LOCAL_BLOB_PATH, file_name)
    # for nested blobs, create local path as well!
    parent_dir = os.path.dirname(download_file_path)
    # os.makedirs("") raises, so only create a parent when there is one
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(download_file_path, "wb") as file:
        file.write(content)
    return file_name
# Instantiate the downloader and download all blobs in the container
azure_blob_file_downloader = AzureBlobFileDownloader()
azure_blob_file_downloader.download_all_blobs_in_container()