azure-blob

  1. 操作 Azure Blob Storage
  2. python sdk
  3. Azure CLI
  4. 单纯下载数据
  5. 参考

操作 Azure Blob Storage

在云端进行深度学习的时候,我喜欢在本地开发,数据处理也基本在本地进行,处理完的数据要同步到云端。这时候小水管太慢了,我就想到用 Azure Blob Storage 来进行数据传输。测试下来,Azure Blob Storage 在 Azure VM 上传输文件的速度巨快!

python sdk

案例程序如下:

'''
Azure Blob Storage 
    增删改查操作
'''

import os, uuid
from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
from dotenv import load_dotenv


# 列出Blob中的文件列表
def list_blob_files(blob_service_client:BlobServiceClient,container_name:str=""):
    """Print the name of every blob found in one container.

    Args:
        blob_service_client: Service client used to obtain the container client.
        container_name: Name of the container whose blobs are listed.
    """
    client = blob_service_client.get_container_client(container_name)

    print("\nListing blobs...")
    # Emit one tab-indented line per entry yielded by list_blobs().
    for entry in client.list_blobs():
        print("\t" + entry.name)


# 查询有哪些 container【容器】
def list_containers(blob_service_client:BlobServiceClient):
    """Print the name of each container returned by the service client.

    Args:
        blob_service_client: Service client whose list_containers() is iterated.
    """
    # Each name is written on its own line, prefixed with a tab.
    for entry in blob_service_client.list_containers():
        print("\t" + entry.name)



# 创建容器
def create_container(blob_service_client:BlobServiceClient):
    """Create a container with a freshly generated name and return that name.

    Args:
        blob_service_client: Service client on which create_container() is called.

    Returns:
        The generated container name (a random UUID4 rendered as a string).
    """
    # A random UUID is used as the name so repeated runs do not collide.
    container_name = str(uuid.uuid4())
    # The client object returned by the call is not needed here; callers
    # address the container by the returned name.
    blob_service_client.create_container(container_name)
    return container_name


# 删除容器
def drop_container(blob_service_client:BlobServiceClient, container_name:str):
    """Delete a container, printing a progress message before and after.

    Args:
        blob_service_client: Service client used to obtain the container client.
        container_name: Name of the container to delete.
    """
    target = blob_service_client.get_container_client(container_name)
    print("Deleting blob container...")
    target.delete_container()
    # Only reached when delete_container() did not raise.
    print("Container deleted successfully.")




# 上传文件
def upload_files_to_blob(blob_service_client:BlobServiceClient, container_name:str):
    """Create a small sample text file under ./data and upload it as a blob.

    The file gets a random UUID-based name ending in ".txt"; the same name is
    used as the blob name inside the container.

    Args:
        blob_service_client: Service client used to obtain the blob client.
        container_name: Name of the container that receives the blob.
    """
    # Local directory that holds the sample data.
    local_path = "./data"
    # exist_ok=True: the directory survives between runs, and a plain
    # os.mkdir() would raise FileExistsError on every run after the first.
    os.makedirs(local_path, exist_ok=True)

    # Random file name so repeated uploads do not overwrite each other.
    local_file_name = str(uuid.uuid4()) + ".txt"
    upload_file_path = os.path.join(local_path, local_file_name)

    # Context manager guarantees the handle is closed even if write() raises.
    with open(file=upload_file_path, mode='w', encoding="utf-8") as file:
        file.write("Hello, World!")

    # The blob is named after the local file.
    blob_client = blob_service_client.get_blob_client(container=container_name, blob=local_file_name)

    print("\nUploading to Azure Storage as blob:\n\t" + local_file_name)

    # Upload the file content in binary mode.
    with open(file=upload_file_path, mode="rb") as data:
        blob_client.upload_blob(data)


# 下载文件
def download_files_from_blob(blob_service_client,local_path,target_name,container_name):
    """Download one blob from a container into a local directory.

    When the blob name ends in ".txt", "DOWNLOAD" is inserted before that
    extension so the downloaded copy can sit next to the uploaded original.
    Other names are used unchanged.

    Args:
        blob_service_client: Service client used to obtain the container client.
        local_path: Directory the file is written into; created if missing.
        target_name: Name of the blob to download.
        container_name: Name of the container holding the blob.
    """
    # Only touch a trailing ".txt"; a plain replace() would also rewrite any
    # ".txt" that appears in the middle of the name.
    suffix = '.txt'
    if target_name.endswith(suffix):
        local_name = target_name[:-len(suffix)] + 'DOWNLOAD' + suffix
    else:
        local_name = target_name
    download_file_path = os.path.join(local_path, local_name)

    container_client = blob_service_client.get_container_client(container=container_name)
    print("\nDownloading blob to \n\t" + download_file_path)

    # Fetch the content before opening the output file, so a failed download
    # does not leave an empty file behind.
    payload = container_client.download_blob(target_name).readall()

    # Make sure the destination directory exists (covers a missing local_path
    # and blob names that contain path separators).
    parent_dir = os.path.dirname(download_file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(file=download_file_path, mode="wb") as download_file:
        download_file.write(payload)


# 删除文件
def delete_files_from_blob(blob_service_client:BlobServiceClient, container_name, target_name):
    """Delete a single blob from a container, printing progress messages.

    Args:
        blob_service_client: Service client used to obtain the container client.
        container_name: Name of the container holding the blob.
        target_name: Name of the blob to delete.
    """
    owner = blob_service_client.get_container_client(container=container_name)
    print("\nDeleting blob:\n\t" + target_name)
    owner.delete_blob(target_name)
    # Only reached when delete_blob() did not raise.
    print("Blob deleted successfully.")


# Load variables from a local .env file into the process environment.
load_dotenv()
try:
    # Quickstart code goes here
    print("Azure Blob Storage Python quickstart sample")
    # Read the connection string from the environment.
    connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
    if not connect_str:
        # Fail with a clear message instead of passing None to the SDK.
        raise ValueError(
            "AZURE_STORAGE_CONNECTION_STRING is not set; "
            "define it in the environment or in a .env file"
        )
    # The connection string is a credential, so its value is never printed.
    print("Connection string: loaded from AZURE_STORAGE_CONNECTION_STRING")
    # Create the BlobServiceClient object
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    # Name of the container the demo operates on.
    container_name="dbb9ac80-884e-4932-b204-5874b5b5c742"
    # Name of the blob that is downloaded and then deleted below.
    target_name = "57d0dfec-a940-4a5c-9149-a97f87629e12.txt"


    # Optional step: create a new container.
    # container_name = create_container(blob_service_client)

    # Optional step: upload a sample file.
    # upload_files_to_blob(blob_service_client, container_name)


    # List the containers in the storage account.
    list_containers(blob_service_client)

    # List the blobs in the container.
    list_blob_files(blob_service_client,container_name=container_name)

    # Download the blob to the local ./data directory.
    download_files_from_blob(blob_service_client=blob_service_client,local_path="./data",container_name = container_name,target_name=target_name)

    # Delete the blob from the container.
    delete_files_from_blob(blob_service_client,target_name=target_name,container_name =container_name)

    # Destroy the container.
    drop_container(blob_service_client,container_name=container_name)

except Exception as ex:
    # Top-level boundary of the demo script: report the failure and exit.
    print('Exception:')
    print(ex)

Azure CLI

感觉很难用,要是有交互式的就好了

az login

单纯下载数据

推荐直接使用 curl 或 wget 来下载到本地

将数据打包成 tar.gz 后,可以使用 Web App 将数据上传至 Azure Blob Storage 中

先将数据共享

image-20250821153025213

下载到本地

curl <http-link> -o <data.tar.gz>

参考

github