In [7]:
from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobPrefix, ContainerClient, BlobProperties, BlobClient

In [8]:
# Endpoint of the storage account that every helper in this notebook talks to.
# (Plain string: there are no placeholders, so no f-string prefix is needed.)
account_url = "https://wboylesbackups.blob.core.windows.net"
# Credential object handed to every BlobClient / ContainerClient created below.
credential = DefaultAzureCredential()

In [9]:
def download_blob(container_name: str, blob_name: str, out_filename: str | None = None):
    """Download a single blob to a local file.

    Args:
        container_name: Container that holds the blob.
        blob_name: Full name (path) of the blob inside the container.
        out_filename: Local path to write to. When omitted, it defaults to
            ``container_name`` and ``blob_name`` concatenated with no separator.

    Any missing parent directory of the output path is created first. The blob
    content is streamed straight into the file instead of being buffered in
    memory as a whole.
    """
    import os  # local import: this cell must not rely on a later cell having imported os

    if out_filename is None:
        out_filename = container_name + blob_name

    blob_client = BlobClient(account_url, container_name, blob_name, credential=credential)

    # Start the download before touching the filesystem so that a failing
    # request (e.g. the blob does not exist) does not leave an empty file behind.
    downloader = blob_client.download_blob()

    # Blob names commonly contain "/", so the target directory may not exist yet.
    parent_dir = os.path.dirname(out_filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(out_filename, "wb") as f:
        downloader.readinto(f)

In [10]:
# TODO: This does not properly show empty directories
def list_blobs(container_name: str, prefix: str = "") -> list[BlobProperties | BlobPrefix]:
    """Recursively list every entry in a container whose name starts with ``prefix``.

    The result is in post-order: everything found underneath a ``BlobPrefix``
    (a virtual folder) comes first, followed by the ``BlobPrefix`` entry itself.
    Both blobs and folder prefixes are included in the returned list; callers
    that only want files must filter out the ``BlobPrefix`` instances.

    Args:
        container_name: Container to walk.
        prefix: Only names starting with this string are returned. The match is
            a plain string prefix, so ``"foo"`` also matches ``"foobar"``.

    Returns:
        A flat list of ``BlobProperties`` and ``BlobPrefix`` objects.
    """
    # One client is shared by the whole traversal instead of one per folder.
    container_client = ContainerClient(account_url, container_name, credential)
    entries: list[BlobProperties | BlobPrefix] = []

    def _collect(current_prefix: str) -> None:
        # Walk one level, descending into each folder before recording it.
        for entry in container_client.walk_blobs(name_starts_with=current_prefix):
            if isinstance(entry, BlobPrefix):
                _collect(str(entry.name))

            entries.append(entry)

    _collect(prefix)
    return entries


In [11]:
def create_folder(container_name: str, folder_name: str):
    """Create a folder by uploading, then deleting, a placeholder blob inside it.

    An empty blob named ``<folder_name>/_`` is uploaded and immediately deleted.
    NOTE(review): this presumably relies on the storage account keeping the
    parent directory once the placeholder is gone — confirm for the account type.

    Args:
        container_name: Container in which to create the folder.
        folder_name: Folder path; trailing slashes are ignored.

    Raises:
        ValueError: If ``folder_name`` is empty or consists only of slashes.
    """
    # Strip trailing slashes so "a/b/" does not turn into the blob "a/b//_".
    folder_path = folder_name.rstrip("/")
    if not folder_path:
        raise ValueError("folder_name must contain at least one non-slash character")

    container_client = ContainerClient(account_url, container_name, credential)

    # This is a hack, but it works better than using the data lake file system
    placeholder = container_client.upload_blob(f"{folder_path}/_", "")
    placeholder.delete_blob()


# Reference only (not executed) — presumably how a folder marker blob can be recognised; verify:
# blob_client.get_blob_properties().metadata['hdi_isfolder'] == 'true'


In [12]:
def upload_file(container_name: str, local_path: str, azure_path: str):
    """Upload one local file to the blob ``azure_path`` inside ``container_name``.

    The file is opened in binary mode and its handle is passed directly to
    ``BlobClient.upload_blob`` with that method's default options.
    """
    target_blob = BlobClient(account_url, container_name, azure_path, credential=credential)

    with open(local_path, "rb") as local_file:
        target_blob.upload_blob(local_file)

In [13]:
# Deletes blobs recursively
# If deleting a directory, do not include a trailing / in the blob_name
def delete_blob(container_name: str, blob_name: str):
    """Delete a blob, or a folder together with everything beneath it.

    Entries are taken from ``list_blobs(container_name, blob_name)``. Because
    that listing matches on a raw string prefix, each entry is additionally
    checked against a path boundary: only the entry named exactly ``blob_name``
    or entries under ``blob_name + "/"`` are deleted. Without this check,
    deleting ``"foo"`` would also remove ``"foobar.txt"`` and ``"foo2/..."``.

    An empty ``blob_name`` applies no filter, so every listed entry is deleted.

    Args:
        container_name: Container holding the blob or folder.
        blob_name: Blob or folder path, without a trailing ``/`` for folders.
    """
    target = blob_name.removesuffix("/")
    descendant_prefix = f"{target}/"

    for blob_props in list_blobs(container_name, blob_name):
        # Folder entries are listed with a trailing "/"; strip it to address
        # the folder itself. A separate local keeps the parameter untouched.
        entry_name = str(blob_props.name).removesuffix("/")

        # Skip siblings that merely share a string prefix with the target.
        in_scope = entry_name == target or entry_name.startswith(descendant_prefix)
        if blob_name and not in_scope:
            continue

        blob_client = BlobClient(account_url, container_name, entry_name, credential=credential)

        # Folder entries may not correspond to an actual blob; nothing to delete then.
        if not blob_client.exists():
            continue

        blob_client.delete_blob(delete_snapshots='include')

In [20]:
import os

# TODO: Can these be uploaded in parallel?
def upload_folder(
    container_name: str,
    local_folder_path: str,
    azure_prefix: str = "upload",
    preserve_structure: bool = False,
):
    """Upload every file found under a local folder (recursively).

    By default the folder structure is flattened: each file is uploaded as
    ``upload/<filename>``, so files sharing a name in different subfolders map
    to the same blob path. Pass ``preserve_structure=True`` to keep each file's
    path relative to ``local_folder_path`` underneath the destination prefix.

    Args:
        container_name: Destination container.
        local_folder_path: Local directory to upload.
        azure_prefix: Destination folder in the container; leading and trailing
            slashes are ignored. An empty prefix uploads to the container root.
        preserve_structure: Keep the relative subfolder layout instead of
            flattening it.

    Raises:
        FileNotFoundError: If ``local_folder_path`` does not exist.
        NotADirectoryError: If ``local_folder_path`` is not a directory.
    """
    if not os.path.exists(local_folder_path):
        raise FileNotFoundError(f"{local_folder_path} not found")
    if not os.path.isdir(local_folder_path):
        raise NotADirectoryError(f"{local_folder_path} is not a directory")

    root = os.path.abspath(local_folder_path)
    destination_root = azure_prefix.strip("/")

    for dirpath, _, filenames in os.walk(root):
        for filename in filenames:
            local_path = os.path.join(dirpath, filename)

            if preserve_structure:
                # Blob paths always use "/", whatever the local separator is.
                relative_name = os.path.relpath(local_path, root).replace(os.sep, "/")
            else:
                relative_name = filename

            azure_path = f"{destination_root}/{relative_name}" if destination_root else relative_name
            upload_file(container_name, local_path, azure_path)