In [1]:
from huggingface_hub import list_repo_files, hf_hub_download
import concurrent.futures
import os

# Fetch the complete file listing of the dataset repository and keep only the
# entries whose repo path begins with "vox2/vox2_dev_". The listing call itself
# is not filtered; the prefix check below does the narrowing.
files = [
    repo_path
    for repo_path in list_repo_files(
        repo_id="ProgramComputer/voxceleb",
        repo_type="dataset",
    )
    if repo_path.startswith("vox2/vox2_dev_")
]

# Ensure the download target directory exists (no error if it already does).
os.makedirs("vox2_data", exist_ok=True)

# Function to download a single file
def download_file(file_path):
    """Download one file of the ProgramComputer/voxceleb dataset into "vox2_data".

    Args:
        file_path: Path of the file inside the dataset repository.

    Returns:
        A status string rather than a path: "Downloaded: <local path>" on
        success, or "Error downloading <file_path>: <message>" when the
        download raised. Any ``Exception`` is caught and reported through the
        return value, so callers must inspect the string to detect failure.
    """
    request = dict(
        repo_id="ProgramComputer/voxceleb",
        repo_type="dataset",
        filename=file_path,
        local_dir="vox2_data",
    )
    try:
        saved_to = hf_hub_download(**request)
    except Exception as err:
        # Best-effort by design: one failed file is reported, not raised.
        return f"Error downloading {file_path}: {err!s}"
    return f"Downloaded: {saved_to}"

# Download files in parallel using ThreadPoolExecutor.
# The worker count is defined once so the progress message and the pool size
# cannot drift apart.
MAX_WORKERS = 8
print(f"Downloading {len(files)} vox2_dev files with {MAX_WORKERS} workers...")

# Repo paths whose download did not succeed, collected for the final summary.
failed_files = []

with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    # Map each future back to the repo path it was submitted for.
    future_to_file = {executor.submit(download_file, file_path): file_path for file_path in files}

    # Report every file as soon as its download finishes, in completion order.
    for future in concurrent.futures.as_completed(future_to_file):
        file_path = future_to_file[future]
        try:
            result = future.result()
        except Exception as e:
            # download_file reports its own errors as a status string; this
            # branch only guards against anything that still escapes the worker.
            failed_files.append(file_path)
            print(f"Exception for {file_path}: {e}")
        else:
            # Failures come back as strings starting with "Error downloading".
            if result.startswith("Error downloading"):
                failed_files.append(file_path)
            print(result)

# Individual error lines are easy to miss among interleaved progress output,
# so repeat the failed paths at the end. Nothing extra is printed on success.
if failed_files:
    print(f"{len(failed_files)} of {len(files)} downloads failed:")
    for file_path in sorted(failed_files):
        print(f"  {file_path}")


  from .autonotebook import tqdm as notebook_tqdm


Downloading 18 vox2_dev files with 8 workers...
Downloaded: vox2_data/vox2/vox2_dev_aac_partac
Downloaded: vox2_data/vox2/vox2_dev_aac_partab
Downloaded: vox2_data/vox2/vox2_dev_aac_partaf
Downloaded: vox2_data/vox2/vox2_dev_aac_partad
Downloaded: vox2_data/vox2/vox2_dev_aac_partah
Downloaded: vox2_data/vox2/vox2_dev_aac_partag
Downloaded: vox2_data/vox2/vox2_dev_aac_partae
Downloaded: vox2_data/vox2/vox2_dev_aac_partaa
Downloaded: vox2_data/vox2/vox2_dev_mp4_partad
Downloaded: vox2_data/vox2/vox2_dev_mp4_partac
Downloaded: vox2_data/vox2/vox2_dev_mp4_partae
Downloaded: vox2_data/vox2/vox2_dev_mp4_partaa
Downloaded: vox2_data/vox2/vox2_dev_txt.zip
Downloaded: vox2_data/vox2/vox2_dev_mp4_partag
Downloaded: vox2_data/vox2/vox2_dev_mp4_partaf
Downloaded: vox2_data/vox2/vox2_dev_mp4_partai
Downloaded: vox2_data/vox2/vox2_dev_mp4_partah
Downloaded: vox2_data/vox2/vox2_dev_mp4_partab


In [None]:

# 2. Download each listed file, one at a time, into the local "voxceleb_data" folder.
# Relies on `files` having been built by an earlier cell.
# NOTE(review): the parallel cell writes to "vox2_data" while this loop writes to
# "voxceleb_data" — confirm that a second copy in a separate folder is intended.
for filepath in files:
    download_kwargs = {
        "repo_id": "ProgramComputer/voxceleb",
        "repo_type": "dataset",
        "filename": filepath,
        "local_dir": "voxceleb_data",
    }
    local_path = hf_hub_download(**download_kwargs)
    print("Downloaded to:", local_path)
