In [5]:
import os
import requests

def download_github_folder(repo_owner, repo_name, folder_path, branch="main", save_dir=".", timeout=30):
    """
    Downloads all files in a folder from a GitHub repository.

    The folder is listed through the GitHub REST "contents" endpoint and every
    entry of type "file" is fetched from its ``download_url`` and written into
    ``save_dir`` under its original name. Sub-folders and other entry types
    are reported and skipped (no recursion). If ``folder_path`` points at a
    single file rather than a folder, that one file is downloaded.

    Parameters:
        repo_owner (str): GitHub username or organization name.
        repo_name (str): Name of the repository.
        folder_path (str): Path to the folder in the repository.
        branch (str): Branch name (default: "main").
        save_dir (str): Local directory to save the files (default: current directory).
            Created, including missing parents, if it does not exist.
        timeout (float | tuple): Timeout in seconds passed to every HTTP
            request (default: 30). Prevents a stalled connection from
            blocking forever.

    Returns:
        None

    Notes:
        HTTP/network failures (``requests.exceptions.RequestException``,
        which includes timeouts) are caught and printed as ``Error: ...``;
        the download stops at the first such failure. Filesystem errors
        (``OSError``) are not caught and propagate to the caller.
    """
    # GitHub API URL to list folder contents
    api_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/contents/{folder_path}?ref={branch}"
    print(api_url)

    try:
        # Get the folder contents
        response = requests.get(api_url, timeout=timeout)
        response.raise_for_status()
        folder_contents = response.json()

        # The endpoint answers with a single JSON object (not a list) when the
        # path is a file; normalise so the loop below handles both shapes
        # instead of iterating over the object's keys.
        if isinstance(folder_contents, dict):
            folder_contents = [folder_contents]

        # exist_ok avoids the check-then-create race; an empty save_dir means
        # "current directory" for os.path.join, so there is nothing to create.
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

        for item in folder_contents:
            item_type = item["type"]
            item_name = item["name"]

            if item_type == "file":  # Only process files
                file_url = item.get("download_url")  # Raw file URL (may be null)
                if not file_url:
                    print(f"Skipping file: {item_name} (no download URL provided)")
                    continue

                file_path = os.path.join(save_dir, item_name)

                # Download each file; the file on disk is only created once
                # the download itself has succeeded.
                file_response = requests.get(file_url, timeout=timeout)
                file_response.raise_for_status()

                with open(file_path, "wb") as file:
                    file.write(file_response.content)

                print(f"Successfully Downloaded: {item_name}")
            elif item_type == "dir":
                print(f"Skipping folder: {item_name} (nested folders not supported)")
            else:
                # e.g. symlink or submodule entries
                print(f"Skipping {item_type}: {item_name} (only regular files are downloaded)")
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")

In [6]:
# --- Source on GitHub: adjust these to point at a different repo/folder ---
repo_owner = "vegetariancoder"  # account (user or organization) owning the repo
repo_name = "competitive-programming"  # repository to read from
folder_path = "Projects/OLIST_SALES/dataset/"  # folder inside the repository
branch = "main"  # branch the folder is read from

# --- Local destination for the downloaded files ---
save_dir = "./downloads/"

# Fetch every file in the folder; arguments are passed by keyword for clarity.
download_github_folder(
    repo_owner=repo_owner,
    repo_name=repo_name,
    folder_path=folder_path,
    branch=branch,
    save_dir=save_dir,
)


https://api.github.com/repos/vegetariancoder/competitive-programming/contents/Projects/OLIST_SALES/dataset/?ref=main
Successfully Downloaded: olist_customers_dataset.csv
Successfully Downloaded: olist_order_items_dataset.csv
Successfully Downloaded: olist_order_payments_dataset.csv
Successfully Downloaded: olist_order_reviews_dataset.csv
Successfully Downloaded: olist_orders_dataset.csv
Successfully Downloaded: olist_products_dataset.csv
Successfully Downloaded: olist_sellers_dataset.csv
Successfully Downloaded: product_category_name_translation.csv
