## Introduction
This notebook automates downloading the AdventureWorks data from GitHub, processing it, and saving it in both Parquet and CSV formats. For each table, the data is stored both as files in the lakehouse files directory and as a Delta table, ready for further analysis and use.

In [11]:
import pandas as pd
from tqdm.auto import tqdm
import os

# Source location (raw GitHub content) and lakehouse destination folders
base = "https://raw.githubusercontent.com/vigneshmestha14/Fabric_DP-700/main/AdventureWorks"
lakehouse_dir = "/lakehouse/default/Files/AdventureWorks"
csv_dir = os.path.join(lakehouse_dir, "csvfiles")
parquet_dir = os.path.join(lakehouse_dir, "parquetfiles")

# Make sure both output folders are present before anything is written
for out_dir in (csv_dir, parquet_dir):
    os.makedirs(out_dir, exist_ok=True)

# adventureworks.csv is read without a header row, as a single column
# named "table" holding one table name per row
df_tables = pd.read_csv(f"{base}/adventureworks.csv", names=["table"])
print("Loaded list of tables:")

# One pass per table: download -> lakehouse table -> Parquet file -> CSV file
pbar = tqdm(df_tables["table"].values)
for table in pbar:
    pbar.set_description(f"Uploading {table} to lakehouse")

    # Destination paths for the two file copies of this table
    parquet_output_path = os.path.join(parquet_dir, f"{table}.parquet")
    csv_output_path = os.path.join(csv_dir, f"{table}.csv")

    # Fetch the source Parquet file into a pandas DataFrame
    df = pd.read_parquet(f"{base}/{table}.parquet")
    print(f"Downloaded {table}.parquet")

    # Convert to a Spark DataFrame and (over)write the table of the same name;
    # `spark` is the session object supplied by the notebook runtime
    spark_df = spark.createDataFrame(df)
    spark_df.write.mode("overwrite").saveAsTable(table)
    print(f"Saved {table} to lakehouse table")

    # Parquet copy under the lakehouse files directory
    df.to_parquet(parquet_output_path)
    print(f"Saved {table} to {parquet_output_path}")

    # CSV copy under the lakehouse files directory (index column omitted)
    df.to_csv(csv_output_path, index=False)
    print(f"Saved {table} to {csv_output_path}")

StatementMeta(, 6770bff8-b6e0-494d-a4ee-01010fb65493, 15, Submitted, Running, Running)

StatementMeta(, 6770bff8-b6e0-494d-a4ee-01010fb65493, 14, Finished, Available, Finished)

Loaded list of tables:


  0%|          | 0/28 [00:00<?, ?it/s]

Downloaded DimDate.parquet
Saved DimDate to lakehouse table
Saved DimDate to /lakehouse/default/Files/AdventureWorks/parquetfiles/DimDate.parquet
Saved DimDate to /lakehouse/default/Files/AdventureWorks/csvfiles/DimDate.csv
Downloaded DimCustomer.parquet
Saved DimCustomer to lakehouse table
Saved DimCustomer to /lakehouse/default/Files/AdventureWorks/parquetfiles/DimCustomer.parquet
Saved DimCustomer to /lakehouse/default/Files/AdventureWorks/csvfiles/DimCustomer.csv
Downloaded DimCurrency.parquet
Saved DimCurrency to lakehouse table
Saved DimCurrency to /lakehouse/default/Files/AdventureWorks/parquetfiles/DimCurrency.parquet
Saved DimCurrency to /lakehouse/default/Files/AdventureWorks/csvfiles/DimCurrency.csv
Downloaded DimAccount.parquet
Saved DimAccount to lakehouse table
Saved DimAccount to /lakehouse/default/Files/AdventureWorks/parquetfiles/DimAccount.parquet
Saved DimAccount to /lakehouse/default/Files/AdventureWorks/csvfiles/DimAccount.csv
Downloaded DimOrganization.parquet
Sav