In [0]:
# Bronze-layer full refresh: for every source system, load each entity's CSV
# export and replace the contents of the matching Delta table.
#
# Maps a source system name (sub-folder under the source volume) to the list
# of entity names; each entity is both the CSV file stem and the table name.
sources = {
    "crm": ['cust_info', 'prd_info', 'sales_details'],
    "erp": ['CUST_AZ12', 'LOC_A101', 'PX_CAT_G1V2'],
}

for source_type, entities in sources.items():
    for entity in entities:
        source_dir = f'/Volumes/data_warehouse/source/{source_type}/{entity}.csv'
        target_table = f'data_warehouse.bronze.{entity}'

        print(f"Processing entity: {entity}")
        print(f"Source path: {source_dir}")
        print(f"Target table: {target_table}")

        # Read the CSV, taking column names from the header row and letting
        # Spark infer the column types from the data.
        df = (
            spark.read.format('csv')
            .option('header', True)
            .option('inferSchema', True)
            .load(source_dir)
        )

        # count() is an action: it scans the file once more purely so the
        # log line below can report how many rows were read.
        row_count = df.count()
        print(f"✅ Loaded {row_count} rows from {entity}.csv")
        print("------------------------------")

        # Replace the table contents in ONE atomic Delta commit.
        #
        # The previous TRUNCATE TABLE + append pair was two separate commits:
        # a failure between them left the table empty, and TRUNCATE raised if
        # the table did not exist yet. In addition, 'overwriteSchema' is only
        # honoured for overwrite writes, so it was a no-op with mode('append').
        # NOTE(review): with overwrite the table schema now follows the
        # inferred CSV schema; if the bronze tables are meant to keep a fixed,
        # DDL-defined schema, drop the overwriteSchema option — confirm.
        (
            df.write.format('delta')
            .mode('overwrite')
            .option('overwriteSchema', True)
            .saveAsTable(target_table)
        )


Processing entity: cust_info
Source path: /Volumes/data_warehouse/source/crm/cust_info.csv
Target table: data_warehouse.bronze.cust_info
✅ Loaded 18494 rows from cust_info.csv
------------------------------
Processing entity: prd_info
Source path: /Volumes/data_warehouse/source/crm/prd_info.csv
Target table: data_warehouse.bronze.prd_info
✅ Loaded 397 rows from prd_info.csv
------------------------------
Processing entity: sales_details
Source path: /Volumes/data_warehouse/source/crm/sales_details.csv
Target table: data_warehouse.bronze.sales_details
✅ Loaded 60398 rows from sales_details.csv
------------------------------
Processing entity: CUST_AZ12
Source path: /Volumes/data_warehouse/source/erp/CUST_AZ12.csv
Target table: data_warehouse.bronze.CUST_AZ12
✅ Loaded 18484 rows from CUST_AZ12.csv
------------------------------
Processing entity: LOC_A101
Source path: /Volumes/data_warehouse/source/erp/LOC_A101.csv
Target table: data_warehouse.bronze.LOC_A101
✅ Loaded 18484 rows from LOC