![Medallion Architecture](/Workspace/Users/srav381@gmail.com/Demo1/medallion%20architecture.JPG)

In [0]:
# 01_bronze_ingest.py  (Databricks notebook - Python / PySpark)
# Setup cell: imports, the Spark session handle, and the locations used by the later cells.
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, lit, trim
# Reuse the already-running Spark session when there is one; otherwise build a new one.
spark = SparkSession.builder.getOrCreate()

# PARAMETERS (change these if needed)
input_path = "s3://dbr-tasks/input/test_data.csv"   # raw CSV file read by the ingest step
bronze_path = "s3://dbr-tasks/bronze/test_data"  # storage location the Bronze Delta data is saved to
bronze_table = "uc_test.demo.bronze_test_data"  # table name registered over bronze_path (the registration step is optional)

In [0]:
# 1) Load the raw CSV. Schema inference is switched off, so every column
#    arrives as a string and the raw values are preserved untouched.
csv_read_options = {
    "header": True,        # first line of the file holds the column names
    "multiLine": True,     # a quoted field may contain line breaks
    "inferSchema": False,
    "quote": "\"",         # quoted fields may contain the delimiter (commas)
    "escape": "\"",        # a doubled quote inside a quoted field is a literal quote
}
df_raw = spark.read.options(**csv_read_options).csv(input_path)

# Preview only the first 100 rows instead of rendering the whole dataset.
display(df_raw.limit(100))

ID,Customer_ID,Month,Name,Age,Occupation,Annual_Income,Monthly_Inhand_Salary,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,Num_of_Loan,Type_of_Loan,Delay_from_due_date,Num_of_Delayed_Payment,Changed_Credit_Limit,Num_Credit_Inquiries,Credit_Mix,Outstanding_Debt,Credit_Utilization_Ratio,Credit_History_Age,Payment_of_Min_Amount,Total_EMI_per_month,Amount_invested_monthly,Payment_Behaviour,Monthly_Balance
1,CUS_0xd40,September,Aaron Maashoh,23,Scientist,19114.12,1824.8433333333328,3,4,3,4,"Auto Loan, Credit-Builder Loan, Personal Loan, and Home Equity Loan",3,7,11.27,2022.0,Good,809.98,35.03040185583525,22 Years and 9 Months,No,49.57494921489417,236.64268203272135,Low_spent_Small_value_payments,186.26670208571767
2,CUS_0xd40,October,Aaron Maashoh,24,Scientist,19114.12,1824.8433333333328,3,4,3,4,"Auto Loan, Credit-Builder Loan, Personal Loan, and Home Equity Loan",3,9,13.27,4.0,Good,809.98,33.053114497012245,22 Years and 10 Months,No,49.57494921489417,21.465380264657146,High_spent_Medium_value_payments,361.444003853782
3,CUS_0xd40,November,Aaron Maashoh,24,Scientist,19114.12,1824.8433333333328,3,4,3,4,"Auto Loan, Credit-Builder Loan, Personal Loan, and Home Equity Loan",-1,4,12.27,4.0,Good,809.98,33.811894118574465,,No,49.57494921489417,148.23393788500925,Low_spent_Medium_value_payments,264.67544623343
4,CUS_0xd40,December,Aaron Maashoh,24_,Scientist,19114.12,,3,4,3,4,"Auto Loan, Credit-Builder Loan, Personal Loan, and Home Equity Loan",4,5,11.27,4.0,Good,809.98,32.430559017296765,23 Years and 0 Months,No,49.57494921489417,39.08251089460281,High_spent_Medium_value_payments,343.82687322383634
5,CUS_0x21b1,September,Rick Rothackerj,28,_______,34847.84,3037.986666666666,2,4,6,1,Credit-Builder Loan,3,1,5.42,5.0,Good,605.03,25.92682170377555,27 Years and 3 Months,No,18.816214573128885,39.684018417945296,High_spent_Large_value_payments,485.2984336755923
6,CUS_0x21b1,October,Rick Rothackerj,28,Teacher,34847.84,3037.986666666666,2,4,6,1,Credit-Builder Loan,3,3,5.42,5.0,Good,605.03,30.11660045002501,27 Years and 4 Months,No,18.816214573128885,251.62736875017606,Low_spent_Large_value_payments,303.3550833433617
7,CUS_0x21b1,November,Rick Rothackerj,28,Teacher,34847.84,3037.986666666666,2,4,6,1,Credit-Builder Loan,3,,5.42,5.0,_,605.03,30.996423739085607,27 Years and 5 Months,No,18.816214573128885,72.68014533363515,High_spent_Large_value_payments,452.30230675990265
8,CUS_0x21b1,December,Rick Rothackerj,28,Teacher,34847.84,3037.986666666666,2,4,6,1,Credit-Builder Loan,3,2_,7.42,5.0,_,605.03,33.875167221549795,27 Years and 6 Months,No,18.816214573128885,153.53448761392985,!@9#%8,421.44796447960783
9,CUS_0x2dbc,September,Langep,35,Engineer,143162.64,,1,5,8,3,"Auto Loan, Auto Loan, and Not Specified",8,1942,7.1,3.0,Good,1303.01,35.22970733005963,18 Years and 5 Months,No,246.9923194537421,397.50365354404653,Low_spent_Medium_value_payments,854.2260270022115
10,CUS_0x2dbc,October,Langep,35,Engineer,143162.64,12187.22,1,5,8,3,"Auto Loan, Auto Loan, and Not Specified",6,3,2.1,3.0,Good,1303.01,35.68583594909283,18 Years and 6 Months,No,246.9923194537421,453.6151305781054,Low_spent_Large_value_payments,788.1145499681528


In [0]:

# 3) Add ingestion metadata (helpful in Bronze)
# Two audit columns are appended to the raw rows:
#   _ingest_ts    - timestamp taken when the ingest query runs
#   _source_file  - the configured input path, stored as a constant on every row
# NOTE: a stray "kjmn" token used to sit at the start of this cell; it was evaluated
# as a bare name and raised NameError, so df_bronze was never defined.
from pyspark.sql.functions import current_timestamp, lit

df_bronze = (
    df_raw
    .withColumn("_ingest_ts", current_timestamp())
    .withColumn("_source_file", lit(input_path))
)


In [0]:

# 4) Persist the Bronze DataFrame in Delta format at bronze_path.
# "overwrite" mode replaces whatever data is already stored at that location.
bronze_writer = df_bronze.write.format("delta").mode("overwrite")
bronze_writer.save(bronze_path)


In [0]:

# 5) Optionally expose the Delta location as a named table.
# The target database must already exist. IF NOT EXISTS makes a repeat run
# a no-op when the table is already registered.
register_table_sql = f"CREATE TABLE IF NOT EXISTS {bronze_table} USING DELTA LOCATION '{bronze_path}'"
spark.sql(register_table_sql)

# Final confirmation line for the notebook output.
print("Bronze write complete:", bronze_path)


Bronze write complete: s3://dbr-tasks/bronze/test_data
