## Bronze Notebook

In [0]:
#Step 1: Widgets (Parameters)
# Declare the notebook's two text widgets, each with a default value:
#   source_path  - location of the CSV file that is read in Step 3
#   bronze_table - name of the table that Step 6 appends to
dbutils.widgets.text("source_path", "/Volumes/workspace/ecommerce/ecommerce_data/2019-Nov.csv")
dbutils.widgets.text("bronze_table", "bronze_events")

In [0]:
#Step 2: Read Widget Values
# Pull the current value of each widget into a Python variable of the same name.
source_path, bronze_table = map(
    dbutils.widgets.get,
    ("source_path", "bronze_table"),
)

In [0]:
#Step 3: Read Raw CSV
from pyspark.sql import functions as F

# Load the CSV at `source_path`. The first line is treated as the header and
# column types are inferred from the data rather than declared up front.
df_raw = spark.read.csv(
    source_path,
    header=True,
    inferSchema=True
)

# Fail fast when the source contains no rows. take(1) fetches at most one row,
# so this check does not have to count every record the way `count() == 0` does.
if not df_raw.take(1):
    raise Exception("No data found in source path")


In [0]:
#Step 4: Add Ingestion Metadata (THIS IS THE KEY)
# Tag every raw row with an `ingestion_ts` column holding Spark's current timestamp.
# Rows written by different runs carry different timestamps, which is how runs
# can be told apart later.
df_bronze = df_raw.withColumn("ingestion_ts", F.current_timestamp())

In [0]:
#Step 5: Add Ingestion Date (Partition Column)
# Derive `ingestion_date` as the date part of `ingestion_ts`.
# Step 6 partitions the table on this column, so it matters for
# performance, filtering, and incremental processing.
df_bronze = df_bronze.withColumn("ingestion_date", F.to_date(F.col("ingestion_ts")))

In [0]:
#Step 6: Append to ONE Bronze Table
# Append this run's rows to the Delta table named by `bronze_table`,
# partitioned by `ingestion_date`. Every run lands in the same table;
# rows from different days end up in different partitions.
(
    df_bronze.write
    .format("delta")
    .mode("append")
    .partitionBy("ingestion_date")
    .saveAsTable(bronze_table)
)

In [0]:
#Step 7: Verification
print(f"Bronze ingestion completed into {bronze_table}")

# Verify against the table that was actually written, not against `df_bronze`.
# `df_bronze` is a lazy plan: aggregating it would re-read the source CSV and
# re-evaluate current_timestamp(), so it would never reflect what is stored.
# The counts below are per ingestion_date for the whole table, so rows appended
# by earlier runs are included.
(
    spark.table(bronze_table)
    .groupBy("ingestion_date")
    .count()
    .orderBy("ingestion_date")
    .show()
)


Bronze ingestion completed into bronze_events
+--------------+--------+
|ingestion_date|   count|
+--------------+--------+
|    2026-01-15|67501979|
+--------------+--------+



In [0]:
%sql
-- Spot check: return every column of the bronze rows whose event_time falls in month 11.
-- NOTE(review): the table name is hard-coded here instead of coming from the
-- bronze_table widget, and the filter matches month 11 of any year - confirm both are intended.
SELECT
  events.*
FROM
  bronze_events AS events
WHERE
  month(events.event_time) = 11;

event_time,event_type,product_id,category_id,category_code,brand,price,user_id,user_session,ingestion_ts,ingestion_date
2019-11-01T00:00:00.000Z,view,1003461,2053013555631882655,electronics.smartphone,xiaomi,489.07,520088904,4d3b30da-a5e4-49df-b1a8-ba5943f1dd33,2026-01-15T00:55:16.367Z,2026-01-15
2019-11-01T00:00:00.000Z,view,5000088,2053013566100866035,appliances.sewing_machine,janome,293.65,530496790,8e5f4f83-366c-4f70-860e-ca7417414283,2026-01-15T00:55:16.367Z,2026-01-15
2019-11-01T00:00:01.000Z,view,17302664,2053013553853497655,,creed,28.31,561587266,755422e7-9040-477b-9bd2-6a6e8fd97387,2026-01-15T00:55:16.367Z,2026-01-15
2019-11-01T00:00:01.000Z,view,3601530,2053013563810775923,appliances.kitchen.washer,lg,712.87,518085591,3bfb58cd-7892-48cc-8020-2f17e6de6e7f,2026-01-15T00:55:16.367Z,2026-01-15
2019-11-01T00:00:01.000Z,view,1004775,2053013555631882655,electronics.smartphone,xiaomi,183.27,558856683,313628f1-68b8-460d-84f6-cec7a8796ef2,2026-01-15T00:55:16.367Z,2026-01-15
2019-11-01T00:00:01.000Z,view,1306894,2053013558920217191,computers.notebook,hp,360.09,520772685,816a59f3-f5ae-4ccd-9b23-82aa8c23d33c,2026-01-15T00:55:16.367Z,2026-01-15
2019-11-01T00:00:01.000Z,view,1306421,2053013558920217191,computers.notebook,hp,514.56,514028527,df8184cc-3694-4549-8c8c-6b5171877376,2026-01-15T00:55:16.367Z,2026-01-15
2019-11-01T00:00:02.000Z,view,15900065,2053013558190408249,,rondell,30.86,518574284,5e6ef132-4d7c-4730-8c7f-85aa4082588f,2026-01-15T00:55:16.367Z,2026-01-15
2019-11-01T00:00:02.000Z,view,12708937,2053013553559896355,,michelin,72.72,532364121,0a899268-31eb-46de-898d-09b2da950b24,2026-01-15T00:55:16.367Z,2026-01-15
2019-11-01T00:00:02.000Z,view,1004258,2053013555631882655,electronics.smartphone,apple,732.07,532647354,d2d3d2c6-631d-489e-9fb5-06f340b85be0,2026-01-15T00:55:16.367Z,2026-01-15
