## SILVER NOTEBOOK

In [0]:
# ==========================
# Step 0: Reset stale widgets
# ==========================
# Remove any widgets left over from a previous run so the defaults below take effect.
for widget_name in ["bronze_table", "silver_table", "processing_date"]:
    try:
        dbutils.widgets.remove(widget_name)
    except Exception:
        # Best effort: the widget may simply not exist yet (e.g. on a first run).
        # Catching Exception instead of a bare `except:` keeps KeyboardInterrupt /
        # SystemExit working, so the cell can still be cancelled.
        pass

# ==========================
# Step 1: Create widgets with defaults
# ==========================
dbutils.widgets.text("bronze_table", "bronze_events")   # Bronze Delta table (source)
dbutils.widgets.text("silver_table", "silver_events")   # Silver Delta table (target)
dbutils.widgets.text("processing_date", "2026-01-15")   # Default processing date (yyyy-MM-dd)
# processing_date tells the notebook which day's new data to process from the Bronze table.

In [0]:
# ==========================
# Step 2: Read widget values safely
# ==========================
from datetime import datetime

# Widget values are plain strings; strip stray whitespace from manual input.
bronze_table = dbutils.widgets.get("bronze_table").strip()
silver_table = dbutils.widgets.get("silver_table").strip()
processing_date = dbutils.widgets.get("processing_date").strip()

# Safety check: a date must be supplied
if not processing_date:
    raise Exception("❌ processing_date widget value is empty. Provide date in yyyy-MM-dd format")

# Safety check: the date must really be yyyy-MM-dd. Without this, a malformed value
# only surfaces later as a misleading "No Bronze data found" error.
try:
    datetime.strptime(processing_date, "%Y-%m-%d")
except ValueError as err:
    raise ValueError(
        f"❌ processing_date '{processing_date}' is not a valid date in yyyy-MM-dd format"
    ) from err

# Safety check: table names must not be blank, otherwise the table lookups
# in later cells fail with an obscure error.
for widget_name, table_name in (("bronze_table", bronze_table), ("silver_table", silver_table)):
    if not table_name:
        raise ValueError(f"❌ {widget_name} widget value is empty. Provide a table name")

print(f"🔹 Bronze Table: {bronze_table}")
print(f"🔹 Silver Table: {silver_table}")
print(f"🔹 Processing Date: {processing_date}")

🔹 Bronze Table: bronze_events
🔹 Silver Table: silver_events
🔹 Processing Date: 2026-01-15


In [0]:
# ==========================
# Step 3: Read Bronze Table & Filter Incremental Data
# ==========================
from pyspark.sql import functions as F

# Calendar day on which each Bronze row was ingested.
ingestion_day = F.to_date(F.col("ingestion_ts"))

# Keep only the rows ingested on the requested processing date.
# processing_date is a string; the comparison relies on Spark's implicit cast.
df_bronze = spark.table(bronze_table)
df_incremental = df_bronze.where(ingestion_day == processing_date)

# Safety check: stop early when there is nothing to process.
# NOTE: count() runs a full job over the filtered data before the write does.
count_incremental = df_incremental.count()
if not count_incremental:
    raise Exception(f"❌ No Bronze data found for processing_date = {processing_date}")

print(f"✅ Incremental Bronze rows for {processing_date}: {count_incremental}")

✅ Incremental Bronze rows for 2026-01-15: 67501979


In [0]:
# ==========================
# Step 4: Bronze → Silver transformation
# ==========================
# Silver transformations (same as Day 6 logic): de-duplicate events, then derive
# event_date, price_tier and ingestion_date.

# Price buckets: < 10 → budget, < 50 → mid, everything else → premium.
# NOTE: a NULL price matches neither condition and therefore lands in "premium".
price_col = F.col("price")
price_tier_expr = (
    F.when(price_col < 10, "budget")
    .when(price_col < 50, "mid")
    .otherwise("premium")
)

df_silver = (
    df_incremental
    # One row per (session, timestamp, product); which duplicate survives is arbitrary.
    .dropDuplicates(["user_session", "event_time", "product_id"])
    .withColumn("event_date", F.to_date(F.col("event_time")))
    .withColumn("price_tier", price_tier_expr)
    # Optional derived column for easier filtering later.
    .withColumn("ingestion_date", F.to_date(F.col("ingestion_ts")))
)

In [0]:
# ==========================
# Step 5: Write to Silver Table (idempotent per processing_date)
# ==========================
from datetime import datetime

# A plain append duplicates the whole day if this notebook is re-run for the same
# processing_date. Instead, atomically replace only that day's slice of the table.

# Normalise to a canonical yyyy-MM-dd literal before embedding it in the predicate
# (raises ValueError on anything that is not a real date).
partition_day = datetime.strptime(processing_date, "%Y-%m-%d").date().isoformat()

silver_writer = df_silver.write.format("delta").option("mergeSchema", "true")

# replaceWhere needs an existing table that already has the ingestion_date column;
# on a first run (or against an older schema) fall back to the original append.
silver_has_ingestion_date = (
    spark.catalog.tableExists(silver_table)
    and "ingestion_date" in spark.table(silver_table).columns
)

if silver_has_ingestion_date:
    (
        silver_writer
        .mode("overwrite")
        .option("replaceWhere", f"ingestion_date = '{partition_day}'")
        .saveAsTable(silver_table)
    )
else:
    silver_writer.mode("append").saveAsTable(silver_table)

# NOTE(review): rows with a NULL ingestion_date (presumably written before the
# column existed) are not touched by replaceWhere — clean up separately if needed.

print(f"✅ Silver table updated: {silver_table} for processing_date = {processing_date}")

✅ Silver table updated: silver_events for processing_date = 2026-01-15


In [0]:
%sql
-- Sanity check: number of Silver rows per ingestion day, newest day first.
-- A blank ingestion_date in the result is the NULL group, i.e. rows stored
-- without a value for this column (presumably loaded before it existed — verify).
SELECT
  ingestion_date,
  COUNT(1) AS rows
FROM silver_events
GROUP BY ingestion_date
ORDER BY ingestion_date DESC;

ingestion_date,rows
2026-01-15,67360187
,67172275


In [0]:
# Spot-check a few Silver rows for the day that was just processed.
# Uses the widget-driven table name and date instead of hardcoded values so the
# preview always matches what this run actually wrote.
spark.table(silver_table).filter(F.col("ingestion_date") == processing_date).show(5)

+-------------------+----------+----------+-------------------+--------------------+--------+------+---------+--------------------+--------------------+----------+----------+--------------+
|         event_time|event_type|product_id|        category_id|       category_code|   brand| price|  user_id|        user_session|        ingestion_ts|event_date|price_tier|ingestion_date|
+-------------------+----------+----------+-------------------+--------------------+--------+------+---------+--------------------+--------------------+----------+----------+--------------+
|2019-11-01 00:03:31|      view|   2401540|2053013563743667055|appliances.kitche...|   midea|167.29|515761354|b2da6f0a-6eeb-450...|2026-01-15 00:55:...|2019-11-01|   premium|    2026-01-15|
|2019-11-01 00:24:53|      view|   1801887|2053013554415534427|electronics.video.tv|horizont|489.05|513590675|295b71d7-10e2-435...|2026-01-15 00:55:...|2019-11-01|   premium|    2026-01-15|
|2019-11-01 00:26:38|      view|   1801739|2053013