In [0]:
# 02_Silver_Transformation
# Purpose: Clean & validate Bronze data into Silver layer

from pyspark.sql.functions import col, to_timestamp

# Table names and the dedup key are declared once so the read, the write,
# the read-back and the final status message cannot drift apart.
BRONZE_TABLE = "workspace.ecommerce.bronze_events"
SILVER_TABLE = "workspace.ecommerce.silver_events"

# Columns that together identify one event. Rows with a NULL in any of them
# are filtered out below, and rows sharing all three are collapsed into one.
# NOTE(review): event_type is not part of this key, so two different event
# types recorded for the same session/product/timestamp would be merged into
# a single row -- confirm that is intended.
BUSINESS_KEY = ["user_session", "event_time", "product_id"]

# Read Bronze
bronze_df = spark.table(BRONZE_TABLE)

print("Bronze count:", bronze_df.count())

# Clean & normalize: cast event_time to a timestamp, then drop rows whose
# key columns are NULL.
# NOTE(review): the event_time NULL filter presumably relies on to_timestamp
# yielding NULL for unparseable values; with ANSI mode enabled the cast may
# raise instead -- confirm against the cluster configuration.
silver_df = (
    bronze_df
    .withColumn("event_time", to_timestamp(col("event_time")))
    .filter(col("event_time").isNotNull())
    .filter(col("user_session").isNotNull())
    .filter(col("product_id").isNotNull())
)

# Deduplicate using business key
silver_dedup_df = silver_df.dropDuplicates(BUSINESS_KEY)

# Write to Silver Delta table.
# The write happens BEFORE the count/preview on purpose: silver_dedup_df is a
# lazy plan, so calling count(), display() and write on it separately would
# run the whole filter + dedup pipeline three times. dropDuplicates also does
# not specify which of the duplicate rows survives, so separate evaluations
# are not guaranteed to describe the same rows.
silver_dedup_df.write.format("delta") \
    .mode("overwrite") \
    .saveAsTable(SILVER_TABLE)

# Report on what was actually persisted rather than re-evaluating the plan.
silver_written_df = spark.table(SILVER_TABLE)

print("Silver count after dedup:", silver_written_df.count())

display(silver_written_df)

print(f"✅ Silver layer written: {SILVER_TABLE}")


Bronze count: 42448764
Silver count after dedup: 42412833


event_time,event_type,product_id,category_id,category_code,brand,price,user_id,user_session
2019-10-31T00:37:25.000Z,view,13200244,2053013557192163841,furniture.bedroom.bed,,87.52,549758857,c5504b1c-cfcf-41a8-9bae-abf288f76695
2019-10-31T00:37:50.000Z,view,19100006,2053013556227473861,construction.tools.saw,,55.09,540302861,7e28139b-3897-4461-b5c0-7a6be3633231
2019-10-31T00:45:25.000Z,view,38900012,2085718636156158307,,,44.53,565938855,8a0098b6-da46-446d-a376-4074fc256e22
2019-10-31T00:45:37.000Z,view,21410743,2053013561579406073,electronics.clocks,tissot,229.09,529624404,9ef78313-d382-46ab-a198-8c6f9ef94cf2
2019-10-31T00:57:48.000Z,view,6100241,2053013560866374351,auto.accessories.radar,playme,102.71,557919943,0a411698-a90d-4353-a5a4-8dc82b51cfd5
2019-10-31T00:57:48.000Z,view,12711053,2053013553559896355,,tunga,32.69,559716471,d61608b4-4b0d-4f20-b174-b0306f812647
2019-10-31T01:15:08.000Z,view,5100337,2053013553341792533,electronics.clocks,apple,318.26,523539992,abfc99c8-ebae-4cdd-8c28-344c31f7a081
2019-10-31T01:19:16.000Z,view,13200667,2053013557192163841,furniture.bedroom.bed,bts,175.01,531726104,ad86ede7-32fd-4ef1-86bc-875028eda668
2019-10-31T01:24:09.000Z,view,3700951,2053013565983425517,appliances.environment.vacuum,bosch,164.48,512391103,ca9fb4ac-bcf0-4dc8-abb0-a575cd0fe6e0
2019-10-31T01:24:33.000Z,view,2800424,2053013563835941749,appliances.kitchen.refrigerators,haier,231.64,558486323,84525e9c-11c2-4f3d-ac00-05da0e8c4667


✅ Silver layer written: workspace.ecommerce.silver_events
