In [0]:
from pyspark.sql import functions as F

# ============================================================
# 1. LOAD SILVER
# ============================================================
# Read the silver products Delta table that the rules below refine.
df = spark.read.load("<output_path>/products", format="delta")

# ============================================================
# 2. BUSINESS RULES
# ============================================================

# Rule: Clean empty strings → NULL
# Only string-typed columns are rewritten; every other column is passed
# through untouched. A single select() is used instead of a per-column
# withColumn() loop, which would add one projection per column to the plan.
string_cols = {name for name, dtype in df.dtypes if dtype == "string"}
df = df.select(
    *[
        F.when(F.trim(F.col(c)) == "", None).otherwise(F.col(c)).alias(c)
        if c in string_cols
        else F.col(c)
        for c in df.columns
    ]
)

# Rule: Drop invalid product_id
# Runs after the empty-string cleanup so that blank / whitespace-only ids,
# which have just become NULL, are dropped together with true NULLs.
df = df.filter(F.col("product_id").isNotNull())

# Rule: Safe numeric conversion for price
# try_cast turns values that are not valid doubles into NULL; those rows are
# then dropped and the remaining prices are rounded to 2 decimal places.
df = (
    df.withColumn("price", F.expr("try_cast(price AS double)"))
    .where(F.col("price").isNotNull())
    .withColumn("price", F.round(F.col("price"), 2))
)

# Rule: Status cleanup
# Default a missing status to "unknown". Blank statuses were already turned
# into NULL by the empty-string rule earlier in this cell, so a `== ""` test
# on its own can never match; NULL has to be checked explicitly. The blank
# check is kept so the rule still holds if it runs without that cleanup.
df = df.withColumn(
    "status",
    F.when(
        F.col("status").isNull() | (F.trim(F.col("status")) == ""),
        "unknown",
    ).otherwise(F.col("status")),
)

# Rule: Add ingestion_date
# Stamp each row with the timestamp of this run.
df = df.withColumn("ingestion_date", F.current_timestamp())

# ============================================================
# 3. WRITE GOLD
# ============================================================
# Replace the gold products Delta table with the cleaned result.
df.write.save(
    "<output_path>/gold/products",
    format="delta",
    mode="overwrite",
)


In [0]:
%sql
-- Preview the gold products Delta table, addressed directly by its path.
SELECT *
FROM delta.`<output_path>/gold/products`;


product_id,product_code,product_name,category,price,status,ingestion_date
P001,TDXU,Product_cAzAf,Home,606.07,discontinued,2026-01-30T19:26:29.935Z
P002,ABC123,Product_JklzZ,Sports,585.44,discontinued,2026-01-30T19:26:29.935Z
P003,ZCEQ,Product_RngqV,Sports,156.61,active,2026-01-30T19:26:29.935Z
P007,INVALIDCODE,Product_RhjRj,Electronics,825.6,active,2026-01-30T19:26:29.935Z
P010,FFPO,Product_SGuMU,Home,439.58,active,2026-01-30T19:26:29.935Z
P011,INVALIDCODE,Product_sWpGt,Home,979.68,discontinued,2026-01-30T19:26:29.935Z
P012,ABC123,Product_qPVRg,Gadgets,106.32,discontinued,2026-01-30T19:26:29.935Z
P014,ABC123,Product_QzApj,Electronics,879.72,active,2026-01-30T19:26:29.935Z
P016,ABC123,Product_gWqRw,Electronics,258.61,active,2026-01-30T19:26:29.935Z
P017,ABC123,Product_NiBii,kitchen,214.51,active,2026-01-30T19:26:29.935Z
