In [0]:
# Single-file pipeline: creates the Bronze, Silver and Gold tables and contains the driver code.
import dlt
from pyspark.sql.functions import current_timestamp, col

# -------------------------------------
# Bronze: land raw CC events into Delta
# -------------------------------------
@dlt.table(
  name="bronze_cc_events",
  comment="Raw CreativeCloud events with audit columns",
  table_properties={
    # NOTE(review): confirm "pipelines.trigger" is a recognised table property;
    # trigger mode is normally a pipeline-level setting.
    "pipelines.trigger": "once"    # or "continuous" for streaming
  }
)
def bronze_cc_events():
    """Load the raw telemetry JSON files and add audit columns.

    Performs a batch read of the JSON files under ``dbfs:/tmp/raw/telemetry/``
    with an explicit schema and the ``multiline`` option enabled, then appends
    two audit columns.

    Returns:
        A DataFrame with the columns ``user_id``, ``app_name``,
        ``event_type``, ``event_timestamp`` (still a STRING at this layer),
        ``device`` (struct of ``os`` and ``region``), ``ingest_ts`` and
        ``process_id``.
    """
    raw_path = "dbfs:/tmp/raw/telemetry/"
    schema = """ user_id STRING, app_name STRING, event_type STRING,
                 event_timestamp STRING, device STRUCT<os:STRING,region:STRING> """
    df = (
      spark.read
           .schema(schema)
           .option("multiline", True)
           .json(raw_path)
    )
    # The `dlt` module does not expose `current_timestamp`; the original
    # `dlt.current_timestamp()` call raised AttributeError. Use the pyspark
    # function instead: every call within one query returns the same value,
    # so it acts as a per-run marker until a real run ID is wired in.
    return (
      df.withColumn("ingest_ts", current_timestamp())
        .withColumn("process_id", current_timestamp())  # or any unique run ID
    )

# ------------------------------------------------
# Silver: flatten, filter, dedupe & conformance join
# ------------------------------------------------
@dlt.table(
  name="silver_cc_events",
  comment="Flattened, cleansed & conformed events",
  table_properties={"pipelines.trigger": "once"},
)
@dlt.expect_or_drop("valid_timestamp", "event_timestamp IS NOT NULL")
def silver_cc_events():
    """Flatten, de-duplicate and enrich the bronze events.

    Steps:
      1. Read ``bronze_cc_events`` and project the ``device`` struct into
         top-level ``os`` and ``region`` columns.
      2. Keep one row per (user_id, app_name, event_type, event_timestamp).
      3. Left-join the ``cc_features`` lookup on (app_name, event_type), so
         events without a matching lookup row are still kept.

    Rows failing the ``valid_timestamp`` expectation
    (``event_timestamp IS NOT NULL``) are dropped by the decorator.

    Returns:
        The joined DataFrame: the flattened event columns plus whatever
        columns ``cc_features`` contributes.
    """
    # Natural key of an event; reused for the projection and the de-dupe.
    natural_key = ["user_id", "app_name", "event_type", "event_timestamp"]

    events = dlt.read("bronze_cc_events").select(
        *natural_key,
        col("device.os").alias("os"),
        col("device.region").alias("region"),
        "ingest_ts",
        "process_id",
    )
    unique_events = events.dropDuplicates(natural_key)

    # The lookup must exist in the same pipeline (or be created beforehand).
    features = dlt.read("cc_features")
    return unique_events.join(
        features,
        on=["app_name", "event_type"],
        how="left",
    )

# -----------------------------------
# Gold: aggregate into business metrics
# -----------------------------------
@dlt.table(
  name="gold_daily_counts",
  comment="Daily event counts by app and feature",
  table_properties={"pipelines.trigger": "once"},
)
def gold_daily_counts():
    """Count silver events per day, app and feature category.

    Derives a ``dt`` column by casting ``event_timestamp`` to a date, then
    groups by ``dt``, ``app_name`` and ``feature_category`` and counts rows.

    Returns:
        A DataFrame with the columns ``dt``, ``app_name``,
        ``feature_category`` and ``count``.
    """
    events = dlt.read("silver_cc_events")
    event_day = col("event_timestamp").cast("date")
    dated = events.withColumn("dt", event_day)
    # NOTE(review): `feature_category` is not among the columns silver selects
    # from bronze, so it presumably comes from the cc_features join - confirm.
    per_day = dated.groupBy("dt", "app_name", "feature_category")
    return per_day.count()