In [0]:
import dlt
from pyspark.sql.functions import *
from pyspark.sql.types import *

# -----------------------------------------------------------------------------
# 1) Bronze: just land raw data into Delta
# -----------------------------------------------------------------------------
@dlt.table(
    name="bronze_events",
    comment="Raw ingested data; exactly the files you pointed at in 01_Bronze.py"
)
def bronze_events():
    """Land the raw source files as the ``bronze_events`` table.

    Template body: swap the reader below for the ingestion logic from
    01_Bronze.py. The function only builds and returns the DataFrame; no
    explicit write is performed here.

    Returns:
        The batch DataFrame loaded from ``/mnt/raw/source_path``.
    """
    # JSON is a placeholder format; use parquet (or whatever the sample
    # notebook reads) if the raw files differ.
    raw_reader = spark.read.format("json")
    return raw_reader.load("/mnt/raw/source_path")


# -----------------------------------------------------------------------------
# 2) Silver: flatten, dedupe, conform, etc.
# -----------------------------------------------------------------------------
@dlt.table(
    name="silver_events",
    comment="Flattened structs, deduplicated, with feature_category joined on"
)
@dlt.expect_or_drop("valid_event_ts", "event_ts IS NOT NULL")   # example expectation
def silver_events():
    """Build the silver layer from ``bronze_events``.

    Steps:
      1. Flatten: promote ``event.app_name``, ``event.event_type`` and
         ``event.event_ts`` to top-level columns and drop the ``event`` struct.
      2. Deduplicate on the natural key
         (``user_id``, ``app_name``, ``event_type``, ``event_ts``).
      3. Conform: left-join the ``feature_catalog`` lookup on
         (``app_name``, ``event_type``). The lookup is expected to supply
         ``feature_category`` (per the table comment) -- confirm against the
         actual catalog schema.

    The ``valid_event_ts`` expectation drops rows whose ``event_ts`` is NULL.

    Returns:
        The flattened, deduplicated, catalog-enriched DataFrame.
    """
    bronze = dlt.read("bronze_events")

    # 2.a) flatten nested structs.
    # Columns selected explicitly below must NOT be repeated in the
    # pass-through list: the original splat re-added "user_id" (and would
    # re-add any top-level app_name/event_type/event_ts), producing duplicate
    # column names that Delta rejects when the table is written.
    natural_key = ["user_id", "app_name", "event_type", "event_ts"]
    passthrough = [
        c for c in bronze.columns
        if c != "event" and c not in natural_key
    ]
    flat = bronze.select(
        col("user_id"),
        col("event.app_name").alias("app_name"),
        col("event.event_type").alias("event_type"),
        col("event.event_ts").alias("event_ts"),
        *passthrough
    )

    # 2.b) deduplicate on natural key
    deduped = flat.dropDuplicates(natural_key)

    # 2.c) conformance join against small lookup table
    # (or read the small lookup from Bronze instead of the metastore table).
    feature_cat = spark.table("feature_catalog")

    # Left join so events without a catalog entry are kept (with NULLs in
    # the lookup columns).
    silver = deduped.join(
        feature_cat,
        on=["app_name", "event_type"],
        how="left"
    )

    return silver


# -----------------------------------------------------------------------------
# 3) Gold: aggregates, roll-ups, business-level tables
# -----------------------------------------------------------------------------
@dlt.table(
    name="gold_event_summary",
    comment="Business-level summary of events per user/app/…"
)
def gold_event_summary():
    """Aggregate ``silver_events`` per (``user_id``, ``feature_category``).

    Returns:
        A DataFrame with one row per group and three measures:
        ``event_count`` (row count), ``first_seen`` (minimum ``event_ts``)
        and ``last_seen`` (maximum ``event_ts``).
    """
    per_user_category = dlt.read("silver_events").groupBy(
        "user_id", "feature_category"
    )

    # NOTE: count/min/max are the pyspark.sql.functions column aggregates
    # brought in by the star import at the top of the file, not the Python
    # builtins of the same name.
    measures = [
        count("*").alias("event_count"),
        min("event_ts").alias("first_seen"),
        max("event_ts").alias("last_seen"),
    ]
    return per_user_category.agg(*measures)

Key points:
	1.	Move each notebook’s logic into its matching function
Copy your ingestion code from 01_Bronze.py into bronze_events().
Copy your cleaning/flatten/dedupe/join from 02_silver.ipynb into silver_events().
Copy your aggregations from 03_Gold.ipynb into gold_event_summary().
	2.	Declare each stage with @dlt.table
That tells Delta Live Tables how to build each layer in order.
	3.	Remove your old driver
You no longer need the run_notebook() orchestration — DLT takes over scheduling and execution.
	4.	Attach it to a DLT Pipeline
	•	In the UI, go to Jobs & Pipelines → Delta Live Tables
	•	Create a new DLT pipeline and set its source code to /Workspace/.../Medallion_DLT.py
	•	Configure the cluster/compute you want it to run on
	•	Click Start
From then on, DLT will execute your three stages in order, manage lineage, enforce expectations, etc.