# In [0]:
# Databricks notebook: traffic_dev_job_runner

# ── Widgets (tweak then Run All) ────────────────────────────────────────────────
# Free-text widgets and their default values, created in display order.
_TEXT_WIDGET_DEFAULTS = {
    "env": "dev",
    "batch_start": "2025-01-01",
    "batch_end": "2025-05-31",
    "stream_pattern": "VSDATA_202506*.csv",
}
for _widget_name, _widget_default in _TEXT_WIDGET_DEFAULTS.items():
    dbutils.widgets.text(_widget_name, _widget_default)

# One "true"/"false" dropdown per pipeline stage; every stage defaults to "true".
_STAGE_TOGGLES = (
    "do_setup",
    "do_bronze_batch",
    "do_bronze_stream",
    "do_silver",
    "do_gold",
)
for _widget_name in _STAGE_TOGGLES:
    dbutils.widgets.dropdown(_widget_name, "true", ["true", "false"])


def _widget_is_true(widget_name):
    """Return True when the named widget currently holds the string "true"."""
    return dbutils.widgets.get(widget_name) == "true"


# ── Read widget values ─────────────────────────────────────────────────────────
# NOTE(review): `env` is read here but not referenced again in the visible code.
env = dbutils.widgets.get("env")
batch_start = dbutils.widgets.get("batch_start")
batch_end = dbutils.widgets.get("batch_end")
stream_pattern = dbutils.widgets.get("stream_pattern")

# Widget values are strings, so each stage toggle is converted to a bool.
do_setup = _widget_is_true("do_setup")
do_bronze_batch = _widget_is_true("do_bronze_batch")
do_bronze_stream = _widget_is_true("do_bronze_stream")
do_silver = _widget_is_true("do_silver")
do_gold = _widget_is_true("do_gold")

# ── Imports from your modules (paths unchanged) ────────────────────────────────
from config import conf, Config
from setup import SetupHelper
from bronze_loader import LoadRawTraffic
from silver_loader import SilverLoader, create_region_lookup
from gold_loader import make_foreach_batch as make_gold_foreach_batch

from pyspark.sql import SparkSession

# Reuse the session that is already active; only build one when none exists.
_active_session = SparkSession.getActiveSession()
spark = _active_session if _active_session else SparkSession.builder.getOrCreate()

# Point the session at the configured catalog first, then at the database,
# so later table references resolve against them.
_use_catalog_stmt = f"USE CATALOG {conf.catalog}"
spark.sql(_use_catalog_stmt)
_use_db_stmt = f"USE {conf.db_name}"
spark.sql(_use_db_stmt)

# ── Helpers ───────────────────────────────────────────────────────────────────
import time

def t():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    now = time.localtime()
    return time.strftime("%Y-%m-%d %H:%M:%S", now)

def banner(msg):
    """Print *msg*, prefixed with the current timestamp, between two 80-char rules.

    A blank line is emitted first so consecutive banners stay visually separated.
    """
    rule = "=" * 80
    print(f"\n{rule}\n[{t()}] {msg}\n{rule}")

# ── 1) SETUP (create DB and empty tables) ─────────────────────────────────────
if do_setup:
    banner("SETUP: creating/validating database and base tables")

    # Build the helper against the configured catalog, then run its two
    # phases in order: setup first, validation second.
    _setup_catalog = conf.catalog
    sh = SetupHelper(catalog=_setup_catalog)
    sh.setup()
    sh.validate()

# ── 2) BRONZE LOAD (batch + optional stream) ──────────────────────────────────
if do_bronze_batch or do_bronze_stream:
    banner("BRONZE: loading raw CSVs into bronze table")

    # A single loader instance serves both the batch and the stream path.
    _bronze_loader_kwargs = {
        "catalog": conf.catalog,
        "table_name": conf.bronze_table,
    }
    bronze = LoadRawTraffic(**_bronze_loader_kwargs)

    # Called unconditionally before loading (the original comment describes
    # this step as idempotent).
    bronze.create_db()

    if do_bronze_batch:
        _batch_msg = f"[{t()}] Bronze BATCH load: {batch_start} → {batch_end}"
        print(_batch_msg)
        bronze.batch_load(start_date=batch_start, end_date=batch_end)

    if do_bronze_stream:
        _stream_msg = f"[{t()}] Bronze STREAM load pattern: {stream_pattern}"
        print(_stream_msg)
        bronze.stream_load(file_pattern=stream_pattern)

    # Validate once, after whichever load paths were selected have run.
    bronze.validate_table()

# ── 3) SILVER (streaming foreachBatch transform) ──────────────────────────────
if do_silver:
    banner("SILVER: unpivot + dim tables via foreachBatch (trigger=once)")

    # Build the region lookup before the stream starts (the original comment
    # describes this as an idempotent overwrite).
    create_region_lookup(spark)

    # Resolve the table names and the checkpoint path used by this stage.
    bronze_table_fqn = conf.table_fqn(conf.bronze_table)
    silver_table_fqn = conf.table_fqn(conf.silver_table)
    region_lookup_fqn = conf.table_fqn(conf.region_lookup)
    silver_checkpoint = f"{conf.checkpoint_base}/silver/{conf.silver_table}"

    # Source: the bronze table, read as a Delta stream.
    _bronze_reader = spark.readStream.format("delta")
    streaming_df = _bronze_reader.table(bronze_table_fqn)

    # Sink: a per-micro-batch handler produced by SilverLoader.
    foreach_batch_fn = SilverLoader.make_foreach_batch(
        silver_table_fqn,
        region_lookup_fqn,
    )

    # Assemble the writer one option at a time, then start the query.
    # NOTE(review): trigger(once=True) is deprecated in recent Spark releases
    # in favour of availableNow=True; confirm the batch handler tolerates
    # multiple micro-batches before switching.
    _silver_writer = streaming_df.writeStream
    _silver_writer = _silver_writer.foreachBatch(foreach_batch_fn)
    _silver_writer = _silver_writer.option("checkpointLocation", silver_checkpoint)
    _silver_writer = _silver_writer.outputMode("append")
    _silver_writer = _silver_writer.trigger(once=True)
    q_silver = _silver_writer.start()

    # Block until the one-shot query terminates before reporting completion.
    q_silver.awaitTermination()
    print(f"[{t()}] SILVER complete.")

# ── 4) GOLD (streaming foreachBatch aggregates) ───────────────────────────────
if do_gold:
    banner("GOLD: rollups via foreachBatch (trigger=once)")

    # Names are resolved again here so this stage also works when the silver
    # stage was skipped in this run.
    silver_table_fqn = conf.table_fqn(conf.silver_table)
    region_lookup_fqn = conf.table_fqn(conf.region_lookup)

    # Keep your original gold checkpoint base on /tmp
    # NOTE(review): unlike the silver stage, this path does not go through
    # conf.checkpoint_base — confirm that is still intended.
    gold_checkpoint = f"/tmp/checkpoints/gold/{conf.silver_table}"

    # Source: the silver table, read as a Delta stream.
    _silver_reader = spark.readStream.format("delta")
    streaming_df = _silver_reader.table(silver_table_fqn)

    # Sink: a per-micro-batch handler produced by the gold loader module.
    foreach_batch_gold = make_gold_foreach_batch(
        conf.catalog,
        conf.db_name,
        region_lookup_fqn,
    )

    # Assemble the writer one option at a time, then start the query.
    _gold_writer = streaming_df.writeStream
    _gold_writer = _gold_writer.foreachBatch(foreach_batch_gold)
    _gold_writer = _gold_writer.option("checkpointLocation", gold_checkpoint)
    _gold_writer = _gold_writer.outputMode("append")
    _gold_writer = _gold_writer.trigger(once=True)
    q_gold = _gold_writer.start()

    # Block until the one-shot query terminates before reporting completion.
    q_gold.awaitTermination()
    print(f"[{t()}] GOLD complete.")

# ── 5) Summary ────────────────────────────────────────────────────────────────
# Print a banner so the row-count summary is easy to locate in the output.
banner("SUMMARY COUNTS")

def safe_count(tn):
    """Return the row count of table *tn*, or a warning string on failure.

    Any exception raised while reading or counting the table is caught and
    returned as ``"⚠️ <error>"`` instead of propagating, so the caller can
    always print the result.
    """
    try:
        row_total = spark.table(tn).count()
    except Exception as err:
        # Best-effort by design: report the problem in place of a count.
        return f"⚠️ {err}"
    else:
        return row_total

# Report a row count for every table the pipeline writes.
#
# Each entry pairs the exact line prefix with the short table name. The
# fully-qualified name is resolved once per table and reused for both the
# label and the count, so the bronze/silver lines follow the same pattern as
# the gold ones.
_summary_tables = [
    ("Bronze: ", conf.bronze_table),
    ("Silver: ", conf.silver_table),
    ("Gold:   ", "traffic_gold_region_hourly"),
    ("Gold:   ", "traffic_gold_detector_hourly"),
    ("Gold:   ", "traffic_gold_region_monthly"),
    ("Gold:   ", "traffic_gold_detector_congestion"),
]
for _prefix, tn in _summary_tables:
    fqn = conf.table_fqn(tn)
    # safe_count returns either a count or a warning string; both print fine.
    print(f"{_prefix}{fqn} -> {safe_count(fqn)}")

# End the notebook run and hand the status string back to the caller.
# Statements placed after this call are not expected to execute.
dbutils.notebook.exit("✅ Job finished")
