In [0]:
# Databricks notebook source
# MAGIC %md
# MAGIC # DLT Wrapper for Existing Bronze/Silver/Gold ETL
# MAGIC - This notebook defines **DLT tables** that reference the Delta tables produced by your existing ETL notebooks.
# MAGIC - Optionally, it can **invoke** your existing notebooks via `%run` before creating the DLT tables.
# MAGIC - No changes to your original code or paths.

# COMMAND ----------

import dlt
from pyspark.sql import functions as F

# --------------------------------
# CONFIG: Update only if your table names or notebook paths differ
# --------------------------------
# Table read by the `bronze_household_src` view.
BRONZE_TABLE = "aws_dataingestion.bronze_aws.household"
# Table read by the `silver_usage_cleaned_src` view.
SILVER_TABLE = "aws_refinement.default.usage_cleaned"
# Table read by the `gold_daily_summary_src` view.
GOLD_DAILY_TABLE = "aws_curateddata.default.daily_summary"

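# OPTIONAL: If you want your existing notebooks to run first, enable the three `%run` lines in the next cell.
# Make sure the paths below match the real locations of your notebooks in your workspace.
# NOTE(review): in a Databricks source file each `%run` must be written as `# MAGIC %run <path>` in a cell of its own,
# and DLT pipelines reportedly do not support the `%run` magic -- confirm before relying on this option.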
# COMMAND ----------

# %run /Workspace/Users/venkynani701@gmail.com/bronze_notebook
# %run /Workspace/Users/venkynani701@gmail.com/silver_notebook
# %run /Workspace/Users/venkynani701@gmail.com/gold_notebook

# COMMAND ----------

# BRONZE as a DLT VIEW (raw), then materialize as a DLT TABLE
@dlt.view(
    comment="Source view reading raw Bronze table written by external ETL.",
    name="bronze_household_src",
)
def bronze_household_src():
    """Expose the externally written Bronze table to the pipeline as a view.

    Returns:
        A batch DataFrame read from the table named by ``BRONZE_TABLE``.
    """
    external_bronze = spark.read.table(BRONZE_TABLE)
    return external_bronze

@dlt.table(
    comment="Bronze layer registered in DLT for lineage/monitoring.",
    name="bronze_household",
)
def bronze_household():
    """Materialize the ``bronze_household_src`` view as a DLT table, unchanged."""
    upstream_view = "bronze_household_src"
    return dlt.read(upstream_view)

# COMMAND ----------

# SILVER as a DLT VIEW (cleaned), then materialize as a DLT TABLE
@dlt.view(
    comment="Source view reading Silver table produced by external ETL.",
    name="silver_usage_cleaned_src",
)
def silver_usage_cleaned_src():
    """Expose the externally produced Silver table to the pipeline as a view.

    Returns:
        A batch DataFrame read from the table named by ``SILVER_TABLE``.
    """
    external_silver = spark.read.table(SILVER_TABLE)
    return external_silver

@dlt.table(
    comment="Silver layer registered in DLT for lineage/monitoring.",
    name="silver_usage_cleaned",
)
def silver_usage_cleaned():
    """Materialize the ``silver_usage_cleaned_src`` view as a DLT table, unchanged."""
    upstream_view = "silver_usage_cleaned_src"
    return dlt.read(upstream_view)

# Optional: lightweight quality checks visible in DLT UI
@dlt.expect_or_drop("valid_timestamp", "timestamp IS NOT NULL")
@dlt.expect("numeric_non_negative", "Global_active_power >= 0")
@dlt.table(
    comment="Silver with basic expectations applied for observability.",
    name="silver_usage_cleaned_qc",
)
def silver_usage_cleaned_qc():
    """Re-publish ``silver_usage_cleaned`` with two expectations attached.

    The expectations are declared by the decorators above:
      * ``valid_timestamp`` uses ``expect_or_drop`` on ``timestamp IS NOT NULL``.
      * ``numeric_non_negative`` uses ``expect`` on ``Global_active_power >= 0``.

    The function body itself applies no transformation to the data.
    """
    silver_rows = dlt.read("silver_usage_cleaned")
    return silver_rows

# COMMAND ----------

# GOLD (daily summary) as a DLT VIEW + TABLE
@dlt.view(
    comment="Source view reading Gold daily summary produced by external ETL.",
    name="gold_daily_summary_src",
)
def gold_daily_summary_src():
    """Expose the externally produced Gold daily summary to the pipeline as a view.

    Returns:
        A batch DataFrame read from the table named by ``GOLD_DAILY_TABLE``.
    """
    external_gold = spark.read.table(GOLD_DAILY_TABLE)
    return external_gold

@dlt.table(
    comment="Gold layer registered in DLT for lineage/monitoring.",
    name="gold_daily_summary",
)
def gold_daily_summary():
    """Materialize the ``gold_daily_summary_src`` view as a DLT table, unchanged."""
    upstream_view = "gold_daily_summary_src"
    return dlt.read(upstream_view)

# COMMAND ----------

# Optional: derive a small analytics table on top of your Gold to demo DLT lineage
@dlt.table(
    comment="Top 10 days by total active power, derived inside DLT for lineage.",
    name="gold_top_days_by_active_power",
)
def gold_top_days_by_active_power():
    """Return the 10 rows of ``gold_daily_summary`` with the highest active power.

    Rows are sorted by ``total_active_power_kw`` in descending order and the
    first 10 are kept.
    """
    daily_summary = dlt.read("gold_daily_summary")
    # Single sort key only: rows tied on total_active_power_kw have no
    # further ordering applied before the cut-off.
    ranked_by_power = daily_summary.orderBy(F.col("total_active_power_kw").desc())
    return ranked_by_power.limit(10)
