### ============================================
 RAW → BRONZE INGESTION PIPELINE
### ============================================
Purpose:
  - Read raw IoT CSV files from ADLS
  - Apply minimal structure (schema inference, timestamp parsing)
  - Write Bronze Delta table (no cleaning; only an optional incremental date filter)
  - This is the ingestion layer ONLY
### ============================================


In [0]:
#PRODUCTION NOTEBOOK 1 — RAW → BRONZE (FINAL, INCREMENTAL, DAILY)
#1. Storage authentication
# The raw data is addressed with an abfss:// URI on the storage account
# <Datalake> (dfs endpoint), so the account key must be registered under that
# same account and endpoint. Keying it by container name or by the blob
# endpoint does not apply to abfss:// paths.
# The key is read from a Databricks secret scope instead of being pasted into
# the notebook, so it never appears in source control or notebook exports.
# Replace <secret-scope> / <storage-key-secret-name> with the real names.
spark.conf.set(
  "fs.azure.account.key.<Datalake>.dfs.core.windows.net",
  dbutils.secrets.get(scope="<secret-scope>", key="<storage-key-secret-name>")
)


In [0]:
#2. Parameters
# Widget name -> default value. Both widgets are free-text inputs.
#   input_path  : location of the raw CSV data
#   date_filter : optional lower bound for the incremental load,
#                 e.g. 2026-01-01 00:00:00 (empty string = load everything)
_widget_defaults = {
    "input_path": "abfss://<container>@<Datalake>.dfs.core.windows.net/project2/raw_data/",
    "date_filter": "",
}
for _widget_name, _widget_default in _widget_defaults.items():
    dbutils.widgets.text(_widget_name, _widget_default)

# Current widget values, consumed by the cells below.
input_path = dbutils.widgets.get("input_path")
date_filter = dbutils.widgets.get("date_filter")


In [0]:
#3. Read RAW Data
from pyspark.sql.functions import to_timestamp

# CSV reader: first row is the header, column types are inferred by Spark.
raw_csv_reader = spark.read.options(header="true", inferSchema="true")

# Load everything under input_path, then replace the "timestamp" column with
# its value parsed using the "M/d/yyyy H:mm" pattern.
# NOTE(review): confirm the raw files really use this pattern — TODO verify
# against an actual raw file.
df_raw = raw_csv_reader.csv(input_path)
df_raw = df_raw.withColumn("timestamp", to_timestamp("timestamp", "M/d/yyyy H:mm"))

In [0]:
#Apply Incremental Filter (Daily Timestamp‑Based)
from pyspark.sql.functions import col, lit

# Keep only rows at or after the requested cutoff. When the widget is empty
# (or contains only whitespace) no filter is applied and all rows are loaded.
# The cutoff is passed as a literal value through the Column API rather than
# being spliced into a SQL expression string, so quotes or other SQL syntax in
# the widget value cannot change the predicate.
incremental_cutoff = date_filter.strip()
if incremental_cutoff:
    df_raw = df_raw.filter(
        col("timestamp") >= lit(incremental_cutoff).cast("timestamp")
    )


In [0]:
#Write to Bronze (UC‑Managed Delta Table)
# Configure a Delta writer in append mode, so existing table rows are kept and
# this run's rows are added to them.
bronze_writer = df_raw.write.format("delta").mode("append")

# Persist to the Bronze table by its three-part name.
bronze_writer.saveAsTable("<custom>.default.project2_bronze")


In [0]:
%sql
-- ============================================
-- VALIDATION / INSPECTION
-- Validation only. Not part of pipeline logic.
-- ============================================
-- Returns up to 20 rows of the Bronze table for a manual spot check.
-- NOTE(review): the write cell targets <custom>.default.project2_bronze;
-- confirm that catalog `uzi` below is the same catalog.

SELECT * FROM uzi.default.project2_bronze LIMIT 20;


device_id,timestamp,temperature,humidity,status
dev-03,2026-01-01T08:22:00Z,28.43,49.57,ON
dev-03,2026-01-01T15:21:00Z,26.09,43.75,OFF
dev-02,2026-01-01T04:48:00Z,28.03,72.43,ERROR
dev-03,2026-01-01T06:00:00Z,32.06,38.2,error
dev-01,2026-01-01T15:45:00Z,31.52,56.92,error
dev-03,2026-01-01T07:03:00Z,16.09,23.15,Off
dev-01,2026-01-01T20:07:00Z,25.76,45.36,Off
dev-01,2026-01-01T23:53:00Z,16.86,45.42,ON
dev-01,2026-01-01T16:54:00Z,19.17,48.48,OFF
dev-02,2026-01-01T03:35:00Z,28.33,57.21,on
