# In [0]:
# COMMAND ---------- (cell 1) — Init & widgets
# Notebook parameters as (widget name, default value) pairs, declared in the
# order they should be created.
_WIDGET_DEFAULTS = (
    ("lat",  "52.52"),           # Berlin default
    ("lon",  "13.405"),
    ("tz",   "Europe/Berlin"),
    ("hours", "168"),            # forecast horizon (if no start_date)
    ("start_date", ""),          # historical backfill start (YYYY-MM-DD)
    ("end_date",   ""),          # historical backfill end (YYYY-MM-DD)
)
for _widget_name, _widget_default in _WIDGET_DEFAULTS:
    dbutils.widgets.text(_widget_name, _widget_default)

# Read the current widget values. lat/lon/tz stay as the raw widget strings;
# hours is parsed as an integer; an empty date widget is normalised to None.
_read_widget = dbutils.widgets.get
lat, lon, tz = _read_widget("lat"), _read_widget("lon"), _read_widget("tz")
hours = int(_read_widget("hours"))
start_date, end_date = (
    _read_widget(_date_key) or None for _date_key in ("start_date", "end_date")
)

import requests, pyspark.sql.functions as F
# COMMAND ---------- (cell 2) — Pull JSON from Open-Meteo (Historical or Forecast)

# A historical backfill needs BOTH dates. If only one is supplied, fail loudly
# instead of silently falling through to a forecast pull (which would append
# forecast rows to the bronze table when a backfill was intended).
if bool(start_date) != bool(end_date):
    raise ValueError(
        "start_date and end_date must be provided together "
        "(got start_date={!r}, end_date={!r})".format(start_date, end_date)
    )

# Single source of truth for the mode, used for both the endpoint and the params.
is_backfill = bool(start_date and end_date)

# Archive endpoint when backfill dates are provided, forecast endpoint otherwise.
if is_backfill:
    BASE = "https://archive-api.open-meteo.com/v1/archive"
else:
    BASE = "https://api.open-meteo.com/v1/forecast"

# Query parameters shared by both endpoints. The hourly variable names are
# also the keys read back from the "hourly" object of the response.
params = dict(
    latitude = lat,
    longitude = lon,
    hourly = ",".join([
        "temperature_2m", "relativehumidity_2m",
        "windspeed_10m", "winddirection_10m", "pressure_msl"
    ]),
    timezone = tz
)

# Add backfill dates OR forecast horizon (never both).
if is_backfill:
    params["start_date"] = start_date
    params["end_date"]   = end_date
else:
    params["forecast_hours"] = hours  # widget default is 168 h, i.e. 7 days

# Bounded timeout so a hung connection cannot stall the notebook run;
# raise_for_status() turns any 4xx/5xx response into an exception before
# the body is parsed.
resp = requests.get(BASE, params=params, timeout=20)
resp.raise_for_status()
payload = resp.json()

# COMMAND ---------- (cell 3) — Shape into Spark DF
# Block-local stdlib import, needed only for the UTC normalisation below.
from datetime import datetime, timedelta

hourly = payload["hourly"]

# The request is made with `timezone=tz`, so the API returns `time` as
# wall-clock time in that zone, not UTC. The response reports the offset it
# applied in `utc_offset_seconds`; shift the values back by that offset so the
# `timestamp_utc` column actually holds UTC. When the offset is 0 or missing
# the strings are passed through untouched.
# NOTE(review): assumes one fixed offset applies to the whole series - confirm
# against the Open-Meteo docs. Rows ingested before this change still hold
# local wall-clock values.
_utc_offset = timedelta(seconds=payload.get("utc_offset_seconds") or 0)
_TIME_FMT = "%Y-%m-%dT%H:%M"  # same layout as the strings the API sends
if _utc_offset:
    times_utc = [
        (datetime.fromisoformat(t) - _utc_offset).strftime(_TIME_FMT)
        for t in hourly["time"]
    ]
else:
    times_utc = list(hourly["time"])

# One tuple per hourly step; the tuple order must match `cols` below.
rows = list(zip(times_utc,
                hourly["temperature_2m"],
                hourly["relativehumidity_2m"],
                hourly["windspeed_10m"],
                hourly["winddirection_10m"],
                hourly["pressure_msl"]))

# NOTE(review): the unit suffixes in these names rely on the API's default
# units, since the request sends no unit parameters - confirm.
cols = ["timestamp_utc","temp_c","humidity_pct","wind_speed_kmh","wind_dir_deg","pressure_hpa"]

# Column types are inferred by Spark from the Python values. The extra columns
# record where and when the rows were fetched:
#   location_lat / location_lon - the requested coordinates, as floats
#   ingest_ts / ingest_ts_date  - load timestamp and its calendar date
#   source_url                  - the exact request URL that produced the rows
df = (
    spark.createDataFrame(rows, cols)
         .withColumn("location_lat", F.lit(float(lat)))
         .withColumn("location_lon", F.lit(float(lon)))
         .withColumn("ingest_ts",    F.current_timestamp())
         .withColumn("ingest_ts_date", F.to_date("ingest_ts"))
         .withColumn("source_url",   F.lit(resp.url))
)

# COMMAND ---------- (cell 4) — Append into Delta Bronze
# Make sure the target database exists before the first write.
spark.sql("CREATE DATABASE IF NOT EXISTS weather_bronze")

# Append-only load: every run adds its rows to the table and nothing is
# deduplicated or merged here. Rows are partitioned by ingest date, and the
# write is made with the mergeSchema option enabled.
_bronze_writer = (
    df.write.format("delta")
      .mode("append")
      .option("mergeSchema", "true")
      .partitionBy("ingest_ts_date")
)
_bronze_writer.saveAsTable("weather_bronze.hourly")

# Helper view: every row whose timestamp_utc equals the table-wide maximum.
_latest_view_ddl = """
CREATE OR REPLACE VIEW weather_bronze.hourly_latest AS
SELECT * FROM weather_bronze.hourly 
WHERE timestamp_utc = (SELECT max(timestamp_utc) FROM weather_bronze.hourly)
"""
spark.sql(_latest_view_ddl)

# Echo which endpoint was used and the exact request URL for the run log.
for _label, _value in (("API Used:", BASE), ("Source URL:", resp.url)):
    print(_label, _value)
