In [0]:
# COMMAND ----------  (cell 1) — Init & widgets
# Notebook parameters: widget name -> default value shown in the UI.
# The defaults describe Berlin and a 168-hour forecast horizon.
widget_defaults = {
    "lat": "52.52",
    "lon": "13.405",
    "tz": "Europe/Berlin",
    "hours": "168",
}
for widget_name, default_value in widget_defaults.items():
    dbutils.widgets.text(widget_name, default_value)

# lat / lon / tz stay as the raw widget strings; only `hours` is parsed to int.
lat, lon, tz = (dbutils.widgets.get(key) for key in ("lat", "lon", "tz"))
hours = int(dbutils.widgets.get("hours"))

# COMMAND ----------  (cell 2) — Pull JSON from Open-Meteo
import requests, json, datetime, pyspark.sql.functions as F
# Open-Meteo forecast endpoint queried by this notebook.
BASE = "https://api.open-meteo.com/v1/forecast"

# Hourly variables requested from the API; sent as one comma-separated value.
hourly_variables = [
    "temperature_2m",
    "relativehumidity_2m",
    "windspeed_10m",
    "winddirection_10m",
    "pressure_msl",
]

# Key order is kept stable because the final request URL (resp.url) is stored
# alongside the data as `source_url`.
params = {
    "latitude": lat,
    "longitude": lon,
    "hourly": ",".join(hourly_variables),
    "timezone": tz,            # forwarded to the API exactly as entered in the widget
    "forecast_hours": hours,   # number of hourly steps requested (widget default: 168)
}

resp = requests.get(BASE, params=params, timeout=10)
resp.raise_for_status()        # abort the run on a 4xx/5xx response
payload = resp.json()

# COMMAND ----------  (cell 3) — Shape into Spark DF
# Turn the parallel hourly arrays from the API payload into one tuple per
# forecast hour. zip() stops at the shortest array.
hourly = payload["hourly"]
rows = list(zip(hourly["time"],
                hourly["temperature_2m"],
                hourly["relativehumidity_2m"],
                hourly["windspeed_10m"],
                hourly["winddirection_10m"],
                hourly["pressure_msl"]))

cols = ["timestamp_utc","temp_c","humidity_pct","wind_speed_kmh","wind_dir_deg","pressure_hpa"]

# The request is made with `timezone=tz`, and Open-Meteo then reports `time`
# as local wall-clock strings in that zone. Storing them unchanged under
# `timestamp_utc` would mislabel local time as UTC, so shift them to UTC here
# and render them back in the same ISO-8601 minute-resolution string form
# (column type and position are unchanged).
#
# The zone name echoed in the payload is preferred because it is the resolved
# name even when the widget was set to "auto"; the widget value is the fallback.
# NOTE(review): a wall-clock hour repeated at a DST fall-back cannot be
# disambiguated from the local string alone. The conversion also assumes the
# Spark session time zone has no DST shift of its own (UTC is the usual
# Databricks default) - confirm for this workspace.
TS_FORMAT = "yyyy-MM-dd'T'HH:mm"
response_tz = payload.get("timezone") or tz
utc_time = F.date_format(
    F.to_utc_timestamp(F.to_timestamp(F.col("timestamp_utc"), TS_FORMAT), response_tz),
    TS_FORMAT,
)

df = (
    spark.createDataFrame(rows, cols)
    .withColumn("timestamp_utc", utc_time)                 # local wall clock -> UTC
    .withColumn("location_lat", F.lit(float(lat)))
    .withColumn("location_lon", F.lit(float(lon)))
    .withColumn("ingest_ts", F.current_timestamp())
    .withColumn("ingest_ts_date", F.to_date("ingest_ts"))  # partition column for the Delta write
    .withColumn("source_url", F.lit(resp.url))
)

# COMMAND ----------  (cell 4) — Write/merge into Delta Bronze
# Make sure the target database exists before writing.
spark.sql("CREATE DATABASE IF NOT EXISTS weather_bronze")

# Append this run's rows to the bronze Delta table.
#  - partitioned by `ingest_ts_date`, the date column derived from `ingest_ts`
#    when `df` was built, so whole ingest days can be purged cheaply
#  - mergeSchema lets new columns be added to the table on append
bronze_writer = df.write.format("delta").mode("append")
bronze_writer = bronze_writer.partitionBy("ingest_ts_date").option("mergeSchema","true")
bronze_writer.saveAsTable("weather_bronze.hourly")

# Helper view: every row whose timestamp_utc equals the table-wide maximum.
latest_view_ddl = """
CREATE OR REPLACE VIEW weather_bronze.hourly_latest AS
SELECT * FROM weather_bronze.hourly 
WHERE timestamp_utc = (SELECT max(timestamp_utc) FROM weather_bronze.hourly)
"""
spark.sql(latest_view_ddl)


In [0]:
# spark.sql("USE weather_bronze")

# # 1️⃣ Does the table exist?
# spark.sql("SHOW TABLES").show()

# # 2️⃣ Row count and a peek
# spark.sql("""
#     SELECT COUNT(*) AS rows,
#            MIN(timestamp_utc) AS oldest,
#            MAX(timestamp_utc) AS newest
#     FROM weather_bronze.hourly
# """).show()

# # 3️⃣ Sample 5 rows
# spark.table("weather_bronze.hourly").show(5, truncate=False)
