# NOAA Sensors (Puerto Rico) - Resilient Water Level Hydrograph

GitHub: https://github.com/yagaC64/Spring2026DAEN

License: https://github.com/yagaC64/Spring2026DAEN/blob/main/LICENSE

This notebook is the non-AGOL (no ArcGIS Online dependency), data-science/student version of the Puerto Rico NOAA water-level workflow.

What this notebook does:
- builds the station list from the live NOAA Metadata API (MDAPI) catalog, so no hard-coded station IDs are required,
- applies resilient station resolution and compatibility filtering,
- fetches observed water-level time series with robust request/error handling,
- exports one comprehensive standalone HTML report.

Output policy:
- exactly one HTML file is produced (`OUTPUT_HTML`).
- CSV exports are also written for analysis/reproducibility.


In [None]:
# Cell 1: Install and import libraries
# =================================================================================
import importlib.util
import logging
import os
import subprocess
import sys
import time
from datetime import datetime, timedelta, timezone
from pathlib import Path


def ensure_packages(packages):
    """Install with pip every entry of ``packages`` that cannot be found for import.

    Each name is looked up with ``importlib.util.find_spec``; the names that
    are not found are passed unchanged to ``pip install --quiet`` run under
    the current interpreter. Nothing happens when every name resolves.

    Raises:
        subprocess.CalledProcessError: if the pip command exits non-zero.
    """
    not_found = []
    for name in packages:
        if importlib.util.find_spec(name) is None:
            not_found.append(name)

    if not not_found:
        return

    print(f"Installing missing packages: {not_found}")
    pip_command = [sys.executable, "-m", "pip", "install", "--quiet"]
    pip_command.extend(not_found)
    subprocess.check_call(pip_command)


# Install anything missing before the third-party imports below are attempted.
# The same names are used for the import lookup and for the pip install.
ensure_packages(["pandas", "requests", "plotly"])

import pandas as pd
import requests
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Without IPython there is no rich display helper, so fall back to plain print.
try:
    from IPython.display import display
except ImportError:
    display = print

# Configure logging once; later cells log through this named logger.
logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
logger = logging.getLogger("noaa-pr-hydrograph")

print("Cell 1 complete.")


In [None]:
# Cell 2: Configuration
# =================================================================================
# Most settings can be overridden through environment variables of the same
# name; NOAA_STATE is fixed in code.
NOAA_STATE = "PR"
NOAA_PRODUCT = os.environ.get("NOAA_PRODUCT", "water_level")
NOAA_DATUM = os.environ.get("NOAA_DATUM", "MLLW")
NOAA_TIME_ZONE = os.environ.get("NOAA_TIME_ZONE", "gmt")
NOAA_UNITS = os.environ.get("NOAA_UNITS", "metric")

# LOOKBACK_HOURS wins when set; otherwise LOOKBACK_DAYS (default 7) * 24 is used.
# The LOOKBACK_DAYS fallback is evaluated eagerly, so a non-integer LOOKBACK_DAYS
# raises ValueError even when LOOKBACK_HOURS is provided.
LOOKBACK_HOURS = int(os.environ.get("LOOKBACK_HOURS", str(int(os.environ.get("LOOKBACK_DAYS", "7")) * 24)))

# Station selection strategy (algorithmic first)
# Both ID settings are comma-separated strings; empty means "not set".
RAW_STATION_IDS = os.environ.get("NOAA_STATION_IDS", "").strip()
EXCLUDE_STATION_IDS = os.environ.get("NOAA_EXCLUDE_STATION_IDS", "").strip()
MAX_ACTIVE_STATIONS = int(os.environ.get("MAX_ACTIVE_STATIONS", "100"))
# Only "1", "true" and "yes" (case-insensitive) enable the flag; anything else disables it.
REQUIRE_TIDAL_FOR_DATUM = os.environ.get("REQUIRE_TIDAL_FOR_DATUM", "true").lower() in ("1", "true", "yes")

# Catalog safety controls
CATALOG_TIMEOUT_SECONDS = int(os.environ.get("CATALOG_TIMEOUT_SECONDS", "90"))
CATALOG_MAX_BYTES = int(os.environ.get("CATALOG_MAX_BYTES", "5000000"))
CATALOG_MAX_ROWS = int(os.environ.get("CATALOG_MAX_ROWS", "5000"))

# Local outputs (single HTML + CSV files)
# The output directory is created immediately as a side effect of running this cell.
OUTPUT_DIR = Path(os.environ.get("OUTPUT_DIR", "outputs/noaa_pr"))
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_HTML = OUTPUT_DIR / "noaa_pr_waterlevel_comprehensive.html"
OUTPUT_CSV = OUTPUT_DIR / "noaa_pr_water_levels_timeseries.csv"
OUTPUT_STATION_CSV = OUTPUT_DIR / "noaa_pr_station_summary.csv"

# Puerto Rico geographic guardrails
# Bounds are in decimal degrees and are compared inclusively by in_pr_bbox.
PR_BBOX = {
    "min_lon": -68.5,
    "max_lon": -65.0,
    "min_lat": 17.5,
    "max_lat": 18.9,
}

# Minimal coordinate fallbacks for continuity if metadata gaps appear
# Keyed by station ID; values are (lat, lon) tuples.
STATION_COORDS_FALLBACK = {
    "9755371": (18.4655, -66.1061),
    "9759110": (17.9733, -67.0469),
    "9759938": (17.9691, -67.0464),
}

# The request window ends at run time and starts LOOKBACK_HOURS earlier. Both
# ends are computed in UTC and formatted as "YYYYMMDD HH:MM" strings.
# NOTE(review): the window is UTC while NOAA_TIME_ZONE is configurable — confirm
# the two agree before overriding NOAA_TIME_ZONE.
RUN_UTC = datetime.now(timezone.utc)
BEGIN_UTC = RUN_UTC - timedelta(hours=LOOKBACK_HOURS)
BEGIN_DATE = BEGIN_UTC.strftime("%Y%m%d %H:%M")
END_DATE = RUN_UTC.strftime("%Y%m%d %H:%M")

# Base URLs for the metadata API (station catalog/details) and the data API.
MDAPI_BASE = "https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi"
DATAGETTER_URL = "https://api.tidesandcurrents.noaa.gov/api/prod/datagetter"

# Echo the effective configuration so a run's settings are visible in its output.
print("Configuration loaded:")
print(f"  NOAA state: {NOAA_STATE}")
print(f"  Product/datum/units: {NOAA_PRODUCT} / {NOAA_DATUM} / {NOAA_UNITS}")
print(f"  Lookback hours: {LOOKBACK_HOURS}")
print(f"  Manual station IDs override: {RAW_STATION_IDS or '[none: use live catalog]'}")
print(f"  Excluded station IDs: {EXCLUDE_STATION_IDS or '[none]'}")
print(f"  Max active stations: {MAX_ACTIVE_STATIONS}")
print(f"  Require tidal stations for datum compatibility: {REQUIRE_TIDAL_FOR_DATUM}")
print(f"  Catalog timeout/bytes/rows: {CATALOG_TIMEOUT_SECONDS}s / {CATALOG_MAX_BYTES} / {CATALOG_MAX_ROWS}")
print(f"  Output directory: {OUTPUT_DIR}")


In [None]:
# Cell 3: Helper functions
# =================================================================================
class _NonRetryableRequestError(RuntimeError):
    """Request failure that would fail the same way on every retry."""


def api_get_json(url, params=None, timeout=60, retries=3, backoff_seconds=2, max_bytes=None):
    """GET ``url`` and return the decoded JSON body, retrying transient failures.

    Args:
        url: Endpoint to request.
        params: Optional query parameters passed to ``requests.get``.
        timeout: Per-request timeout in seconds.
        retries: Maximum number of attempts.
        backoff_seconds: Base of the exponential backoff; the wait before the
            next attempt is ``backoff_seconds ** attempt`` seconds.
        max_bytes: Optional upper bound on the response body size. The check
            runs after the body has been downloaded, so it guards parsing and
            downstream processing rather than the transfer itself.

    Returns:
        The parsed JSON payload.

    Raises:
        RuntimeError: when the request ultimately fails. The message includes
            the text of the last underlying error, which is also attached as
            the exception cause.
    """
    last_err = None
    attempts_made = 0
    for attempt in range(1, retries + 1):
        attempts_made = attempt
        try:
            response = requests.get(url, params=params, timeout=timeout)

            if max_bytes is not None:
                payload_size = len(response.content or b"")
                if payload_size > max_bytes:
                    # Asking again will not shrink the payload, so do not retry.
                    raise _NonRetryableRequestError(
                        f"Payload size {payload_size} bytes exceeded limit {max_bytes} bytes for {url}"
                    )

            if response.status_code >= 400:
                message = ""
                try:
                    err_payload = response.json()
                    message = (err_payload.get("error") or {}).get("message", "")
                except (ValueError, AttributeError):
                    # Body was not JSON, or not shaped like {"error": {...}}.
                    message = (response.text or "").strip()[:300]

                # 4xx other than timeout/rate-limit are usually non-transient (do not retry).
                if 400 <= response.status_code < 500 and response.status_code not in (408, 429):
                    raise _NonRetryableRequestError(
                        f"Non-retryable NOAA HTTP {response.status_code}: {message or 'client error'}"
                    )

                response.raise_for_status()

            return response.json()

        except _NonRetryableRequestError as exc:
            last_err = exc
            break

        except Exception as exc:
            # Deliberately broad: any other failure (network, HTTP 5xx, bad JSON)
            # is treated as transient and retried.
            last_err = exc
            if attempt < retries:
                sleep_for = backoff_seconds ** attempt
                logger.warning(
                    "Request failed (attempt %s/%s). Retrying in %ss. %s",
                    attempt,
                    retries,
                    sleep_for,
                    exc,
                )
                time.sleep(sleep_for)

    # Report the attempts actually made; a non-retryable failure stops early.
    raise RuntimeError(f"NOAA request failed after {attempts_made} attempts: {last_err}") from last_err


def parse_station_csv(value):
    """Split a comma-separated station ID string into a list of trimmed IDs.

    Empty segments and whitespace-only segments are dropped. ``None`` yields an
    empty list rather than the literal ID ``"None"``. Any other value is
    converted with ``str`` before splitting.
    """
    if value is None:
        return []
    return [sid.strip() for sid in str(value).split(",") if sid.strip()]


def in_pr_bbox(lat, lon, bbox=PR_BBOX):
    """Return whether the point lies inside ``bbox`` (edges inclusive).

    ``bbox`` is a mapping with ``min_lon``, ``max_lon``, ``min_lat`` and
    ``max_lat`` keys; it defaults to the notebook's Puerto Rico guardrail box.
    """
    lon_inside = bbox["min_lon"] <= lon <= bbox["max_lon"]
    return lon_inside and bbox["min_lat"] <= lat <= bbox["max_lat"]


def valid_lat_lon(lat, lon):
    """Return True when ``lat``/``lon`` form a usable coordinate pair.

    A pair is rejected when either value is missing (``pd.isna``), when both
    values are effectively zero (the 0,0 placeholder), or when either falls
    outside the ranges -90..90 (latitude) and -180..180 (longitude).
    """
    if pd.isna(lat) or pd.isna(lon):
        return False

    latitude = float(lat)
    longitude = float(lon)

    near_origin = abs(latitude) < 1e-9 and abs(longitude) < 1e-9
    within_range = -90 <= latitude <= 90 and -180 <= longitude <= 180
    return within_range and not near_origin


def split_primary_flag(flag_value):
    """Return the first comma-separated flag as an int, or 0 when unavailable.

    ``None``, a float NaN, and any value whose leading segment is not an
    integer literal all yield 0.
    """
    if flag_value is None:
        return 0
    if isinstance(flag_value, float) and pd.isna(flag_value):
        return 0

    leading_segment = str(flag_value).partition(",")[0]
    try:
        return int(leading_segment)
    except ValueError:
        return 0


def get_pr_station_catalog(state="PR", timeout_seconds=90, max_bytes=5000000, max_rows=5000):
    """Fetch the MDAPI station catalog for ``state`` and keep stations inside the PR box.

    Args:
        state: State code sent as the ``state`` query parameter.
        timeout_seconds: Request timeout forwarded to ``api_get_json``.
        max_bytes: Response size limit forwarded to ``api_get_json``.
        max_rows: Maximum accepted number of catalog rows; values <= 0 disable
            the check.

    Returns:
        A DataFrame with one row per station ID, sorted by ``id``, containing
        whichever of ``id, name, state, lat, lng, shefcode, tidal`` the
        response provided. When both ``lat`` and ``lng`` are present, rows
        with missing coordinates or coordinates outside the default
        ``in_pr_bbox`` box are dropped.

    Raises:
        RuntimeError: if the request fails, no stations are returned, the row
            limit is exceeded, or the rows carry no ``id`` field.
    """
    payload = api_get_json(
        f"{MDAPI_BASE}/stations.json",
        params={"state": state},
        timeout=timeout_seconds,
        max_bytes=max_bytes,
    )
    rows = payload.get("stations", [])
    if not rows:
        raise RuntimeError("No stations returned by NOAA MDAPI for the configured state.")

    if max_rows > 0 and len(rows) > max_rows:
        raise RuntimeError(
            f"Catalog row count {len(rows)} exceeded CATALOG_MAX_ROWS={max_rows}. Increase threshold if expected."
        )

    df = pd.DataFrame(rows)
    # De-duplication and sorting below need "id"; fail with a clear message
    # instead of a KeyError if the response shape is unexpected.
    if "id" not in df.columns:
        raise RuntimeError("NOAA MDAPI station catalog rows did not include an 'id' field.")
    df["id"] = df["id"].astype(str).str.strip()

    for c in ("lat", "lng"):
        if c in df.columns:
            df[c] = pd.to_numeric(df[c], errors="coerce")

    if {"lat", "lng"}.issubset(df.columns):
        # Rows with missing coordinates are treated as outside the bounding box.
        inside_box = [
            bool(pd.notna(lat) and pd.notna(lng) and in_pr_bbox(float(lat), float(lng)))
            for lat, lng in zip(df["lat"], df["lng"])
        ]
        df = df[inside_box].copy()

    keep = [c for c in ["id", "name", "state", "lat", "lng", "shefcode", "tidal"] if c in df.columns]
    return df[keep].drop_duplicates(subset=["id"]).sort_values("id").reset_index(drop=True)


def resolve_station_ids(catalog_df, raw_station_ids="", exclude_station_ids="", max_active_stations=100):
    """Choose the active station IDs from the catalog, manual list and exclusions.

    Args:
        catalog_df: DataFrame with an ``id`` column listing the live catalog.
        raw_station_ids: Optional comma-separated manual station IDs.
        exclude_station_ids: Optional comma-separated station IDs to drop.
        max_active_stations: Cap on the number of selected stations; values
            <= 0 disable the cap.

    Returns:
        A tuple ``(selected, invalid_requested, excluded, selection_mode)``:
        the selected IDs, the manual IDs not found in the catalog, the sorted
        exclusion IDs, and a label describing how the selection was made.

    Manual IDs are intersected with the catalog in the order given. If none of
    them match, the full catalog is used instead. Repeated IDs are collapsed so
    a station is never selected (and later fetched or plotted) twice.
    """
    catalog_id_set = set(catalog_df["id"].astype(str).tolist())
    catalog_ids = sorted(catalog_id_set)

    # dict.fromkeys drops repeats while preserving first-seen order.
    requested_ids = list(dict.fromkeys(parse_station_csv(raw_station_ids)))
    excluded_ids = set(parse_station_csv(exclude_station_ids))

    invalid_requested = [sid for sid in requested_ids if sid not in catalog_id_set]

    if requested_ids:
        selected = [sid for sid in requested_ids if sid in catalog_id_set]
        selection_mode = "manual intersect live catalog"
        if not selected:
            logger.warning("No manual station IDs matched live catalog; falling back to full live catalog.")
            selected = catalog_ids
            selection_mode = "auto fallback (full live catalog)"
    else:
        selected = catalog_ids
        selection_mode = "auto (full live catalog)"

    if excluded_ids:
        selected = [sid for sid in selected if sid not in excluded_ids]

    if max_active_stations > 0 and len(selected) > max_active_stations:
        logger.warning(
            "Selected station count %s exceeds MAX_ACTIVE_STATIONS=%s; truncating list.",
            len(selected),
            max_active_stations,
        )
        selected = selected[:max_active_stations]

    return selected, invalid_requested, sorted(excluded_ids), selection_mode


def get_station_metadata(station_id):
    """Return the MDAPI metadata record for one station.

    The request asks for the ``floodlevels``, ``details`` and ``sensors``
    expansions and returns the first entry of the response's ``stations`` list.

    Raises:
        RuntimeError: if the request fails or the response lists no stations.
    """
    metadata_url = f"{MDAPI_BASE}/stations/{station_id}.json"
    expand_params = {"expand": "floodlevels,details,sensors"}
    payload = api_get_json(
        metadata_url,
        params=expand_params,
        timeout=90,
        max_bytes=CATALOG_MAX_BYTES,
    )

    stations = payload.get("stations", [])
    if stations:
        return stations[0]
    raise RuntimeError(f"NOAA MDAPI did not return metadata for station {station_id}.")


def fetch_station_observations(station_id, begin_date, end_date):
    """Fetch observations for one station over the given window as a DataFrame.

    The request uses the notebook-level product, datum, time zone and units.
    An empty frame with columns ``t, v, s, f, q`` is returned when the API
    reports "No data was found" or returns no ``data`` rows. Otherwise the
    rows are returned with ``v`` and ``s`` coerced to numeric where present.

    Raises:
        RuntimeError: if the request fails or the payload carries any other
            ``error`` entry.
    """
    empty_columns = ["t", "v", "s", "f", "q"]

    query = {
        "product": NOAA_PRODUCT,
        "application": "GMU_DAEN_PR",
        "begin_date": begin_date,
        "end_date": end_date,
        "datum": NOAA_DATUM,
        "station": station_id,
        "time_zone": NOAA_TIME_ZONE,
        "units": NOAA_UNITS,
        "format": "json",
    }
    payload = api_get_json(DATAGETTER_URL, params=query, timeout=120, max_bytes=CATALOG_MAX_BYTES)

    if "error" in payload:
        message = (payload["error"] or {}).get("message", "")
        if "No data was found" not in message:
            raise RuntimeError(f"NOAA API error for station {station_id}: {payload['error']}")
        # "No data" is an expected outcome, not a failure.
        return pd.DataFrame(columns=empty_columns)

    rows = payload.get("data", [])
    if not rows:
        return pd.DataFrame(columns=empty_columns)

    observations = pd.DataFrame(rows)
    numeric_columns = [name for name in ("v", "s") if name in observations.columns]
    for name in numeric_columns:
        observations[name] = pd.to_numeric(observations[name], errors="coerce")

    return observations


print("Helper functions ready.")


In [None]:
# Cell 4: DataFrame display options
# =================================================================================
# Apply every pandas option from one table so the settings read as a unit.
for _option_name, _option_value in (
    ("mode.chained_assignment", None),
    ("display.max_rows", 50),
    ("display.max_columns", None),
    ("display.width", None),
    ("display.max_colwidth", None),
):
    pd.set_option(_option_name, _option_value)

print("Cell 4 complete.")


## NOAA Field Definitions (`time_utc`, `water_level`, `sigma`, `flags`, `quality`)

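Field meanings used in this notebook (where the DataFrame column has a different name, it is shown in parentheses):
- `time_utc`: observation timestamp parsed from NOAA's `t` field; it is UTC when `NOAA_TIME_ZONE` is left at its default `gmt`.
- `water_level` (column `v`): observed water level in `NOAA_UNITS`, relative to the selected datum (`NOAA_DATUM`).
- `sigma` (column `s`): NOAA sigma value for the measurement.
- `flags`: comma-separated QA/QC flags from NOAA; column `f` holds the first flag as an integer.
- `quality` (column `q`): quality code (`p` preliminary, `v` verified).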

References:
- CO-OPS Data API: [https://api.tidesandcurrents.noaa.gov/api/prod/](https://api.tidesandcurrents.noaa.gov/api/prod/)
- CO-OPS response help: [https://api.tidesandcurrents.noaa.gov/api/prod/responseHelp.html](https://api.tidesandcurrents.noaa.gov/api/prod/responseHelp.html)
- CO-OPS Metadata API: [https://api.tidesandcurrents.noaa.gov/mdapi/prod/](https://api.tidesandcurrents.noaa.gov/mdapi/prod/)


In [None]:
# Cell 5: Build station list from live catalog and fetch observations
# =================================================================================
# The timestamp is taken just before the catalog request is issued.
catalog_pull_utc = datetime.now(timezone.utc)
catalog_df = get_pr_station_catalog(
    state=NOAA_STATE,
    timeout_seconds=CATALOG_TIMEOUT_SECONDS,
    max_bytes=CATALOG_MAX_BYTES,
    max_rows=CATALOG_MAX_ROWS,
)

logger.info("Catalog pull UTC: %s", catalog_pull_utc.strftime("%Y-%m-%d %H:%M:%S %Z"))
logger.info("Puerto Rico station catalog rows after PR filter: %s", len(catalog_df))

# Keep only stations flagged tidal, unless the filter is disabled, the datum is
# STND, or the catalog has no "tidal" column. Missing flags count as non-tidal.
# NOTE(review): presumably non-tidal stations do not publish tidal datums such
# as MLLW — confirm against the NOAA documentation.
if REQUIRE_TIDAL_FOR_DATUM and NOAA_DATUM.upper() != "STND" and "tidal" in catalog_df.columns:
    pre_filter_count = len(catalog_df)
    catalog_df["tidal"] = catalog_df["tidal"].fillna(False).astype(bool)
    catalog_df = catalog_df[catalog_df["tidal"]].copy()
    logger.info(
        "Datum compatibility filter (tidal only for datum=%s): %s -> %s stations",
        NOAA_DATUM,
        pre_filter_count,
        len(catalog_df),
    )

display(catalog_df.head(30))

# Combine the (filtered) catalog with the manual ID list, exclusions and cap.
STATION_IDS, invalid_requested_ids, excluded_ids, station_selection_mode = resolve_station_ids(
    catalog_df,
    raw_station_ids=RAW_STATION_IDS,
    exclude_station_ids=EXCLUDE_STATION_IDS,
    max_active_stations=MAX_ACTIVE_STATIONS,
)

print("Station selection summary:")
print(f"  Selection mode: {station_selection_mode}")
print(f"  Selected station count: {len(STATION_IDS)}")
print(f"  Selected station IDs: {', '.join(STATION_IDS) if STATION_IDS else '[none]'}")
if invalid_requested_ids:
    print(f"  Ignored non-catalog station IDs: {', '.join(invalid_requested_ids)}")
if excluded_ids:
    print(f"  Excluded station IDs applied: {', '.join(excluded_ids)}")

# Nothing downstream can run without at least one station, so stop here.
if not STATION_IDS:
    raise RuntimeError("No active station IDs available after catalog resolution and exclusions.")

# Collect per-station metadata (name, coordinates, flood thresholds). A station
# whose metadata cannot be fetched or whose coordinates are unusable is skipped
# with a warning rather than aborting the run.
station_rows = []
for sid in STATION_IDS:
    try:
        meta = get_station_metadata(sid)
    except Exception as exc:
        logger.warning("Skipping station %s due to metadata error: %s", sid, exc)
        continue

    name = meta.get("name", sid)
    shefcode = meta.get("shefcode")

    # Missing or non-numeric coordinates are coerced to NaN and caught below.
    lat = pd.to_numeric(meta.get("lat"), errors="coerce")
    lon = pd.to_numeric(meta.get("lng"), errors="coerce")

    # First try the hard-coded fallback coordinates for this station ID.
    if not valid_lat_lon(lat, lon):
        fallback = STATION_COORDS_FALLBACK.get(sid)
        if fallback:
            lat, lon = fallback
            logger.warning("Using fallback coordinates for station %s", sid)

    if not valid_lat_lon(lat, lon):
        logger.warning("Skipping station %s due to invalid coordinates and no fallback.", sid)
        continue

    # "floodlevels" may be absent or null. The minor threshold falls back to the
    # "action" value when "nos_minor" is missing or falsy.
    flood = meta.get("floodlevels") or {}
    minor = flood.get("nos_minor") or flood.get("action")
    moderate = flood.get("nos_moderate")
    major = flood.get("nos_major")

    station_rows.append(
        {
            "station_id": sid,
            "station_name": name,
            "shefcode": shefcode,
            "lat": float(lat),
            "lon": float(lon),
            "minor": pd.to_numeric(minor, errors="coerce"),
            "moderate": pd.to_numeric(moderate, errors="coerce"),
            "major": pd.to_numeric(major, errors="coerce"),
        }
    )

if not station_rows:
    raise RuntimeError("No stations with valid metadata/coordinates were available after resolution.")

station_meta_df = pd.DataFrame(station_rows)
display(station_meta_df)

# Fetch observations for every station that passed the metadata step. Stations
# that error out or return no rows are logged and skipped.
obs_frames = []
for rec in station_rows:
    sid = rec["station_id"]
    try:
        obs_df = fetch_station_observations(sid, BEGIN_DATE, END_DATE)
    except Exception as exc:
        msg = str(exc)
        # This message shape is logged at INFO as a datum incompatibility;
        # every other failure is logged as a warning.
        if "There is no" in msg and "for the station" in msg:
            logger.info("Skipping station %s due to datum incompatibility: %s", sid, exc)
        else:
            logger.warning("Skipping station %s due to observation fetch error: %s", sid, exc)
        continue

    if obs_df.empty:
        logger.info("No observations returned for station %s", sid)
        continue

    # Rows whose timestamp cannot be parsed are dropped.
    # NOTE(review): timestamps are marked UTC regardless of NOAA_TIME_ZONE; that
    # looks accurate only for the default "gmt" — confirm before overriding.
    obs_df["time_utc"] = pd.to_datetime(obs_df["t"], utc=True, errors="coerce")
    obs_df = obs_df.dropna(subset=["time_utc"]).copy()

    # Keep the raw flag string in "flags" and replace "f" with its first flag as an int.
    if "f" in obs_df.columns:
        obs_df["flags"] = obs_df["f"].astype(str)
    else:
        obs_df["flags"] = ""

    obs_df["f"] = obs_df["flags"].apply(split_primary_flag)

    # Repeat the station metadata and run settings on every observation row.
    obs_df["station_id"] = sid
    obs_df["station_name"] = rec["station_name"]
    obs_df["shefcode"] = rec["shefcode"]
    obs_df["lat"] = rec["lat"]
    obs_df["lon"] = rec["lon"]
    obs_df["minor"] = rec["minor"]
    obs_df["moderate"] = rec["moderate"]
    obs_df["major"] = rec["major"]
    obs_df["datum"] = NOAA_DATUM
    obs_df["units"] = NOAA_UNITS
    obs_df["run_utc"] = RUN_UTC.isoformat()

    if "q" not in obs_df.columns:
        obs_df["q"] = None

    # Fixed column order for the combined frame and the CSV export.
    keep_cols = [
        "station_id",
        "station_name",
        "shefcode",
        "time_utc",
        "t",
        "v",
        "s",
        "f",
        "flags",
        "q",
        "lat",
        "lon",
        "minor",
        "moderate",
        "major",
        "datum",
        "units",
        "run_utc",
    ]
    obs_frames.append(obs_df[keep_cols])

if not obs_frames:
    raise RuntimeError("No NOAA observations were returned for any active station.")

# Combine all stations into one long frame ordered by station and time, then
# re-check coordinates row by row as a final guard.
water_df = pd.concat(obs_frames, ignore_index=True)
water_df = water_df.sort_values(["station_id", "time_utc"]).reset_index(drop=True)
water_df = water_df[water_df.apply(lambda r: valid_lat_lon(r["lat"], r["lon"]), axis=1)].copy()

# Most recent observation per station: the last row of each group after a
# global sort by time.
latest_df = (
    water_df.sort_values("time_utc")
    .groupby("station_id", as_index=False)
    .tail(1)
    .reset_index(drop=True)
)

# Station-level analytics for one comprehensive HTML report
# "count" only counts non-missing water-level values.
station_agg = water_df.groupby("station_id", as_index=False).agg(
    obs_count=("v", "count"),
    peak_value=("v", "max"),
    mean_value=("v", "mean"),
)

# One row per station: latest reading plus aggregates, highest latest value first.
station_summary_df = (
    latest_df[["station_id", "station_name", "time_utc", "v", "q", "lat", "lon", "minor", "moderate", "major"]]
    .rename(columns={"time_utc": "latest_time_utc", "v": "latest_value", "q": "latest_quality"})
    .merge(station_agg, on="station_id", how="left")
    .sort_values(["latest_value", "station_id"], ascending=[False, True])
    .reset_index(drop=True)
)

print(f"Total observations prepared: {len(water_df)}")
print(f"Active stations with observations: {station_summary_df['station_id'].nunique()}")
display(station_summary_df)


In [None]:
# Cell 6: Build one comprehensive standalone HTML report
# =================================================================================
# Station order follows station_summary_df. The same list drives the line
# traces and the filter buttons, so their positions stay aligned.
trace_station_ids = station_summary_df["station_id"].astype(str).tolist()
station_name_lookup = {
    row["station_id"]: row["station_name"] for _, row in station_summary_df.iterrows()
}

# Three stacked panels: time series (xy), latest-value bars (xy), summary table.
fig = make_subplots(
    rows=3,
    cols=1,
    specs=[[{"type": "xy"}], [{"type": "xy"}], [{"type": "table"}]],
    row_heights=[0.58, 0.22, 0.20],
    vertical_spacing=0.08,
    subplot_titles=(
        "Observed Water-Level Time Series by Station",
        "Latest Water Level by Station",
        "Station Summary",
    ),
)

# Row 1: one line per station
# These traces are added first, in trace_station_ids order; the filter buttons
# built later rely on that ordering.
for sid in trace_station_ids:
    sdf = water_df[water_df["station_id"] == sid].copy()
    # Fall back to the station ID when no name is available.
    sname = station_name_lookup.get(sid, sid)
    fig.add_trace(
        go.Scatter(
            x=sdf["time_utc"],
            y=sdf["v"],
            mode="lines",
            name=f"{sid} | {sname}",
            # The doubled braces in the f-string emit a literal %{y:.3f} for Plotly.
            hovertemplate=(
                "Station: " + sid + "<br>"
                + "Name: " + sname + "<br>"
                + "Time (UTC): %{x}<br>"
                + f"Water level ({NOAA_UNITS}): %{{y:.3f}}<extra></extra>"
            ),
        ),
        row=1,
        col=1,
    )

# Row 2: latest value bar chart
# A single bar trace covers all stations. Quality and timestamp are passed as
# strings through customdata so the hover text can show them.
fig.add_trace(
    go.Bar(
        x=station_summary_df["station_id"],
        y=station_summary_df["latest_value"],
        # Missing latest values are labelled "nan" instead of failing to format.
        text=station_summary_df["latest_value"].map(lambda x: f"{x:.3f}" if pd.notna(x) else "nan"),
        textposition="outside",
        name="Latest level",
        marker={"color": "rgba(58, 126, 184, 0.85)"},
        hovertemplate=(
            "Station: %{x}<br>"
            + "Latest level: %{y:.3f}<br>"
            + "Quality: %{customdata[0]}<br>"
            + "Latest time (UTC): %{customdata[1]}<extra></extra>"
        ),
        customdata=station_summary_df[["latest_quality", "latest_time_utc"]].astype(str).values,
        showlegend=False,
    ),
    row=2,
    col=1,
)

# Row 3: summary table
# Each entry of "values" is one table column, in the same order as the header.
fig.add_trace(
    go.Table(
        header={
            "values": [
                "Station ID",
                "Station Name",
                "Latest UTC",
                f"Latest ({NOAA_UNITS})",
                f"Peak ({NOAA_UNITS})",
                "Obs Count",
                "Quality",
            ],
            "fill_color": "#1f3a56",
            "font": {"color": "white", "size": 12},
            "align": "left",
        },
        cells={
            "values": [
                station_summary_df["station_id"],
                station_summary_df["station_name"],
                station_summary_df["latest_time_utc"].astype(str),
                # Numeric columns are pre-formatted to 3 decimals; missing values show as "nan".
                station_summary_df["latest_value"].map(lambda x: f"{x:.3f}" if pd.notna(x) else "nan"),
                station_summary_df["peak_value"].map(lambda x: f"{x:.3f}" if pd.notna(x) else "nan"),
                station_summary_df["obs_count"].astype(int),
                station_summary_df["latest_quality"].astype(str),
            ],
            "align": "left",
            "font": {"size": 11},
            "height": 24,
        },
    ),
    row=3,
    col=1,
)

# Optional station filter buttons for row-1 traces
# The visibility lists hold one entry per station line trace followed by two
# entries for the bar chart and the table, which every button leaves visible.
station_trace_count = len(trace_station_ids)
button_all_visibility = [True] * (station_trace_count + 2)
buttons = [
    {
        "label": "All Stations",
        "method": "update",
        "args": [{"visible": button_all_visibility}],
    },
    # One button per station: show only that station's line trace.
    *(
        {
            "label": sid,
            "method": "update",
            "args": [
                {"visible": [pos == idx for pos in range(station_trace_count)] + [True, True]}
            ],
        }
        for idx, sid in enumerate(trace_station_ids)
    ),
]

# Figure-level layout: title with run timestamp and station count, plus the
# station filter dropdown positioned to the right of the plots.
fig.update_layout(
    title=(
        f"NOAA CO-OPS Puerto Rico Water Levels | "
        f"Stations: {station_trace_count} | "
        f"Run UTC: {RUN_UTC.strftime('%Y-%m-%d %H:%M:%S')}"
    ),
    template="plotly_white",
    height=1100,
    hovermode="x unified",
    updatemenus=[
        {
            "buttons": buttons,
            "direction": "down",
            "showactive": True,
            "x": 1.01,
            "xanchor": "left",
            "y": 1.16,
            "yanchor": "top",
        }
    ],
    # Extra right and top margin leaves room for the legend and dropdown.
    margin={"l": 50, "r": 220, "t": 130, "b": 50},
)

# Axis titles for the two xy panels (the table panel has no axes).
fig.update_xaxes(title_text="Time (UTC)", row=1, col=1)
fig.update_yaxes(title_text=f"Water level ({NOAA_UNITS})", row=1, col=1)
fig.update_xaxes(title_text="Station ID", row=2, col=1)
fig.update_yaxes(title_text=f"Latest water level ({NOAA_UNITS})", row=2, col=1)

# Exports (single HTML artifact + CSV data files)
water_df.to_csv(OUTPUT_CSV, index=False)
station_summary_df.to_csv(OUTPUT_STATION_CSV, index=False)
# include_plotlyjs=True is the documented way to embed plotly.js in the file,
# which keeps the report a single standalone artifact that works offline.
fig.write_html(OUTPUT_HTML, include_plotlyjs=True, full_html=True)

print("Export complete:")
print(f"  HTML report: {OUTPUT_HTML}")
print(f"  Time-series CSV: {OUTPUT_CSV}")
print(f"  Station summary CSV: {OUTPUT_STATION_CSV}")

# Also render the figure in the current notebook/session.
fig.show()


## Notes

- This notebook is API-first and resilient by design.
- Live station catalogs are preferred over hardwired IDs.
- Non-retryable NOAA client errors are handled without unnecessary retry storms.
- This notebook intentionally omits AGOL maintenance/action cells.
