# NOAA Sensors (Puerto Rico) - Water Level Hydrograph

GitHub: https://github.com/yagaC64/Spring2026DAEN

License: https://github.com/yagaC64/Spring2026DAEN/blob/main/LICENSE

This notebook builds a **dynamic, standalone hydrograph workflow** for Puerto Rico using NOAA sensor APIs.

What it does:
- pulls station metadata + flood thresholds from NOAA CO-OPS MDAPI,
- pulls observed water level time series from NOAA CO-OPS Data API,
- generates an interactive chart with flood bands,
- exports a **single-file dynamic HTML** chart for sharing.

## Optional ArcGIS Online Sync
If you want to publish the latest station reading to ArcGIS Online, set:
- `USE_ARCGIS=1`
- `NOAA_PR_WATER_LAYER_ID` (or `FEATURE_LAYER_ITEM_ID`)

Then re-run the notebook from Cell 1.


In [None]:
# Cell 1: Install and import libraries
# =================================================================================
import os
import sys
import subprocess
import logging
from pathlib import Path

# ArcGIS support is opt-in so the notebook runs locally without an ArcGIS account.
USE_ARCGIS = os.environ.get("USE_ARCGIS", "").lower() in ("1", "true", "yes")

print("Installing required libraries...")
# The arcgis package is only added to the install list when the optional sync is enabled.
packages = ["pandas", "requests", "plotly"] + (["arcgis"] if USE_ARCGIS else [])
# Run pip through the interpreter executing this notebook so packages land in its environment.
subprocess.check_call([sys.executable, "-m", "pip", "install", "--quiet"] + packages)
print("Installation complete.")

import json
from datetime import datetime, timedelta, timezone

import pandas as pd
import requests
import plotly.graph_objects as go

if USE_ARCGIS:
    from arcgis.gis import GIS
    from arcgis.features import Feature

# Use IPython's rich display when available; otherwise fall back to plain print.
try:
    from IPython.display import display
except ImportError:
    display = print

# Show every column and disable line wrapping when DataFrames are rendered.
for _option in ("display.max_columns", "display.width"):
    pd.set_option(_option, None)

# Notebook-wide logger with timestamped INFO-level output.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("noaa-pr-hydrograph")

print("Cell 1 complete.")

In [None]:
# Cell 2: Configuration
# =================================================================================
# Station selection in this notebook is scoped to Puerto Rico.
NOAA_STATE = "PR"

# The default station is San Juan, La Puntilla, San Juan Bay.
# Set NOAA_PR_STATION_ID in the environment to pick a different PR station.
NOAA_STATION_ID = os.getenv("NOAA_PR_STATION_ID", "9755371")

# Query window (last N days) and NOAA request options, all overridable via environment.
LOOKBACK_DAYS = int(os.getenv("LOOKBACK_DAYS", "7"))
NOAA_DATUM = os.getenv("NOAA_DATUM", "MLLW")
NOAA_TIME_ZONE = os.getenv("NOAA_TIME_ZONE", "gmt")
NOAA_UNITS = os.getenv("NOAA_UNITS", "english")

# Optional ArcGIS target: NOAA_PR_WATER_LAYER_ID wins, FEATURE_LAYER_ITEM_ID is the fallback.
FEATURE_LAYER_ITEM_ID = os.getenv("NOAA_PR_WATER_LAYER_ID") or os.getenv("FEATURE_LAYER_ITEM_ID")
LAYER_INDEX = 0
# Fail early when the sync is enabled but no target layer has been configured.
if USE_ARCGIS and not FEATURE_LAYER_ITEM_ID:
    raise ValueError("Set NOAA_PR_WATER_LAYER_ID (or FEATURE_LAYER_ITEM_ID) in the environment.")

# Local output folder (created on demand) and the three files written by Cell 6.
OUTPUT_DIR = Path(os.getenv("OUTPUT_DIR", "outputs/noaa_pr"))
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_CSV = OUTPUT_DIR.joinpath("noaa_pr_water_levels.csv")
OUTPUT_HTML = OUTPUT_DIR.joinpath("noaa_pr_hydrograph.html")
OUTPUT_GEOJSON = OUTPUT_DIR.joinpath("noaa_pr_latest_station.geojson")

# NOAA CO-OPS endpoints; these public calls need no API key.
MDAPI_BASE = "https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi"
DATAGETTER_URL = "https://api.tidesandcurrents.noaa.gov/api/prod/datagetter"

print("Configuration loaded.")
for _label, _value in (
    ("USE_ARCGIS", USE_ARCGIS),
    ("NOAA_STATION_ID", NOAA_STATION_ID),
    ("LOOKBACK_DAYS", LOOKBACK_DAYS),
    ("OUTPUT_DIR", OUTPUT_DIR),
):
    print(f"  {_label}={_value}")

In [None]:
# Cell 3: Helper functions
# =================================================================================
def api_get_json(url, params=None, timeout=60):
    """Issue a GET request and return the decoded JSON body.

    Args:
        url: Endpoint to call.
        params: Optional query-string parameters.
        timeout: Request timeout in seconds.

    Returns:
        The parsed JSON payload.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    resp = requests.get(url, params=params, timeout=timeout)
    # Turn HTTP error statuses into exceptions before trying to decode the body.
    resp.raise_for_status()
    return resp.json()


def get_pr_station_catalog(state="PR"):
    """Return the MDAPI station catalog restricted to Puerto Rico's bounding box.

    Args:
        state: Value sent as the ``state`` query parameter to MDAPI.

    Returns:
        DataFrame sorted by ``id`` holding whichever of the columns
        id, name, state, lat, lng, shefcode and tidal are present.

    Raises:
        RuntimeError: If MDAPI returns no stations, or none survive the
            geographic filter.
    """
    payload = api_get_json(f"{MDAPI_BASE}/stations.json", params={"state": state}, timeout=90)
    station_records = payload.get("stations", [])
    if not station_records:
        raise RuntimeError("No stations returned for Puerto Rico.")

    catalog = pd.DataFrame(station_records)

    # Coordinates may not be numeric; anything unparseable becomes NaN.
    coord_cols = [name for name in ("lat", "lng") if name in catalog.columns]
    for name in coord_cols:
        catalog[name] = pd.to_numeric(catalog[name], errors="coerce")

    # MDAPI sometimes returns broader catalogs, so enforce Puerto Rico geography here.
    # The box filter only applies when both coordinate columns exist.
    if len(coord_cols) == 2:
        lon_min, lon_max = -68.5, -65.0
        lat_min, lat_max = 17.5, 18.9
        # between() is inclusive on both ends; NaN coordinates never match.
        inside_box = catalog["lng"].between(lon_min, lon_max) & catalog["lat"].between(lat_min, lat_max)
        catalog = catalog[inside_box].copy()

    if catalog.empty:
        raise RuntimeError("PR station filter returned no rows. Check NOAA service response.")

    wanted = ("id", "name", "state", "lat", "lng", "shefcode", "tidal")
    present = [name for name in wanted if name in catalog.columns]
    return catalog[present].sort_values("id").reset_index(drop=True)


def get_station_metadata(station_id):
    """Fetch one station's MDAPI record with flood levels, details and sensors expanded.

    Args:
        station_id: Station identifier inserted into the MDAPI URL.

    Returns:
        The first entry of the ``stations`` list in the MDAPI response.

    Raises:
        RuntimeError: If the response contains no station entry.
    """
    payload = api_get_json(
        f"{MDAPI_BASE}/stations/{station_id}.json",
        params={"expand": "floodlevels,details,sensors"},
        timeout=90,
    )
    matches = payload.get("stations", [])
    if matches:
        return matches[0]
    raise RuntimeError(f"Station not found in MDAPI: {station_id}")


def get_observed_water_levels(station_id, begin_date, end_date, datum="MLLW", time_zone="gmt", units="english"):
    """Fetch observed water levels from the NOAA CO-OPS data API as a DataFrame.

    The response columns ``t``, ``v``, ``s``, ``f`` and ``q`` are renamed to
    ``time_utc``, ``water_level_ft``, ``sigma_ft``, ``flags`` and ``quality``.
    These names are fixed: timestamps are always parsed as UTC and values are
    always stored under the ``*_ft`` names, whatever ``time_zone`` and
    ``units`` were requested. A warning is emitted when the requested options
    do not match those names, so mislabeled data is never produced silently.

    Args:
        station_id: NOAA station identifier.
        begin_date: Start of the requested range, passed through to the API.
        end_date: End of the requested range, passed through to the API.
        datum: Vertical datum sent to the API.
        time_zone: Time zone option sent to the API.
        units: Units option sent to the API.

    Returns:
        DataFrame sorted by ``time_utc`` with a fresh integer index. Values
        that cannot be parsed as numbers become NaN in ``water_level_ft`` and
        ``sigma_ft``.

    Raises:
        RuntimeError: If the payload carries an ``error`` entry or no data rows.
    """
    import warnings  # local import so this helper does not depend on another cell

    # The output column names promise UTC and feet; flag requests that break that promise.
    if str(time_zone).lower() != "gmt":
        warnings.warn(
            f"time_zone={time_zone!r} was requested, but timestamps are still parsed as UTC "
            "and stored in 'time_utc'; use 'gmt' for correctly labeled times.",
            stacklevel=2,
        )
    if str(units).lower() != "english":
        warnings.warn(
            f"units={units!r} was requested, but values are still stored in 'water_level_ft' "
            "and 'sigma_ft'; they are in the requested units, not necessarily feet.",
            stacklevel=2,
        )

    params = {
        "product": "water_level",
        "application": "GMU_DAEN_PR",
        "begin_date": begin_date,
        "end_date": end_date,
        "datum": datum,
        "station": station_id,
        "time_zone": time_zone,
        "units": units,
        "format": "json",
    }
    payload = api_get_json(DATAGETTER_URL, params=params, timeout=120)

    # Failures can arrive inside the JSON body, so check for them explicitly.
    if "error" in payload:
        raise RuntimeError(f"NOAA API error: {payload['error']}")

    rows = payload.get("data", [])
    if not rows:
        raise RuntimeError("No water-level records returned for selected range.")

    df = pd.DataFrame(rows)
    df = df.rename(columns={"t": "time_utc", "v": "water_level_ft", "s": "sigma_ft", "f": "flags", "q": "quality"})
    df["time_utc"] = pd.to_datetime(df["time_utc"], utc=True)

    # Coerce measurement columns to numbers; unparseable entries become NaN.
    for col in ["water_level_ft", "sigma_ft"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")

    return df.sort_values("time_utc").reset_index(drop=True)


print("Helper functions ready.")

In [None]:
# pandas full tables
# Optional: to silence FutureWarning messages, uncomment the two lines below.
# import warnings
# warnings.simplefilter(action="ignore", category=FutureWarning)
# Every option below is set to None: no chained-assignment check and no display limits.
for _option_name in (
    'mode.chained_assignment',
    'display.max_rows',
    'display.max_columns',
    'display.width',
    'display.max_colwidth',
):
    pd.set_option(_option_name, None)

In [None]:
# Cell 4: Load PR station catalog and selected station metadata
# =================================================================================
pr_stations_df = get_pr_station_catalog(NOAA_STATE)
logger.info("Puerto Rico station catalog size: %s", len(pr_stations_df))

print("Sample PR stations:")
# Preview the first 20 rows; use display(pr_stations_df) to see the whole catalog.
display(pr_stations_df.head(20))

# A station missing from the filtered catalog only triggers a warning;
# the direct metadata lookup below still runs.
catalog_ids = set(pr_stations_df["id"].astype(str))
if NOAA_STATION_ID not in catalog_ids:
    logger.warning("Selected station %s is not in the PR catalog preview; continuing with direct metadata lookup.", NOAA_STATION_ID)

station_meta = get_station_metadata(NOAA_STATION_ID)
# Fall back to the station ID when the record carries no name.
station_name = station_meta.get("name", NOAA_STATION_ID)
station_lat = float(station_meta.get("lat"))
station_lon = float(station_meta.get("lng"))
station_shef = station_meta.get("shefcode")
# A missing or empty flood-level entry is treated as "no thresholds".
flood_levels = station_meta.get("floodlevels") or {}

# NOTE(review): these thresholds are later drawn on the same axis as water levels
# requested in NOAA_DATUM / NOAA_UNITS. Confirm MDAPI reports flood levels on
# that same datum and in the same units - TODO verify.
# The minor threshold falls back to the "action" level when "nos_minor" is absent or falsy.
minor_ft = flood_levels.get("nos_minor") or flood_levels.get("action")
moderate_ft = flood_levels.get("nos_moderate")
major_ft = flood_levels.get("nos_major")

print(
    "\n".join(
        [
            "Selected station:",
            f"  ID: {NOAA_STATION_ID}",
            f"  Name: {station_name}",
            f"  SHEF: {station_shef}",
            f"  Coordinates: ({station_lat:.4f}, {station_lon:.4f})",
            f"  Flood thresholds (ft) -> minor={minor_ft}, moderate={moderate_ft}, major={major_ft}",
        ]
    )
)

## NOAA Field Definitions (`time_utc`, `water_level_ft`, `sigma_ft`, `flags`, `quality`)

NOAA documents these fields in the CO-OPS API response help.

Example row:

`2026-02-13 15:42:00+00:00 | 1.043 | 0.056 | 0,0,0,0 | p`

Field meaning in this notebook:
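- `time_utc`: observation timestamp. It is UTC only when `NOAA_TIME_ZONE` is left at its default, `gmt`.
- `water_level_ft`: measured water level relative to the selected datum (default `MLLW`). It is in feet when `NOAA_UNITS` is left at its default, `english`.
- `sigma_ft`: standard deviation of the 1-second samples NOAA uses to compute the water level.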
- `flags`: comma-separated data flags from NOAA QA/QC checks.
  - For `product=water_level` (preliminary water level), NOAA defines 4 positions as `O,F,R,L`:
    - `O`: count of 1-second samples outside a 3-sigma band around the mean.
    - `F`: flat tolerance limit exceeded (`1` = yes).
    - `R`: rate-of-change tolerance limit exceeded (`1` = yes).
    - `L`: expected min/max level limit exceeded (`1` = yes).
  - So `0,0,0,0` means no triggered tolerance flags and zero outlier count.
- `quality`: NOAA QA/QC level code.
  - `p` = preliminary
  - `v` = verified

NOAA references:
- CO-OPS Data API (parameters/products): [https://api.tidesandcurrents.noaa.gov/api/prod/](https://api.tidesandcurrents.noaa.gov/api/prod/)
- CO-OPS response field definitions (`f`, `q`, etc.): [https://api.tidesandcurrents.noaa.gov/api/prod/responseHelp.html](https://api.tidesandcurrents.noaa.gov/api/prod/responseHelp.html)
- CO-OPS Metadata API (station metadata/flood levels): [https://api.tidesandcurrents.noaa.gov/mdapi/prod/](https://api.tidesandcurrents.noaa.gov/mdapi/prod/)

In [None]:
# Cell 5: Fetch observed NOAA water-level time series
# =================================================================================
# Query window: from LOOKBACK_DAYS ago up to now, both taken in UTC.
end_dt = datetime.now(timezone.utc)
begin_dt = end_dt - timedelta(days=LOOKBACK_DAYS)

# Only the calendar date (YYYYMMDD) is sent to the API.
begin_date = begin_dt.strftime("%Y%m%d")
end_date = end_dt.strftime("%Y%m%d")

logger.info("Requesting observed water levels from %s to %s (UTC dates).", begin_date, end_date)

water_df = get_observed_water_levels(
    station_id=NOAA_STATION_ID,
    begin_date=begin_date,
    end_date=end_date,
    datum=NOAA_DATUM,
    time_zone=NOAA_TIME_ZONE,
    units=NOAA_UNITS,
)

# Water levels that could not be parsed as numbers are NaN. Pick the latest and
# peak readings from valid rows only, so the summary below and any later export
# built from `latest` never carry NaN. water_df itself keeps every row.
valid_df = water_df.dropna(subset=["water_level_ft"])
if valid_df.empty:
    raise RuntimeError("NOAA returned records, but none contain a numeric water level.")

latest = valid_df.iloc[-1]
peak = valid_df.loc[valid_df["water_level_ft"].idxmax()]

print(f"Records fetched: {len(water_df)}")
print(f"Latest reading: {latest['water_level_ft']:.3f} ft at {latest['time_utc']}")
print(f"Peak reading:   {peak['water_level_ft']:.3f} ft at {peak['time_utc']}")

display(water_df.tail(8))

In [None]:
# Cell 6: Build interactive hydrograph and export standalone HTML
# =================================================================================
fig = go.Figure()

# Observed series, drawn as a single line.
observed_trace = go.Scatter(
    x=water_df["time_utc"],
    y=water_df["water_level_ft"],
    mode="lines",
    name="Observed water level",
    line={"color": "rgb(81,148,195)", "width": 2},
    hovertemplate="%{x}<br>Stage: %{y:.3f} ft<extra></extra>",
)
fig.add_trace(observed_trace)

# Top edge of the highest band: one unit above the larger of the observed
# maximum and the major threshold (a missing threshold counts as 0).
y_max = float(max(water_df["water_level_ft"].max(), major_ft or 0) + 1.0)

# Shaded flood bands as (lower edge, upper edge, fill colour).
# A band is drawn only when both of its edges are known.
flood_bands = (
    (minor_ft, moderate_ft, "rgba(248,193,83,0.15)"),
    (moderate_ft, major_ft, "rgba(241,136,81,0.15)"),
    (major_ft, y_max, "rgba(197,38,34,0.15)"),
)
for band_low, band_high, band_fill in flood_bands:
    if band_low is None or band_high is None:
        continue
    fig.add_hrect(y0=band_low, y1=band_high, line_width=0, fillcolor=band_fill)

# Dotted, labelled threshold lines for each flood stage present in the metadata.
threshold_lines = (
    (minor_ft, f"Minor: {minor_ft} ft"),
    (moderate_ft, f"Moderate: {moderate_ft} ft"),
    (major_ft, f"Major: {major_ft} ft"),
)
for stage_ft, stage_text in threshold_lines:
    if stage_ft is None:
        continue
    fig.add_hline(y=stage_ft, line_dash="dot", line_color="gray", annotation_text=stage_text, annotation_position="top left")

# Arrow callouts for the peak and the latest observation: (row, label, arrow x offset, arrow y offset).
callouts = (
    (peak, f"Peak: {peak['water_level_ft']:.2f} ft", 35, -35),
    (latest, f"Latest: {latest['water_level_ft']:.2f} ft", -40, 35),
)
for reading, callout_text, offset_x, offset_y in callouts:
    fig.add_annotation(
        x=reading["time_utc"],
        y=float(reading["water_level_ft"]),
        text=callout_text,
        showarrow=True,
        arrowhead=2,
        ax=offset_x,
        ay=offset_y,
    )

layout_options = {
    "title": f"NOAA Observed Water Level - {station_name} ({NOAA_STATION_ID})",
    "xaxis_title": "Time (UTC)",
    "yaxis_title": f"Water level ({NOAA_UNITS})",
    "hovermode": "x unified",
    "template": "plotly_white",
    "height": 520,
}
fig.update_layout(**layout_options)

# Standalone HTML: Plotly JS and the data are embedded in the one file.
fig.write_html(OUTPUT_HTML, include_plotlyjs="inline", full_html=True)

# CSV copy of the full series for reproducible analytics.
water_df.to_csv(OUTPUT_CSV, index=False)

# Small GeoJSON point describing the latest station status.
# Thresholds missing from the metadata are written as null.
threshold_props = {
    prop_name: (None if prop_value is None else float(prop_value))
    for prop_name, prop_value in (
        ("minor_ft", minor_ft),
        ("moderate_ft", moderate_ft),
        ("major_ft", major_ft),
    )
}
latest_feature = {
    "type": "Feature",
    "geometry": {"type": "Point", "coordinates": [station_lon, station_lat]},
    "properties": {
        "station_id": NOAA_STATION_ID,
        "station_name": station_name,
        "shefcode": station_shef,
        "time_utc": latest["time_utc"].isoformat(),
        "water_level_ft": float(latest["water_level_ft"]),
        **threshold_props,
    },
}
feature_collection = {"type": "FeatureCollection", "features": [latest_feature]}
with open(OUTPUT_GEOJSON, "w", encoding="utf-8") as geojson_file:
    json.dump(feature_collection, geojson_file, indent=2)

print("Export complete:")
for exported_path in (OUTPUT_CSV, OUTPUT_HTML, OUTPUT_GEOJSON):
    print(f"  {exported_path}")

fig.show()

In [None]:
# Cell 7: Optional ArcGIS Online sync (OFF by default)
# =================================================================================
# The sync is guarded by USE_ARCGIS, the same optional pattern used in the other
# notebooks, so students can run everything locally with no ArcGIS account.

if not USE_ARCGIS:
    print("ArcGIS sync is disabled (USE_ARCGIS is not set). Local outputs already generated.")
else:
    logger.info("ArcGIS sync enabled. Connecting to ArcGIS Online...")
    gis = GIS("home")

    # Resolve the configured item and make sure it exposes at least one layer.
    feature_layer_item = gis.content.get(FEATURE_LAYER_ITEM_ID)
    if not (feature_layer_item and feature_layer_item.layers):
        raise RuntimeError("Could not load ArcGIS Feature Layer item or layer list.")

    flayer = feature_layer_item.layers[LAYER_INDEX]

    # Attributes of the single latest-observation feature.
    # The target layer's field names must be compatible with these keys.
    attrs = {
        "station_id": NOAA_STATION_ID,
        "station_name": station_name,
        "shefcode": station_shef,
        "obs_time_utc": latest["time_utc"].strftime("%Y-%m-%d %H:%M:%S"),
        "water_level_ft": float(latest["water_level_ft"]),
    }
    # Thresholds missing from the NOAA metadata are sent as None.
    for field_name, threshold in (
        ("minor_ft", minor_ft),
        ("moderate_ft", moderate_ft),
        ("major_ft", major_ft),
    ):
        attrs[field_name] = None if threshold is None else float(threshold)

    # Point geometry at the station location, in WKID 4326.
    station_point = {"x": station_lon, "y": station_lat, "spatialReference": {"wkid": 4326}}
    feature = Feature(geometry=station_point, attributes=attrs)

    # Snapshot workflow: the "1=1" filter matches every row, so the layer is
    # emptied before the latest reading is inserted.
    flayer.delete_features(where="1=1")
    result = flayer.edit_features(adds=[feature])
    logger.info("ArcGIS edit result: %s", result)

## Notes

- This notebook uses NOAA public APIs and does not require API keys for the default workflow.
- The exported HTML is a single file with embedded chart logic and data.
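- To change stations, set `NOAA_PR_STATION_ID` to a valid station ID in MDAPI. An ID that is not in the Puerto Rico catalog only logs a warning in Cell 4; the notebook still attempts a direct metadata lookup for it.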
