## Welcome to your notebook.


#### Run this cell to connect to your GIS and get started:

In [None]:
# ArcGIS connection is handled in the main cell (USE_ARCGIS).
# This cell is intentionally left blank for local runs.


#### Now you are ready to start!

In [None]:
# # %%
# """
# USGS → AGOL Sync Notebook 🌎➡️🗺️  (v2 – NaN‑safe)
# =================================================
# Fixes the *TypeError: JSON object must be str … not float* when the
# `Parameters_or_Selectors` cell is blank (Excel reads it as NaN/float).

# Key change ‚Üí robust JSON parse:
# ```python
# params_raw = row.get("Parameters_or_Selectors")
# if isinstance(params_raw, str) and params_raw.strip():
#     jparams = json.loads(params_raw)
# else:
#     jparams = {}
# ```
# Everything else unchanged (field names already aligned).

# Optional: ArcGIS Online Sync
# To publish updates to ArcGIS Online, set:
#   USE_ARCGIS=1
#   USGS_EARTHQUAKE_LAYER_ID (or FEATURE_LAYER_ITEM_ID)
# Then re-run the notebook from the top.
# """

In [None]:
# %% Imports
import pandas as pd
from datetime import datetime, timedelta, timezone
import requests, json, logging, sys, math, os
from pathlib import Path

# ArcGIS Online sync is opt-in: it is switched on only when the USE_ARCGIS
# environment variable is "1", "true" or "yes" (any letter case). Anything
# else, including an unset variable, keeps the run local.
USE_ARCGIS = os.getenv("USE_ARCGIS", "").lower() in ("1", "true", "yes")

# The arcgis package is imported only when sync is enabled.
if USE_ARCGIS:
    from arcgis.gis import GIS
    from arcgis.features import Feature
    from arcgis.geometry import Point

In [None]:
# %% ----------------------------------------------------------------------
# 1. CONFIG
# ---------------------------------------------------------------------------
def resolve_file(filename, env_var=None, search_roots=None):
    """Locate ``filename`` and return its path as a string.

    Resolution order:
      1. If ``env_var`` is given and that environment variable is non-empty,
         its value is returned as-is (it is not checked for existence).
      2. Otherwise each search root is scanned recursively, in order, and the
         first match is returned.

    Args:
        filename: File name (or glob pattern) passed to ``Path.rglob``.
        env_var: Optional name of an environment variable holding an explicit path.
        search_roots: Optional iterable of directories (``str`` or ``Path``).
            When omitted or empty, the current directory, its parent and the
            home directory are used. ``/arcgis/home`` is searched last
            whenever it exists.

    Returns:
        The path of the first match, as ``str``.

    Raises:
        FileNotFoundError: If no override is set and no root contains the file.
    """
    if env_var:
        env_val = os.environ.get(env_var)
        if env_val:
            return env_val

    # Build a fresh list of Path objects: this accepts str/tuple inputs and
    # guarantees the caller's ``search_roots`` is never mutated by the append below.
    roots = [Path(r) for r in (search_roots or [Path.cwd(), Path.cwd().parent, Path.home()])]
    arcgis_home = Path("/arcgis/home")
    if arcgis_home.exists():
        roots.append(arcgis_home)

    for root in roots:
        if root.exists():
            match = next(root.rglob(filename), None)
            if match:
                return str(match)

    env_hint = f" ({env_var})" if env_var else ""
    raise FileNotFoundError(
        f"Could not find '{filename}'. "
        f"Set the required env var{env_hint} or place the file under the repo or /arcgis/home."
    )

# Master workbook: PR_ALERT_XLSX wins when set, otherwise the file is searched for on disk.
EXCEL_PATH            = resolve_file("PR Alert Data Sources.xlsx", env_var="PR_ALERT_XLSX")
# Target item id: the USGS-specific variable is consulted first, then the generic one.
FEATURE_LAYER_ITEM_ID = os.getenv("USGS_EARTHQUAKE_LAYER_ID") or os.getenv("FEATURE_LAYER_ITEM_ID")
LAYER_INDEX           = 0
SOURCE_URL_KEYWORD    = "earthquake.usgs.gov"

# A layer id is mandatory only when ArcGIS sync is switched on.
if not FEATURE_LAYER_ITEM_ID and USE_ARCGIS:
    raise ValueError("Set USGS_EARTHQUAKE_LAYER_ID (or FEATURE_LAYER_ITEM_ID) in the environment.")

# Destination folder and files for local (non-ArcGIS) runs; the folder is created up front.
OUTPUT_DIR = Path(os.getenv("OUTPUT_DIR", "outputs"))
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_CSV = OUTPUT_DIR.joinpath("usgs_earthquakes.csv")
OUTPUT_GEOJSON = OUTPUT_DIR.joinpath("usgs_earthquakes.geojson")

# INFO-level logging to stdout, formatted as "LEVEL: message".
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(levelname)s: %(message)s")

In [None]:
# %% ----------------------------------------------------------------------
# 2. CONNECT to AGOL (optional)
# ---------------------------------------------------------------------------
# `gis` is always defined after this cell: a GIS connection when sync is
# enabled, otherwise None.
if USE_ARCGIS:
    logging.info("Connecting to ArcGIS Online…")
    try:
        gis = GIS("home")
        logging.info("Connected to %s", gis.properties.portalHostname)
    except Exception as e:
        # Nothing downstream can run without a connection, so stop here.
        # Lazy %-formatting matches the other logging calls in this notebook.
        logging.error("FATAL: Could not connect to ArcGIS Online. %s", e)
        sys.exit(1)
else:
    gis = None
    logging.info("ArcGIS disabled; running locally only.")

In [None]:
# %% ----------------------------------------------------------------------
# 3. READ Excel & extract parameters
# ---------------------------------------------------------------------------
logging.info("Reading master Excel file: %s", Path(EXCEL_PATH).name)
cfg = pd.read_excel(EXCEL_PATH)

# Pick the first row whose URL_Endpoint mentions the USGS host.
# regex=False: the keyword is a literal host name; treated as a regex, each
# "." would match any character.
row_sel = cfg[
    cfg["URL_Endpoint"].str.contains(SOURCE_URL_KEYWORD, case=False, na=False, regex=False)
].head(1)
if row_sel.empty:
    raise ValueError(f"No row with URL_Endpoint containing '{SOURCE_URL_KEYWORD}' found.")
row = row_sel.iloc[0]

# --- Robust JSON parse (handles NaN) --------------------------------------
# A blank Excel cell arrives as NaN (a float), so only non-empty strings are parsed.
params_raw = row.get("Parameters_or_Selectors")
if isinstance(params_raw, str) and params_raw.strip():
    try:
        jparams = json.loads(params_raw)
    except json.JSONDecodeError as e:
        # Chain the original error so the traceback keeps the parse position.
        raise ValueError(f"Parameters_or_Selectors JSON malformed: {e}") from e
    # Valid JSON that is not an object (list, number, ...) has no .get().
    if not isinstance(jparams, dict):
        raise ValueError(
            f"Parameters_or_Selectors must be a JSON object, got {type(jparams).__name__}."
        )
else:
    jparams = {}

# --------------------------------------------------------------------------
# An explicit JSON null for days_back falls back to the 7-day default
# instead of crashing in int(None).
days_back = int(jparams["days_back"]) if jparams.get("days_back") is not None else 7
min_mag   = jparams.get("min_magnitude")
max_mag   = jparams.get("max_magnitude")
bbox_raw  = row.get("Bounding_Box")

In [None]:
# %% ----------------------------------------------------------------------
# 4. BUILD USGS query & fetch
# ---------------------------------------------------------------------------
# The query window ends "now" (UTC) and reaches back `days_back` days.
end_date   = datetime.now(timezone.utc)
start_date = end_date - timedelta(days=days_back)
query = {
    "format": "geojson",
    # Date-only start: begins at the start of that day, a harmless superset of the window.
    "starttime": start_date.strftime("%Y-%m-%d"),
    # Full timestamp: a date-only endtime is read as 00:00:00 of that day,
    # which would drop every event from the current UTC day.
    "endtime":   end_date.strftime("%Y-%m-%dT%H:%M:%S"),
}
# Magnitude limits are optional; missing (None/NaN) values are left out of the query.
if pd.notna(min_mag): query["minmagnitude"] = float(min_mag)
if pd.notna(max_mag): query["maxmagnitude"] = float(max_mag)
# Bounding_Box is expected as "min_lon,min_lat,max_lon,max_lat"; anything that
# does not parse into four floats is ignored with a warning.
if isinstance(bbox_raw, str) and bbox_raw.strip():
    try:
        min_lon, min_lat, max_lon, max_lat = [float(x) for x in bbox_raw.split(',')]
        query.update({"minlongitude": min_lon, "minlatitude": min_lat,
                      "maxlongitude": max_lon, "maxlatitude": max_lat})
    except ValueError:
        logging.warning("Bounding_Box malformed – ignoring spatial filter.")

USGS_ENDPOINT = "https://earthquake.usgs.gov/fdsnws/event/1/query"
logging.info("Requesting USGS data (%d‑day window)…", days_back)
resp = requests.get(USGS_ENDPOINT, params=query, timeout=30)
resp.raise_for_status()  # surface HTTP errors instead of parsing an error page
features = resp.json().get("features", [])
logging.info("Retrieved %d events", len(features))

In [None]:
# %% ----------------------------------------------------------------------
# 5. TO DATAFRAME (with correct column names)
# ---------------------------------------------------------------------------
# Flatten each GeoJSON feature into one record per event.
records = []
for feat in features:
    # `or {}` also covers an explicit JSON null, which .get(key, {}) would pass through as None.
    p = feat.get("properties") or {}
    g = feat.get("geometry") or {}
    # Coordinates are read as [longitude, latitude, depth]; pad with None so a
    # missing or short list cannot raise IndexError.
    coords = (list(g.get("coordinates") or []) + [None, None, None])[:3]
    records.append({
        "time":            pd.to_datetime(p.get("time"), unit="ms", errors="coerce", utc=True),
        "place":           p.get("place"),
        "magnitude":       p.get("mag"),
        "depth_km":        coords[2],
        "tsunami_warning": p.get("tsunami"),
        "status":          p.get("status"),
        "alert_level":     p.get("alert"),
        "longitude":       coords[0],
        "latitude":        coords[1]
    })

# Explicit columns keep the schema stable even when no events were returned.
df = pd.DataFrame(records, columns=[
    "time", "place", "magnitude", "depth_km", "tsunami_warning",
    "status", "alert_level", "longitude", "latitude",
])
logging.info("DataFrame ready (%d rows)", len(df))

# Local export: write the DataFrame as CSV plus a GeoJSON FeatureCollection.
# Names used here (geo_features, rec, fh) are deliberately distinct from the
# notebook-level `features`, `row` and `f`, so re-running earlier cells after
# this one still sees the original USGS response and config row.
if not USE_ARCGIS:
    if df.empty:
        logging.info("No records to write locally.")
    else:
        df.to_csv(OUTPUT_CSV, index=False)

        def to_jsonable(val):
            """Convert a DataFrame cell value into something ``json.dump`` accepts.

            Timestamps become ISO-8601 strings, missing scalars (None/NaN/NaT)
            become None, and objects exposing ``.item()`` are unwrapped.
            Anything else is returned unchanged.
            """
            if isinstance(val, pd.Timestamp):
                return val.isoformat()
            # pd.isna returns an array for list-likes, so only test scalars.
            if pd.api.types.is_scalar(val) and pd.isna(val):
                return None
            if hasattr(val, "item"):
                try:
                    return val.item()
                except Exception:
                    # Best effort: fall through and return the value as-is.
                    pass
            return val

        geo_features = []
        if {"longitude", "latitude"}.issubset(df.columns):
            for _, rec in df.iterrows():
                lon = rec.get("longitude")
                lat = rec.get("latitude")
                # Rows without both coordinates cannot form a Point and are skipped.
                if pd.notna(lon) and pd.notna(lat):
                    props = rec.drop(labels=["longitude", "latitude"]).to_dict()
                    props = {k: to_jsonable(v) for k, v in props.items()}
                    geo_features.append({
                        "type": "Feature",
                        "geometry": {"type": "Point", "coordinates": [float(lon), float(lat)]},
                        "properties": props
                    })
        geojson = {"type": "FeatureCollection", "features": geo_features}
        with open(OUTPUT_GEOJSON, "w", encoding="utf-8") as fh:
            json.dump(geojson, fh, ensure_ascii=False, indent=2)
        logging.info("Local outputs written: %s and %s", OUTPUT_CSV, OUTPUT_GEOJSON)

# %% ----------------------------------------------------------------------
if USE_ARCGIS:
    # 6. ACCESS / TRUNCATE FEATURE LAYER
    # ---------------------------------------------------------------------------
    item = gis.content.get(FEATURE_LAYER_ITEM_ID)
    if item is None:
        # Fail with a clear message instead of an AttributeError on None below.
        raise ValueError(f"Feature layer item '{FEATURE_LAYER_ITEM_ID}' not found or not accessible.")
    flayer = item.layers[LAYER_INDEX]
    logging.info("Target layer: %s", flayer.properties.name)
    # The layer is emptied first so it ends up holding only the current query window.
    if flayer.query(return_count_only=True):
        logging.info("Truncating existing features…")
        flayer.delete_features(where="1=1")

    # %% ----------------------------------------------------------------------
    # 7. PREP & PUSH ADDS
    # ---------------------------------------------------------------------------
    adds = []
    # Geometry is attached only when the layer reports a geometry type.
    has_geom = bool(getattr(flayer.properties, "geometryType", ""))

    for _, r in df.iterrows():
        # Missing values are sent as None; numbers are cast to plain Python types.
        attrs = {
            "time":            int(r["time"].timestamp()*1000) if pd.notna(r["time"]) else None,
            "place":           r["place"],
            "magnitude":       float(r["magnitude"]) if pd.notna(r["magnitude"]) else None,
            "depth_km":        float(r["depth_km"]) if pd.notna(r["depth_km"]) else None,
            "tsunami_warning": int(r["tsunami_warning"]) if pd.notna(r["tsunami_warning"]) else None,
            "status":          r["status"],
            "alert_level":     r["alert_level"],
            "longitude":       float(r["longitude"]) if pd.notna(r["longitude"]) else None,
            "latitude":        float(r["latitude"]) if pd.notna(r["latitude"]) else None,
        }

        geom = None
        if has_geom and pd.notna(r["longitude"]) and pd.notna(r["latitude"]):
            geom = Point({"x": float(r["longitude"]), "y": float(r["latitude"]),
                          "spatialReference": {"wkid": 4326}})

        adds.append(Feature(geometry=geom, attributes=attrs))

    sync_ok = True
    if adds:
        res = flayer.edit_features(adds=adds, rollback_on_failure=True)
        add_results = res.get("addResults", [])
        # all() over an empty list is True, so an empty or missing addResults
        # must be treated as a failure rather than a success.
        if add_results and all(ar.get("success") for ar in add_results):
            logging.info("✔ Added %d features to layer", len(adds))
        else:
            sync_ok = False
            logging.error("Some features failed to add – check layer for details.")
    else:
        logging.warning("No features to add (empty DataFrame)")

    # Only announce completion when the push did not fail.
    if sync_ok:
        logging.info("🔥 Workflow complete – layer refreshed!")