# 30 Score Operational Indices

Stage: `03_scoring`
Discipline: index scoring and confidence integration.

Inputs:
- `outputs/index_pipeline/10_features/municipio_exposure_vulnerability_features.csv`
- `outputs/index_pipeline/20_features/municipio_hazard_features.csv`

Output:
- `outputs/index_pipeline/30_scoring/municipio_indices_scored.csv`


In [None]:
# Cell 1: Setup
import importlib.util
import logging
import os
import subprocess
import sys
from pathlib import Path


def ensure_packages(packages):
    """Install, via pip, every entry of ``packages`` that is not importable.

    Each name is used both as the module name probed with
    ``importlib.util.find_spec`` and as the requirement handed to
    ``pip install``, so it must be valid for both purposes.

    Args:
        packages: Iterable of top-level module names to check.

    Raises:
        subprocess.CalledProcessError: If the pip invocation exits non-zero.
    """
    missing = [p for p in packages if importlib.util.find_spec(p) is None]
    if missing:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "--quiet", *missing])
        # The import system caches directory listings; refresh them so the
        # packages installed a moment ago can be imported in this same process.
        importlib.invalidate_caches()


# Third-party imports are deliberately placed after the bootstrap: importing
# numpy any earlier would raise ImportError before it could be installed.
ensure_packages(["pandas", "numpy"])
import numpy as np
import pandas as pd

logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
logger = logging.getLogger("index-pipeline-stage30")


def find_repo_root():
    """Locate the closest directory that contains a ``JupyterNotebooks`` entry.

    The search starts at the resolved current working directory and walks up
    through its ancestors. When no candidate qualifies, the resolved current
    working directory itself is returned.
    """
    start = Path.cwd().resolve()
    candidates = (start, *start.parents)
    return next(
        (folder for folder in candidates if (folder / "JupyterNotebooks").exists()),
        start,
    )


# All pipeline locations are derived from the detected repository root.
REPO_ROOT = find_repo_root()
BASE_OUT = REPO_ROOT.joinpath("JupyterNotebooks", "outputs", "index_pipeline")

# Feature tables written by the two upstream stages.
INPUT_10 = BASE_OUT.joinpath("10_features", "municipio_exposure_vulnerability_features.csv")
INPUT_20 = BASE_OUT.joinpath("20_features", "municipio_hazard_features.csv")

# This stage's output folder; created up front (including missing parents).
OUTPUT_DIR = BASE_OUT.joinpath("30_scoring")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Use IPython's rich display when it is importable, plain print otherwise.
try:
    from IPython.display import display
except ImportError:
    display = print


In [None]:
# Cell 2: Load and score
# Fail fast, naming the exact file, when an upstream stage output is absent.
for f in [INPUT_10, INPUT_20]:
    if not f.exists():
        raise FileNotFoundError(f"Missing dependency: {f}")

x_df = pd.read_csv(INPUT_10)
h_df = pd.read_csv(INPUT_20)
df = x_df.merge(h_df, on=["municipio", "municipio_key", "latitude", "longitude"], how="inner")

# The inner join keeps only rows whose four keys match exactly in both inputs
# (latitude/longitude included), so a mismatch silently changes the row count.
# Surface that instead of quietly scoring a different set of municipios.
if len(df) != len(x_df) or len(df) != len(h_df):
    logger.warning(
        "Merged row count %d differs from inputs (%s: %d rows, %s: %d rows); "
        "rows were dropped or duplicated by the join",
        len(df), INPUT_10.name, len(x_df), INPUT_20.name, len(h_df),
    )

# Phase comes from the INDEX_PHASE environment variable (case-insensitive,
# surrounding whitespace ignored); anything unrecognised falls back to DURING.
requested_phase = os.environ.get("INDEX_PHASE", "DURING")
phase = requested_phase.upper().strip()
if phase not in {"PRE", "DURING", "POST"}:
    logger.warning("Unrecognised INDEX_PHASE=%r; falling back to DURING", requested_phase)
    phase = "DURING"

# Base components: each input column is coerced to numeric, unparseable or
# missing values become 0, and the result is limited to the 0-100 range.
hazard, exposure, vulnerability, resilience_base = (
    pd.to_numeric(df[column], errors="coerce").fillna(0).clip(0, 100)
    for column in (
        "hazard_combined",
        "exposure_score",
        "vulnerability_score",
        "resilience_capacity_score",
    )
)

# Risk is the product of the three components expressed as fractions,
# rescaled back to 0-100.
hazard_fraction = hazard / 100.0
exposure_fraction = exposure / 100.0
vulnerability_fraction = vulnerability / 100.0
df["risk_index_raw"] = hazard_fraction * exposure_fraction * vulnerability_fraction * 100.0

# Operational indices are weighted sums of the components or their complements.
df["resilience_index"] = resilience_base

hazard_complement = 100 - hazard
exposure_complement = 100 - exposure
vulnerability_complement = 100 - vulnerability

readiness = 0.45 * hazard_complement + 0.35 * resilience_base + 0.20 * vulnerability_complement
df["response_readiness_index"] = readiness.clip(0, 100)

recovery = 0.50 * resilience_base + 0.25 * exposure_complement + 0.25 * vulnerability_complement
df["recovery_capacity_index"] = recovery.clip(0, 100)

# Phase-aware priority: the weight set is chosen by the active phase.
phase_weights = {
    "PRE": dict(risk=0.45, rr=0.35, rc=0.20),
    "DURING": dict(risk=0.60, rr=0.30, rc=0.10),
    "POST": dict(risk=0.35, rr=0.20, rc=0.45),
}
w = phase_weights[phase]

# Readiness and recovery enter as gaps: the lower the capacity, the higher
# the resulting priority.
readiness_gap = 100 - df["response_readiness_index"]
recovery_gap = 100 - df["recovery_capacity_index"]
priority = w["risk"] * df["risk_index_raw"] + w["rr"] * readiness_gap + w["rc"] * recovery_gap
df["priority_index"] = priority.clip(0, 100)

# Confidence score
# Freshness falls linearly from 100 (observed just now) to 0 at 168 hours of
# age; rows without a parseable timestamp are treated as 168 hours old.
if "noaa_latest_obs_utc" in df.columns:
    latest_obs = pd.to_datetime(df["noaa_latest_obs_utc"], utc=True, errors="coerce")
else:
    # df.get(...) would return None for an absent column, which cannot feed the
    # per-row `.dt` arithmetic below; use an all-NaT column so the "missing
    # timestamp" path applies to every row instead of raising.
    latest_obs = pd.to_datetime(pd.Series(pd.NaT, index=df.index), utc=True)
age_hours = (pd.Timestamp.now(tz="UTC") - latest_obs).dt.total_seconds() / 3600.0
freshness = (100 - (age_hours.fillna(168).clip(0, 168) / 168) * 100).clip(0, 100)

# Completeness: share of the four core inputs that are present in the row.
completeness = df[[
    "hazard_combined", "exposure_score", "vulnerability_score", "resilience_capacity_score"
]].notna().mean(axis=1) * 100

# Validity is judged on the raw inputs. Checking the already filled-and-clipped
# component values would always pass and make this term a constant 100.
# Missing values are left to the completeness term; values that are present
# but non-numeric or outside 0-100 mark the row invalid.
raw_scores = df[["hazard_combined", "exposure_score", "vulnerability_score"]]
numeric_scores = raw_scores.apply(pd.to_numeric, errors="coerce")
in_range = (numeric_scores >= 0) & (numeric_scores <= 100)
validity = (in_range | raw_scores.isna()).all(axis=1).astype(float) * 100

# Cross-check: 85 when either the flood or the earthquake hazard is positive,
# 60 otherwise.
crosscheck = np.where((df["flood_hazard_muni"].fillna(0) > 0) | (df["earthquake_hazard_score"].fillna(0) > 0), 85.0, 60.0)

df["confidence_score"] = (0.35 * freshness + 0.25 * completeness + 0.25 * validity + 0.15 * crosscheck).clip(0, 100)
df["confidence_0_1"] = (df["confidence_score"] / 100.0).clip(0, 1)

# Blend each priority with the median priority in proportion to (1 - confidence),
# so low-confidence rows are pulled toward the middle of the distribution.
baseline = df["priority_index"].median()
df["priority_index_conf_adj"] = (
    df["confidence_0_1"] * df["priority_index"] + (1 - df["confidence_0_1"]) * baseline
)

# Classification bands (plus hard overrides)
def band(score):
    """Map a numeric score to its colour band.

    Lower bounds are inclusive: 85 and above is "Red", 70 and above is
    "Orange", 50 and above is "Yellow"; anything else (including values that
    compare false against every bound, such as NaN) is "Green".
    """
    lower_bounds = ((85, "Red"), (70, "Orange"), (50, "Yellow"))
    for minimum, colour in lower_bounds:
        if score >= minimum:
            return colour
    return "Green"


# Band every row on its confidence-adjusted priority.
df["priority_band"] = df["priority_index_conf_adj"].apply(band)

# Hard overrides: a global alert score of at least 95 or a municipal flood
# hazard of at least 90 forces the band to Red regardless of the computed one.
alert_override = df["nws_global_alert_score"].fillna(0) >= 95
flood_override = df["flood_hazard_muni"].fillna(0) >= 90
hard_red = alert_override | flood_override
df.loc[hard_red, "priority_band"] = "Red"

df["phase"] = phase

# Columns exported to the scored CSV, in output order.
identity_cols = ["municipio", "municipio_key", "latitude", "longitude"]
hazard_cols = ["hazard_combined", "flood_hazard_muni", "earthquake_hazard_score"]
component_cols = ["exposure_score", "vulnerability_score", "resilience_index"]
capacity_cols = ["response_readiness_index", "recovery_capacity_index"]
priority_cols = ["risk_index_raw", "priority_index", "priority_index_conf_adj"]
summary_cols = ["confidence_score", "confidence_0_1", "priority_band", "phase"]
out_cols = [
    *identity_cols,
    *hazard_cols,
    *component_cols,
    *capacity_cols,
    *priority_cols,
    *summary_cols,
]

# Highest adjusted priority first; ties are ordered by municipio name.
ranked = df[out_cols].sort_values(
    ["priority_index_conf_adj", "municipio"],
    ascending=[False, True],
)
scored = ranked.reset_index(drop=True)

out_file = OUTPUT_DIR / "municipio_indices_scored.csv"
scored.to_csv(out_file, index=False)

print(f"Phase: {phase}")
print(f"Output: {out_file}")
display(scored.head(15))
