# 10 Build Exposure and Vulnerability Features

Stage: `02_feature_engineering`
Discipline: social risk and baseline feature engineering.

Primary dependencies:
- `JupyterNotebooks/outputs/census_pr/municipio_risk_features.csv`
- `JupyterNotebooks/outputs/census_pr/town_risk_features.csv`

Output:
- `JupyterNotebooks/outputs/index_pipeline/10_features/municipio_exposure_vulnerability_features.csv`


In [None]:
# Cell 1: Setup
import importlib.util
import subprocess
import sys
from pathlib import Path
import logging


def ensure_packages(packages):
    """Install any of *packages* that cannot be found by the import system.

    Args:
        packages: Iterable of names. Each name is used both as the module name
            probed with ``importlib.util.find_spec`` and as the requirement
            handed to ``pip install``.

    Raises:
        subprocess.CalledProcessError: If the pip invocation exits non-zero.
    """
    missing = [p for p in packages if importlib.util.find_spec(p) is None]
    if not missing:
        return
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--quiet", *missing])
    # Packages were installed while the interpreter is running: refresh the
    # import finders' caches so the imports that follow can see them.
    importlib.invalidate_caches()


# Make sure the third-party packages are installed before they are imported below;
# the order of these statements matters.
ensure_packages(["pandas", "numpy"])

import numpy as np
import pandas as pd

# Configure root logging at INFO with a "timestamp | level | message" layout and
# create the named logger for this notebook stage.
logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
logger = logging.getLogger("index-pipeline-stage10")


def find_repo_root():
    """Return the nearest directory, from the CWD upwards, containing ``JupyterNotebooks``.

    The resolved current working directory is checked first, then each of its
    parents in order. If no candidate contains a ``JupyterNotebooks`` entry,
    the resolved current working directory itself is returned.
    """
    start = Path.cwd().resolve()
    candidates = (start, *start.parents)
    matches = (folder for folder in candidates if (folder / "JupyterNotebooks").exists())
    # next() with a default gives the first match, or the starting directory when none exists.
    return next(matches, start)


# Anchor every path on the detected repository root.
REPO_ROOT = find_repo_root()
CENSUS_DIR = REPO_ROOT.joinpath("JupyterNotebooks", "outputs", "census_pr")
OUTPUT_DIR = REPO_ROOT.joinpath("JupyterNotebooks", "outputs", "index_pipeline", "10_features")
# Create the stage output folder (and any missing parents); a no-op if it already exists.
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

# Use IPython's rich display when it is importable; otherwise fall back to print.
try:
    from IPython.display import display
except ImportError:
    display = print


In [None]:
# Cell 2: Load census-derived base files
# The two census-derived inputs this stage reads.
muni_file = CENSUS_DIR.joinpath("municipio_risk_features.csv")
town_file = CENSUS_DIR.joinpath("town_risk_features.csv")

# Fail fast (municipio file checked first) when an input is absent.
for _required_file in (muni_file, town_file):
    if not _required_file.exists():
        raise FileNotFoundError(f"Missing required file: {_required_file}")

muni_df = pd.read_csv(muni_file)
town_df = pd.read_csv(town_file)

# Coordinates come from the town-level file: keep the first row seen per municipio_key.
coord_columns = ["municipio_key", "municipio", "latitude", "longitude"]
town_coords = town_df.loc[:, coord_columns].drop_duplicates(subset="municipio_key")

base = muni_df.copy()
if "municipio_key" not in base.columns:
    # Derive the key from the name: trimmed, lower-cased, spaces replaced by underscores.
    normalized_names = base["municipio"].astype(str).str.strip().str.lower()
    base["municipio_key"] = normalized_names.str.replace(" ", "_", regex=False)

# Left join so every municipio row is kept even when no coordinates match.
base = pd.merge(base, town_coords, how="left", on=["municipio_key", "municipio"])

print(f"Municipios loaded: {len(base)}")
display(base.head(10))


In [None]:
# Cell 3: Engineer exposure/vulnerability features

def robust_score(series, higher_is_worse=True):
    """Scale *series* to a 0-100 score between its 5th and 95th percentiles.

    Values are coerced to numeric (unparseable entries become NaN), mapped
    linearly so the 5th percentile is 0 and the 95th percentile is 100, and
    clipped to that range. When either percentile is NaN or the 95th is not
    strictly greater than the 5th, every score is NaN.

    Args:
        series: Input values; must support ``.quantile`` after coercion.
        higher_is_worse: If True, larger inputs get larger scores. If False,
            the score is inverted (``100 - score``) so smaller inputs score higher.

    Returns:
        A Series of scores aligned to the index of the coerced input.
    """
    values = pd.to_numeric(series, errors="coerce")
    p05 = values.quantile(0.05)
    p95 = values.quantile(0.95)

    unusable_range = pd.isna(p05) or pd.isna(p95) or p95 <= p05
    if unusable_range:
        # No spread to scale against: report missing scores rather than dividing by zero.
        scaled = pd.Series(np.nan, index=values.index)
    else:
        span = p95 - p05
        scaled = (values - p05).div(span).clip(0, 1).mul(100)

    return scaled if higher_is_worse else 100 - scaled

feat = base.copy()

# Exposure components
# .get() is used so an absent source column does not raise a KeyError on lookup.
feat["exposure_population"] = pd.to_numeric(feat.get("population"), errors="coerce")
feat["exposure_population_score"] = robust_score(feat["exposure_population"], higher_is_worse=True)

# Vulnerability components (coerced to numeric; unparseable values become NaN)
feat["poverty_rate"] = pd.to_numeric(feat.get("poverty_rate"), errors="coerce")
feat["no_vehicle_rate"] = pd.to_numeric(feat.get("no_vehicle_rate"), errors="coerce")
feat["vacancy_rate"] = pd.to_numeric(feat.get("vacancy_rate"), errors="coerce")
feat["median_income"] = pd.to_numeric(feat.get("median_income"), errors="coerce")

# Risk-oriented scores: 100 means worst.
feat["poverty_score"] = robust_score(feat["poverty_rate"], higher_is_worse=True)
feat["transport_constraint_score"] = robust_score(feat["no_vehicle_rate"], higher_is_worse=True)
feat["housing_fragility_score"] = robust_score(feat["vacancy_rate"], higher_is_worse=True)

# robust_score(..., higher_is_worse=False) is also risk-oriented: low income -> 100.
income_risk_score = robust_score(feat["median_income"], higher_is_worse=False)
# Capacity is the complement of that risk, so higher income -> higher capacity.
# It must be capacity-oriented: resilience adds it directly, and storing the
# risk-oriented value here would make low income raise resilience and lower vulnerability.
feat["income_capacity_score"] = 100 - income_risk_score

# Weighted blend of the four risk-oriented components (weights sum to 1.0).
feat["vulnerability_score"] = (
    0.35 * feat["poverty_score"]
    + 0.25 * feat["transport_constraint_score"]
    + 0.15 * feat["housing_fragility_score"]
    + 0.25 * income_risk_score
)

feat["exposure_score"] = feat["exposure_population_score"]

# Optional resilience baseline proxy: income capacity plus the complements of the
# transport and housing risk scores (weights sum to 1.0).
feat["resilience_capacity_score"] = (
    0.45 * feat["income_capacity_score"]
    + 0.30 * (100 - feat["transport_constraint_score"])
    + 0.25 * (100 - feat["housing_fragility_score"])
).clip(0, 100)

out_cols = [
    "municipio", "municipio_key", "latitude", "longitude",
    "population", "median_income", "poverty_rate", "no_vehicle_rate", "vacancy_rate",
    "exposure_score", "vulnerability_score", "resilience_capacity_score",
    "poverty_score", "transport_constraint_score", "housing_fragility_score", "income_capacity_score"
]

feat_out = feat[out_cols].copy()
out_file = OUTPUT_DIR / "municipio_exposure_vulnerability_features.csv"
feat_out.to_csv(out_file, index=False)

print(f"Output written: {out_file}")
display(feat_out.head(10))
