## Section 1: Imports and data loading

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

pd.set_option("display.max_columns", None)
pd.set_option("display.width", 120)

df = pd.read_csv("../data/feature_engineered_monthly.csv")
df["year_month"] = pd.to_datetime(df["year_month"])

df = df[~df["state"].str.match(r"^\d+$", na=False)]

df = df[df["year_month"].notna()]

df["state"] = df["state"].str.strip().str.upper()

df["state"] = df["state"].str.replace(r"\s+", " ", regex=True)

df["state"] = df["state"].str.replace(r"^THE\s+", "", regex=True)



STATE_NORMALIZATION_MAP = {
    "JAMMU & KASHMIR": "JAMMU AND KASHMIR",
    
    # Andaman
    "ANDAMAN & NICOBAR ISLANDS": "ANDAMAN AND NICOBAR ISLANDS",

    # Dadraâ€“Daman merger
    "DADRA AND NAGAR HAVELI": "DADRA AND NAGAR HAVELI AND DAMAN AND DIU",
    "DADRA & NAGAR HAVELI": "DADRA AND NAGAR HAVELI AND DAMAN AND DIU",
    "DAMAN & DIU": "DADRA AND NAGAR HAVELI AND DAMAN AND DIU",
    "DAMAN AND DIU": "DADRA AND NAGAR HAVELI AND DAMAN AND DIU",
    "DADRA & NAGAR HAVELI AND DAMAN & DIU": "DADRA AND NAGAR HAVELI AND DAMAN AND DIU",
    "DADRA AND NAGAR HAVELI AND DAMAN AND DIU": "DADRA AND NAGAR HAVELI AND DAMAN AND DIU",

    # West Bengal
    "WESTBENGAL": "WEST BENGAL",
    "WEST BANGAL": "WEST BENGAL",

    # Historical renames
    "ORISSA": "ODISHA",
    "PONDICHERRY": "PUDUCHERRY",
}


df["state"] = df["state"].replace(STATE_NORMALIZATION_MAP)

df["state"].value_counts().head(15)



state
DADRA AND NAGAR HAVELI AND DAMAN AND DIU    47
WEST BENGAL                                 26
PUDUCHERRY                                  24
ODISHA                                      24
JAMMU AND KASHMIR                           24
ANDAMAN AND NICOBAR ISLANDS                 21
CHHATTISGARH                                12
CHANDIGARH                                  12
UTTARAKHAND                                 12
UTTAR PRADESH                               12
TRIPURA                                     12
TELANGANA                                   12
TAMIL NADU                                  12
SIKKIM                                      12
RAJASTHAN                                   12
Name: count, dtype: int64

## Section 2: Build state-level summary metrics

In [2]:
state_summary = (
    df.groupby("state", as_index=False)
    .agg(
        avg_update_intensity=("update_intensity", "mean"),
        recent_update_intensity=("update_intensity", lambda x: x.sort_index().tail(3).mean()),
        avg_decay_signal=("update_decay_signal", "mean"),
        update_volatility=("update_consistency", "mean"),
        active_months=("year_month", "nunique")
    )
)

state_summary.head()


Unnamed: 0,state,avg_update_intensity,recent_update_intensity,avg_decay_signal,update_volatility,active_months
0,ANDAMAN AND NICOBAR ISLANDS,477.619048,81.0,-315.563492,1194.801287,12
1,ANDHRA PRADESH,420.771655,227.464151,-138.059423,565.175095,12
2,ARUNACHAL PRADESH,48.753879,46.826808,-4.550423,21.921648,12
3,ASSAM,35.775732,32.488771,0.841151,12.382122,12
4,BIHAR,36.174691,31.996439,-0.618413,12.018727,12


## Section 3: Define state categories

In [3]:
def classify_state(row):
    if row["avg_update_intensity"] == 0:
        return "STAGNANT"
    if (
        row["avg_decay_signal"] < 0
        and row["recent_update_intensity"] < row["avg_update_intensity"]
    ):
        return "DECAYING"
    return "HEALTHY"

state_summary["state_status"] = state_summary.apply(classify_state, axis=1)


## Section 4: Build a risk score 

In [4]:
state_summary["risk_score"] = (
    (-1 * state_summary["avg_decay_signal"]) +
    (state_summary["update_volatility"] * 0.5) +
    (1 / (state_summary["recent_update_intensity"] + 1))
)

state_summary

Unnamed: 0,state,avg_update_intensity,recent_update_intensity,avg_decay_signal,update_volatility,active_months,state_status,risk_score
0,ANDAMAN AND NICOBAR ISLANDS,477.619048,81.0,-315.563492,1194.801287,12,DECAYING,912.976331
1,ANDHRA PRADESH,420.771655,227.464151,-138.059423,565.175095,12,DECAYING,420.651347
2,ARUNACHAL PRADESH,48.753879,46.826808,-4.550423,21.921648,12,DECAYING,15.532155
3,ASSAM,35.775732,32.488771,0.841151,12.382122,12,HEALTHY,5.379771
4,BIHAR,36.174691,31.996439,-0.618413,12.018727,12,DECAYING,6.658083
5,CHANDIGARH,684.859593,235.44881,-229.918119,912.016226,12,DECAYING,685.930461
6,CHHATTISGARH,241.470904,199.436387,-27.257399,138.053156,12,DECAYING,96.288966
7,DADRA AND NAGAR HAVELI AND DAMAN AND DIU,240.137791,0.0,-125.462175,587.411425,12,DECAYING,420.167887
8,DELHI,93.856423,88.008726,-3.254462,31.384833,12,DECAYING,18.958113
9,GOA,314.291998,226.883333,-77.134959,332.120434,12,DECAYING,243.199564


## Section 5: Final priority table

In [5]:
priority_table = (
    state_summary
    .sort_values("risk_score", ascending=False)
    [["state",
      "state_status",
      "risk_score",
      "avg_update_intensity",
      "recent_update_intensity",
      "active_months"]]
)

priority_table.head(10)


Unnamed: 0,state,state_status,risk_score,avg_update_intensity,recent_update_intensity,active_months
26,PUDUCHERRY,DECAYING,4278.883311,2168.765079,485.666667,12
0,ANDAMAN AND NICOBAR ISLANDS,DECAYING,912.976331,477.619048,81.0,12
12,HIMACHAL PRADESH,DECAYING,768.106431,934.20797,575.685185,12
5,CHANDIGARH,DECAYING,685.930461,684.859593,235.44881,12
1,ANDHRA PRADESH,DECAYING,420.651347,420.771655,227.464151,12
7,DADRA AND NAGAR HAVELI AND DAMAN AND DIU,DECAYING,420.167887,240.137791,0.0,12
25,ODISHA,DECAYING,335.944091,261.796767,130.558209,12
30,TAMIL NADU,DECAYING,305.552843,264.995082,147.637626,12
9,GOA,DECAYING,243.199564,314.291998,226.883333,12
17,LADAKH,DECAYING,237.856303,183.129167,122.466667,12


In [6]:
state_summary["priority_rank"] = (
    state_summary["risk_score"]
    .rank(ascending=False, method="dense")
    .astype(int)
)


## Section 6: Sanity check

In [7]:
priority_table["state_status"].value_counts()

state_status
DECAYING    28
HEALTHY      8
Name: count, dtype: int64

## Section 7: Save final output

In [8]:
final_priority_table = (
    state_summary
    .sort_values("priority_rank")
    [[
        "priority_rank",
        "state",
        "state_status",
        "avg_update_intensity",
        "recent_update_intensity",
        "active_months"
    ]]
)

final_priority_table.to_csv(
    "../data/state_priority_classification_final.csv",
    index=False
)
