In [47]:
import pandas as pd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.patches import Rectangle, Patch
from sklearn.preprocessing import MinMaxScaler
import warnings
# Suppress all library warnings so they do not clutter the notebook output.
warnings.filterwarnings("ignore")

# Base theme for every figure rendered below.
plt.style.use("seaborn-v0_8-whitegrid")

# Global rcParams overrides: 300 dpi on screen and on disk, plus the font
# sizes and bold titles shared by all charts.
rc_overrides = {
    "figure.dpi": 300,
    "savefig.dpi": 300,
    "font.size": 11,
    "axes.labelsize": 12,
    "axes.titlesize": 15,
    "legend.fontsize": 11,
    "axes.titleweight": "bold",
}
for rc_key, rc_value in rc_overrides.items():
    plt.rcParams[rc_key] = rc_value

# Processed CSV inputs read by this notebook.
state_monthly_csv = r"C:\Users\varun\Desktop\udai_adhar _framework\data\processed\aadhaar_state_monthly.csv"
priority_ranking_csv = r"C:\Users\varun\Desktop\udai_adhar _framework\data\processed\state_priority_ranking.csv"

# State-by-month table; "month" is converted to datetime so later cells can
# use the .dt accessor and plot it on a time axis.
df = pd.read_csv(state_monthly_csv)
df["month"] = pd.to_datetime(df["month"])

# State-level ranking table (its columns are printed below for reference).
final_results = pd.read_csv(priority_ranking_csv)

print(list(final_results.columns))


['state', 'priority_score', 'rank', 'anomaly_score', 'trend_score', 'biometric_volume', 'why_flagged']


In [48]:
# Visual 01: vertical flow diagram of the pipeline stages, one box per stage
# with an arrow between consecutive boxes.
fig, ax = plt.subplots(figsize=(14, 8))
ax.axis("off")

stages = [
    "Data Ingestion",
    "Validation",
    "Feature Engineering",
    "ML Modeling\n(Isolation Forest)",
    "Score Fusion",
    "Explainable Output"
]

# Box geometry in data coordinates. Each box spans
# [ypos - BOX_DROP, ypos - BOX_DROP + BOX_HEIGHT] vertically.
BOX_LEFT = 0.3
BOX_WIDTH = 0.4
BOX_HEIGHT = 0.1
BOX_DROP = 0.06

y = np.linspace(0.9, 0.1, len(stages))

for i, (label, ypos) in enumerate(zip(stages, y)):
    rect = Rectangle((BOX_LEFT, ypos - BOX_DROP), BOX_WIDTH, BOX_HEIGHT,
                     facecolor=plt.cm.Blues(0.3+i*0.1),
                     edgecolor="black", linewidth=2)
    ax.add_patch(rect)
    ax.text(0.5, ypos, label, ha="center", va="center", fontsize=12)

    if i < len(stages)-1:
        # Run the arrow from the bottom edge of this box to the top edge of
        # the next one. The previous fixed offsets pushed the arrow into the
        # next box, which is drawn afterwards and therefore hid the head.
        arrow_start = ypos - BOX_DROP
        arrow_end = y[i + 1] - BOX_DROP + BOX_HEIGHT
        ax.arrow(0.5, arrow_start, 0, arrow_end - arrow_start,
                 head_width=0.02, head_length=0.02,
                 length_includes_head=True)  # tip lands exactly on arrow_end

ax.set_title("End-to-End Aadhaar Analytics Pipeline")
plt.savefig(r"C:\Users\varun\Desktop\output\visual_01_architecture.png", bbox_inches="tight")
plt.close()


In [49]:
# Visual 02: number of non-null enrolment_count records per state and
# calendar month; combinations with no record are filled with 0.
month_period = df["month"].dt.to_period("M")
pivot = pd.pivot_table(
    df,
    values="enrolment_count",
    index="state",
    columns=month_period,
    aggfunc="count",
    fill_value=0,
)

fig, ax = plt.subplots(figsize=(16, 10))
sns.heatmap(pivot, cmap="YlGn", linewidths=0.4, ax=ax)
ax.set_title("Data Availability Across States and Months")
ax.set_xlabel("Month")
ax.set_ylabel("State")
fig.savefig(
    r"C:\Users\varun\Desktop\output\visual_02_data_coverage.png",
    bbox_inches="tight",
)
plt.close(fig)


In [50]:
# Visual 03: describe() statistics for the two key variables (one row per
# variable) rendered as a table, plus the coefficient of variation.
stats = df[["enrolment_count", "biometric_volume"]].describe().T
stats["CV (%)"] = (stats["std"] / stats["mean"] * 100).round(1)

# Round only the describe() columns to whole numbers for display. Rounding the
# whole frame to 0 decimals would also discard the one decimal that "CV (%)"
# was deliberately rounded to above.
whole_number_cols = {col: 0 for col in stats.columns if col != "CV (%)"}
table_values = stats.round(whole_number_cols)

fig, ax = plt.subplots(figsize=(12, 3))
ax.axis("off")

table = ax.table(
    cellText=table_values.values,
    colLabels=stats.columns,
    rowLabels=stats.index,
    loc="center"
)

# Fixed font size and taller rows so the table stays legible when saved.
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1, 2)

plt.title("Descriptive Statistics of Key Variables")
plt.savefig(r"C:\Users\varun\Desktop\output\visual_03_summary_stats.png", bbox_inches="tight")
plt.close()


In [51]:
# Visual 04: stacked boxes, one per pipeline step label, drawn top to bottom.
fig, ax = plt.subplots(figsize=(14, 10))
ax.axis("off")

steps = [
    "load_processed_data()",
    "validate_data()",
    "generate_features()",
    "compute_trend_score()",
    "compute_anomaly_score()",
    "compute_priority_score()"
]

# Evenly spaced vertical anchors, first step at the top.
y = np.linspace(0.9, 0.1, len(steps))

for idx, (step_name, y_anchor) in enumerate(zip(steps, y)):
    box = Rectangle(
        (0.1, y_anchor - 0.06),
        0.8,
        0.1,
        facecolor=plt.cm.Blues(0.35 + idx * 0.08),  # darker shade per step
        edgecolor="black",
        linewidth=2,
    )
    ax.add_patch(box)
    ax.text(0.5, y_anchor, step_name, ha="center", va="center", fontsize=12)

ax.set_title("Code-Level Analytical Pipeline")
fig.savefig(
    r"C:\Users\varun\Desktop\output\visual_04_pipeline.png",
    bbox_inches="tight",
)
plt.close(fig)


In [52]:
# Visual 05: enrolment_count distribution for the ten states with the
# highest mean enrolment_count.
top_states = (
    df.groupby("state")["enrolment_count"]
    .mean()
    .nlargest(10)
    .index
)
top_state_rows = df[df["state"].isin(top_states)]

fig, ax = plt.subplots(figsize=(14, 7))
sns.boxplot(data=top_state_rows, x="state", y="enrolment_count", ax=ax)
ax.set_title("Enrolment Distribution: Top 10 States")
plt.xticks(rotation=45)
fig.savefig(
    r"C:\Users\varun\Desktop\output\visual_05_boxplots.png",
    bbox_inches="tight",
)
plt.close(fig)


In [53]:
# Visual 06: month-over-month enrolment change for the four states whose
# change series has the largest standard deviation.

# pct_change compares each row with the previous row of the same state, so
# the rows must be in chronological order; the CSV order is not guaranteed.
# The result is aligned back to df by index on assignment.
chronological = df.sort_values(["state", "month"])
df["enrolment_change"] = (
    chronological.groupby("state")["enrolment_count"]
    .pct_change()
    # A zero count in the previous row yields +/-inf, which turns std() and
    # mean() of the whole state into NaN/inf; treat those steps as missing.
    .replace([np.inf, -np.inf], np.nan)
)
sample = df.groupby("state")["enrolment_change"].std().nlargest(4).index

fig, axes = plt.subplots(2, 2, figsize=(16, 10))
axes = axes.flatten()

for ax, state in zip(axes, sample):
    # Sort by month so the line is drawn left to right without back-tracking.
    data = df[df["state"] == state].sort_values("month")
    ax.plot(data["month"], data["enrolment_change"])
    ax.axhline(0, linestyle="--", color="red")  # zero-change reference line
    ax.set_title(state)

# Hide panels that stay empty when fewer than four states have a defined std.
for ax in axes[len(sample):]:
    ax.set_visible(False)

plt.suptitle("Operational Volatility Patterns")
plt.savefig(r"C:\Users\varun\Desktop\output\visual_06_volatility.png", bbox_inches="tight")
plt.close()


In [54]:
# Visual 07: actual enrolment vs a 3-row rolling-mean baseline for two states.

# The rolling window runs over consecutive rows of each state, so the rows
# are sorted by month first; otherwise the "baseline" would average
# arbitrary months. Dropping the group level restores the original row index,
# so the assignment aligns back to df regardless of the sort.
df["rolling_mean"] = (
    df.sort_values(["state", "month"])
    .groupby("state")["enrolment_count"]
    .rolling(3, min_periods=1).mean()
    .reset_index(level=0, drop=True)
)

# Panel 1: state with the highest mean enrolment_count.
# Panel 2: state in the first row of the ranking file
# (presumably the top-ranked state; verify the file is sorted by rank).
states = [
    df.groupby("state")["enrolment_count"].mean().idxmax(),
    final_results.iloc[0]["state"]
]

fig, axes = plt.subplots(1, 2, figsize=(16, 6))

for ax, state in zip(axes, states):
    # Chronological order keeps both lines from zig-zagging across the axis.
    d = df[df["state"] == state].sort_values("month")
    ax.plot(d["month"], d["enrolment_count"], label="Actual")
    ax.plot(d["month"], d["rolling_mean"], linestyle="--", label="Baseline")
    ax.legend()
    ax.set_title(state)

plt.savefig(r"C:\Users\varun\Desktop\output\visual_07_baseline.png", bbox_inches="tight")
plt.close()


In [55]:

# Visual 08: states plotted by mean enrolment_change (x) and the ranking
# file's biometric_volume (y), split at an anomaly_score of 0.7.
mean_change = df.groupby("state")["enrolment_change"].mean().reset_index()
score_columns = final_results[["state", "anomaly_score", "biometric_volume"]]
features = score_columns.merge(mean_change, on="state")

# Two explicit masks (rather than one mask and its negation) so rows with a
# missing anomaly_score fall into neither group.
normal = features[features["anomaly_score"] < 0.7]
anomalous = features[features["anomaly_score"] >= 0.7]

fig, ax = plt.subplots(figsize=(12, 8))

ax.scatter(
    normal["enrolment_change"],
    normal["biometric_volume"],
    s=200, alpha=0.6, label="Normal", edgecolors="black",
)
ax.scatter(
    anomalous["enrolment_change"],
    anomalous["biometric_volume"],
    s=300, alpha=0.9, marker="^", label="Anomalous",
    edgecolors="black",
)

ax.set_xlabel("Average Enrolment Volatility")
ax.set_ylabel("Average Biometric Volume")
ax.set_title("Operational Anomaly Detection (Isolation Forest Output)")
ax.legend()
ax.grid(alpha=0.3)

fig.savefig(
    r"C:\Users\varun\Desktop\output\visual_08_anomaly_feature_space.png",
    bbox_inches="tight",
)
plt.close(fig)


In [57]:
# Visual 09: one histogram per score column of the ranking table.
fig, axes = plt.subplots(1, 4, figsize=(18, 5))

# (column in final_results, panel title)
cols = [
    ("anomaly_score", "Anomaly Score"),
    ("trend_score", "Trend Deviation Score"),
    ("biometric_volume", "Biometric Load Score"),
    ("priority_score", "Final Priority Score")
]

for panel_idx, (col, title) in enumerate(cols):
    panel = axes[panel_idx]
    panel.hist(final_results[col], bins=12, edgecolor="black", alpha=0.7)
    panel.set_title(title, fontsize=12, weight="bold")
    panel.set_xlabel("Score")
    panel.set_ylabel("Frequency")
    panel.grid(alpha=0.3)

fig.suptitle(
    "Distribution of Normalized Analytical Scores",
    fontsize=16,
    weight="bold",
)

fig.tight_layout()
fig.savefig(
    r"C:\Users\varun\Desktop\output\visual_09_score_distributions.png",
    bbox_inches="tight",
)
plt.close(fig)


In [None]:
# Visual 10: enrolment_count summed over all rows of each month.
national = df.groupby("month")["enrolment_count"].sum()

fig, ax = plt.subplots(figsize=(16, 7))
ax.plot(national.index, national.values, linewidth=3, marker="o")

ax.set_title("National Aadhaar Enrolment Trend (All States)")
ax.set_xlabel("Month")
ax.set_ylabel("Total Enrolments")
ax.grid(alpha=0.3)
plt.xticks(rotation=45)

fig.savefig(
    r"C:\Users\varun\Desktop\output\visual_10_national_timeseries.png",
    bbox_inches="tight",
)
plt.close(fig)


In [58]:
# Visual 11: the 15 states with the highest mean enrolment_count, sorted
# ascending so the largest bar ends up at the top of the horizontal chart.
mean_by_state = df.groupby("state")["enrolment_count"].mean()
state_avg = mean_by_state.nlargest(15).sort_values()

fig, ax = plt.subplots(figsize=(12, 8))
ax.barh(state_avg.index, state_avg.values, edgecolor="black")

ax.set_xlabel("Average Monthly Enrolment")
ax.set_title("Top 15 States by Average Enrolment Volume")
ax.grid(axis="x", alpha=0.3)

fig.savefig(
    r"C:\Users\varun\Desktop\output\visual_11_state_average_enrolment.png",
    bbox_inches="tight",
)
plt.close(fig)


In [59]:
# Visual 12: per-state means of every numeric column; the scatter uses the
# enrolment_count and biometric_volume means.
state_summary = df.groupby("state").mean(numeric_only=True)

fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(
    state_summary["enrolment_count"],
    state_summary["biometric_volume"],
    s=200,
    alpha=0.7,
    edgecolors="black",
)

ax.set_title("Operational Load: Biometric Volume vs Enrolment")
ax.set_xlabel("Average Enrolment Count")
ax.set_ylabel("Average Biometric Volume")
ax.grid(alpha=0.3)

fig.savefig(
    r"C:\Users\varun\Desktop\output\visual_12_biometric_vs_enrolment.png",
    bbox_inches="tight",
)
plt.close(fig)


In [60]:
# Visual 13: violin plots for the ten states with the largest standard
# deviation of enrolment_count.
enrolment_std = df.groupby("state")["enrolment_count"].std()
top_volatile = enrolment_std.nlargest(10).index
volatile_rows = df[df["state"].isin(top_volatile)]

fig, ax = plt.subplots(figsize=(14, 8))
sns.violinplot(
    data=volatile_rows,
    x="state",
    y="enrolment_count",
    inner="quartile",
    ax=ax,
)

plt.xticks(rotation=45)
ax.set_xlabel("State / UT")
ax.set_ylabel("Monthly Enrolment Count")
ax.set_title("Top 10 States by Enrolment Volatility")

fig.savefig(
    r"C:\Users\varun\Desktop\output\visual_13_volatility_violin.png",
    bbox_inches="tight",
)
plt.close(fig)


In [61]:
# Column of final_results that holds the composite score plotted in this
# cell. It is not assigned in any earlier visible cell, so without this line
# a fresh-kernel "Run All" fails with NameError on the first use below.
score_col = "priority_score"

# Take the 15 highest-scoring states, then re-sort ascending so the highest
# score is drawn as the top bar of the horizontal bar chart.
top_15 = (
    final_results
    .sort_values(score_col, ascending=False)
    .head(15)
    .sort_values(score_col)
)

def tier_color(score):
    """Return the hex colour of the tier that ``score`` falls into.

    Scores of 0.7 and above map to red ("Immediate"), scores from 0.5 up to
    but excluding 0.7 map to orange ("Monitor"), and everything else maps to
    yellow ("Routine").
    """
    # Thresholds are checked from highest to lowest; the first match wins.
    tier_thresholds = (
        (0.7, "#C62828"),   # Immediate
        (0.5, "#EF6C00"),   # Monitor
    )
    for lower_bound, hex_color in tier_thresholds:
        if score >= lower_bound:
            return hex_color
    return "#FBC02D"        # Routine

# Visual 14: horizontal bar chart of the top-15 states, coloured by tier and
# annotated with rank and score.
colors = top_15[score_col].apply(tier_color)

fig, ax = plt.subplots(figsize=(14, 10))
bars = ax.barh(
    top_15["state"],
    top_15[score_col],
    color=colors,
    edgecolor="black",
    linewidth=1.8,
)

ax.set_xlim(0, 1)
ax.set_xlabel("Composite Priority Score (Normalized)")
ax.set_title("State-Level Operational Priority Ranking")

# Label every bar just to the right of its end, vertically centred.
for bar, score_value, rank_value in zip(bars, top_15[score_col], top_15["rank"]):
    ax.text(
        score_value + 0.02,
        bar.get_y() + bar.get_height() / 2,
        f"Rank #{rank_value} | {score_value:.3f}",
        va="center",
        fontsize=10,
        weight="bold",
    )

# Legend entries mirror the colours returned by tier_color.
tier_legend = (
    ("#C62828", "Immediate Attention (≥ 0.70)"),
    ("#EF6C00", "Enhanced Monitoring (0.50–0.69)"),
    ("#FBC02D", "Routine Oversight (< 0.50)"),
)
legend_elements = [
    Patch(facecolor=face, edgecolor="black", label=text)
    for face, text in tier_legend
]

ax.legend(handles=legend_elements, loc="lower right")
ax.grid(axis="x", alpha=0.3)

fig.savefig(
    r"C:\Users\varun\Desktop\output\visual_14_state_priority_ranking.png",
    bbox_inches="tight",
)
plt.close(fig)


In [62]:

# Visual 15: per-state mean enrolment vs mean biometric volume, with states
# at or above an anomaly_score of 0.7 highlighted and labelled.
state_features = df.groupby("state").agg(
    avg_enrolment=("enrolment_count", "mean"),
    avg_biometric=("biometric_volume", "mean"),
).reset_index()

# Inner join: only states present in both tables are plotted.
plot_df = pd.merge(
    state_features,
    final_results[["state", "anomaly_score"]],
    on="state",
    how="inner",
)

# Separate comparisons (not a mask and its negation) so a missing
# anomaly_score lands in neither group.
normal_states = plot_df[plot_df["anomaly_score"] < 0.7]
anomalous_states = plot_df[plot_df["anomaly_score"] >= 0.7]

fig, ax = plt.subplots(figsize=(12, 8))

ax.scatter(
    normal_states["avg_enrolment"],
    normal_states["avg_biometric"],
    s=180,
    alpha=0.6,
    label="Normal States",
    edgecolors="black",
)

ax.scatter(
    anomalous_states["avg_enrolment"],
    anomalous_states["avg_biometric"],
    s=260,
    alpha=0.9,
    marker="^",
    label="Anomalous States",
    edgecolors="black",
)

# Write the state name next to every anomalous marker.
for point in anomalous_states.itertuples(index=False):
    ax.text(
        point.avg_enrolment,
        point.avg_biometric,
        point.state,
        fontsize=9,
        weight="bold",
    )

ax.set_xlabel("Average Monthly Enrolment Count")
ax.set_ylabel("Average Biometric Volume")
ax.set_title(
    "Isolation Forest–Based Anomaly Classification of States",
    fontsize=15,
    weight="bold",
)

ax.legend()
ax.grid(alpha=0.3)

fig.savefig(
    r"C:\Users\varun\Desktop\output\visual_15_anomaly_classification.png",
    bbox_inches="tight",
)
plt.close(fig)


In [63]:
# Visual 16: trend_score against priority_score for every row of the
# ranking table; states with priority_score >= 0.7 are labelled.
fig, ax = plt.subplots(figsize=(12, 8))

ax.scatter(
    final_results["trend_score"],
    final_results["priority_score"],
    s=200,
    alpha=0.75,
    edgecolors="black",
)

high_priority = final_results[final_results["priority_score"] >= 0.7]
for trend_value, priority_value, state_name in zip(
    high_priority["trend_score"],
    high_priority["priority_score"],
    high_priority["state"],
):
    # Nudge the label slightly to the right of the marker.
    ax.text(trend_value + 0.01, priority_value, state_name, fontsize=9)

ax.set_xlabel("Trend Deviation Score")
ax.set_ylabel("Composite Priority Score")
ax.set_title(
    "Impact of Enrolment Trend Deviation on Priority Ranking",
    fontsize=15,
    weight="bold",
)

ax.grid(alpha=0.3)

fig.savefig(
    r"C:\Users\varun\Desktop\output\visual_16_trend_vs_priority.png",
    bbox_inches="tight",
)
plt.close(fig)



In [64]:
# Visual 17: mean biometric_volume per state and calendar month.
# Unlike the coverage heatmap there is no fill value, so state/month
# combinations without data stay NaN in the pivot.
bio_month = df["month"].dt.to_period("M")
bio_pivot = pd.pivot_table(
    df,
    values="biometric_volume",
    index="state",
    columns=bio_month,
    aggfunc="mean",
)

fig, ax = plt.subplots(figsize=(18, 10))
sns.heatmap(bio_pivot, cmap="YlOrRd", linewidths=0.3, ax=ax)

ax.set_title("Biometric Operational Load Across States and Time")
ax.set_xlabel("Month")
ax.set_ylabel("State / UT")

fig.savefig(
    r"C:\Users\varun\Desktop\output\visual_17_biometric_load_heatmap.png",
    bbox_inches="tight",
)
plt.close(fig)


In [65]:
# Visual 18: how the volume-based rank of each state changes when the mean
# enrolment is computed over its most recent 6, 9 and 12 rows.
windows = [6, 9, 12]
rank_frames = []

# groupby().tail(w) keeps the LAST w rows of each state in row order, so the
# rows must be chronological for those to be the most recent w months.
chronological = df.sort_values(["state", "month"])

for w in windows:
    temp = chronological.groupby("state").tail(w)
    agg = temp.groupby("state")["enrolment_count"].mean().reset_index()
    agg["window"] = f"{w}M"
    # Rank 1 = highest mean enrolment within this window.
    agg["rank"] = agg["enrolment_count"].rank(ascending=False)
    rank_frames.append(agg)

rank_df = pd.concat(rank_frames)

# Ten states with the best (smallest) average rank across the three windows.
top_states = (
    rank_df.groupby("state")["rank"]
    .mean()
    .nsmallest(10)
    .index
)

plt.figure(figsize=(14, 8))

for state in top_states:
    subset = rank_df[rank_df["state"] == state]
    plt.plot(
        subset["window"],
        subset["rank"],
        marker="o",
        linewidth=2,
        label=state
    )

# Put rank 1 at the top of the chart.
plt.gca().invert_yaxis()
plt.xlabel("Time Window")
plt.ylabel("Rank (Lower = Higher Priority)")
plt.title("Rank Stability Across Time Windows")
plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
plt.grid(alpha=0.3)

plt.savefig(
    r"C:\Users\varun\Desktop\output\visual_18_rank_stability.png",
    bbox_inches="tight"
)
plt.close()


In [69]:
# Visual 19: rank by mean enrolment volume (x) against the rank stored in
# the ranking table (y), with a dashed diagonal marking equal ranks.
mean_enrolment = df.groupby("state")["enrolment_count"].mean()
baseline_rank = (
    mean_enrolment
    .rank(ascending=False)  # rank 1 = highest mean enrolment
    .reset_index(name="baseline_rank")
)

comparison = pd.merge(
    baseline_rank,
    final_results[["state", "rank"]],
    on="state",
)

fig, ax = plt.subplots(figsize=(12, 8))

ax.scatter(
    comparison["baseline_rank"],
    comparison["rank"],
    s=200,
    alpha=0.7,
    edgecolors="black",
)

# The diagonal runs from rank 1 to the largest rank seen on either axis.
max_rank = max(
    comparison["baseline_rank"].max(),
    comparison["rank"].max()
)
diagonal = [1, max_rank]

ax.plot(
    diagonal,
    diagonal,
    linestyle="--",
    color="red",
    linewidth=2,
    label="Baseline = ML Rank",
)

ax.set_xlabel("Baseline Rank (Avg Enrolment Volume)")
ax.set_ylabel("ML-Based Priority Rank")
ax.set_title(
    "Comparison of Baseline Volume Ranking vs ML-Based Prioritization",
    fontsize=15,
    weight="bold",
)

ax.legend()
ax.grid(alpha=0.3)

fig.savefig(
    r"C:\Users\varun\Desktop\output\visual_19_baseline_vs_ml.png",
    bbox_inches="tight",
)
plt.close(fig)



In [70]:

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle, FancyArrow

# Visual 20: blank canvas for the intervention-framework diagram.
plt.figure(figsize=(14, 9))
ax = plt.gca()
ax.axis("off")

# Patches added with add_patch do not rescale the view, so the axes would
# stay at the default 0..1 range and the right-most tier box (which extends
# to x = 1.10) would be clipped at x = 1. Fix the limits explicitly, leaving
# the same 0.05 margin on the right that the left-most box has on the left.
ax.set_xlim(0, 1.15)
ax.set_ylim(0, 1)

# --------------------------------------------------
# Helper function to draw boxes
# --------------------------------------------------
def draw_box(x, y, w, h, text, color):
    """Draw a filled, black-outlined box on the global ``ax`` with centred text.

    Parameters
    ----------
    x, y : lower-left corner of the box.
    w, h : width and height of the box.
    text : label placed at the centre of the box (bold, size 12).
    color : face colour of the box.
    """
    box_patch = Rectangle(
        (x, y), w, h, facecolor=color, edgecolor="black", linewidth=2
    )
    ax.add_patch(box_patch)

    # Anchor the label at the geometric centre of the box.
    center_x = x + w / 2
    center_y = y + h / 2
    ax.text(
        center_x, center_y, text,
        ha="center", va="center",
        fontsize=12, weight="bold", wrap=True,
    )

# --------------------------------------------------
# Boxes
# --------------------------------------------------
# Each entry is (x, y, width, height, label, face colour): the score box on
# top, followed by the three tier boxes laid out left to right beneath it.
box_specs = [
    (
        0.35, 0.78, 0.30, 0.12,
        "ML-Based Priority Score\n(State-Level)",
        "#BBDEFB",
    ),
    (
        0.05, 0.50, 0.35, 0.16,
        "Tier 1: Immediate Attention\n(Priority Score ≥ 0.7)\n\n• Operational audit\n• Emergency staff & device deployment\n• Weekly monitoring",
        "#EF9A9A",
    ),
    (
        0.40, 0.50, 0.35, 0.16,
        "Tier 2: Enhanced Monitoring\n(0.5 ≤ Priority Score < 0.7)\n\n• Bi-weekly reviews\n• Preventive maintenance\n• Contingency planning",
        "#FFE082",
    ),
    (
        0.75, 0.50, 0.35, 0.16,
        "Tier 3: Routine Oversight\n(Priority Score < 0.5)\n\n• Standard reporting\n• Best-practice benchmarking\n• Capacity buffer readiness",
        "#C8E6C9",
    ),
]

for box_spec in box_specs:
    draw_box(*box_spec)

# --------------------------------------------------
# Arrows
# --------------------------------------------------
def draw_arrow(x1, y1, x2, y2):
    """Draw a plain "->" arrow on the global ``ax`` from (x1, y1) to (x2, y2)."""
    arrow_style = {"arrowstyle": "->", "linewidth": 2}
    # An annotation with empty text renders only the arrow; it starts at
    # xytext and points at xy.
    ax.annotate("", xy=(x2, y2), xytext=(x1, y1), arrowprops=arrow_style)

# Fan out from the bottom-centre of the score box to the top edge of each
# tier box.
tier_arrow_targets = (0.22, 0.58, 0.90)
for target_x in tier_arrow_targets:
    draw_arrow(0.50, 0.78, target_x, 0.66)

# Title
ax.set_title(
    "Priority-Driven Intervention Framework for Aadhaar Operations",
    fontsize=16,
    weight="bold",
    pad=20,
)

# Save the figure that owns ``ax`` and release it.
framework_fig = ax.figure
framework_fig.savefig(
    r"C:\Users\varun\Desktop\output\visual_20_intervention_framework.png",
    bbox_inches="tight",
)
plt.close(framework_fig)
