# 06 — Monitoring & Drift Detection

**Topics**
- Data drift (the input distribution changes) vs. concept drift (the relationship between inputs and target changes)
- Simple univariate drift tests (KS-test) and **PSI (Population Stability Index)**
- Rolling-window monitoring with alerts

**Deliverables**
- A `psi` function for the Population Stability Index, plus the KS test via `scipy.stats.ks_2samp`
- Synthetic example showing drift emergence

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.stats import ks_2samp

# Fix the seed of NumPy's global RNG so every run draws the same samples.
np.random.seed(0)

# Reference sample: 5000 draws from a standard normal.
baseline = np.random.normal(loc=0, scale=1, size=5000)

# Production sample: a two-component mixture. Most points come from a
# shifted, wider normal; a smaller cluster is centred further to the right.
shifted_bulk = np.random.normal(loc=0.5, scale=1.2, size=4000)
right_cluster = np.random.normal(loc=2.0, scale=0.8, size=1000)
production = np.concatenate((shifted_bulk, right_cluster))

def psi(expected, actual, bins=10):
    """Return the Population Stability Index (PSI) of ``actual`` vs ``expected``.

    Both samples are binned on edges derived from ``expected``; the PSI is
    ``sum((a - e) * ln(a / e))`` over the per-bin proportions ``a`` (actual)
    and ``e`` (expected).

    Args:
        expected: 1-D array-like of reference (baseline) values.
        actual: 1-D array-like of values to compare against the baseline.
        bins: Bin specification forwarded to ``np.histogram`` when binning
            ``expected`` (an integer bin count by default).

    Returns:
        The PSI as a Python ``float``. It is 0 when both samples have the
        same binned proportions and grows as they diverge.
    """
    expected = np.asarray(expected, dtype=float).ravel()
    actual = np.asarray(actual, dtype=float).ravel()

    e_counts, edges = np.histogram(expected, bins=bins)

    # np.histogram ignores values outside [edges[0], edges[-1]]. Without the
    # clip, production mass that drifted beyond the baseline's range would be
    # dropped before normalising, hiding exactly the drift we want to detect.
    # Clipping folds those values into the outermost bins instead.
    a_counts, _ = np.histogram(np.clip(actual, edges[0], edges[-1]), bins=edges)

    # Guard the denominators so an empty sample yields zeros, not NaNs.
    e_perc = e_counts / np.maximum(1, e_counts.sum())
    a_perc = a_counts / np.maximum(1, a_counts.sum())

    # Floor empty bins at a tiny proportion so the log ratio stays finite.
    eps = 1e-6
    e_perc = np.where(e_perc == 0, eps, e_perc)
    a_perc = np.where(a_perc == 0, eps, a_perc)

    return float(np.sum((a_perc - e_perc) * np.log(a_perc / e_perc)))

# Score the production sample against the baseline with both drift metrics.
ks_stat, ks_p = ks_2samp(baseline, production)
psi_val = psi(baseline, production, bins=20)

print("PSI:", psi_val)
print("KS statistic:", ks_stat, "p-value:", ks_p)

# Draw one histogram per sample, each in its own figure, baseline first.
for sample, heading in (
    (baseline, "Baseline distribution"),
    (production, "Production distribution"),
):
    plt.figure()
    plt.hist(sample, bins=40)
    plt.title(heading)
    plt.xlabel("x")
    plt.ylabel("count")
    plt.show()

# Batch-by-batch monitoring demo: six batches drawn like the baseline,
# followed by four batches from a shifted, slightly wider distribution.
batch_params = [(0, 1)] * 6 + [(0.6, 1.1)] * 4
batches = [np.random.normal(mu, sigma, size=500) for mu, sigma in batch_params]

# PSI of every batch against the fixed baseline sample.
psi_vals = [psi(baseline, batch, bins=20) for batch in batches]

# Plot PSI against the 1-based batch number.
batch_ids = range(1, len(psi_vals) + 1)
plt.figure()
plt.plot(batch_ids, psi_vals, marker="o")
plt.xlabel("Batch")
plt.ylabel("PSI")
plt.title("PSI over time (drift emerges)")
plt.show()

print("Rule of thumb PSI: <0.1 (stable), 0.1–0.25 (monitor), >0.25 (significant drift)")