<a href="https://colab.research.google.com/github/vrishank-ramineni/ECON3916---Statistics-Machine-Learning/blob/main/Lab_6_The_Architecture_of_Bias.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import seaborn as sns
import pandas as pd
import numpy as np

# 1. Data ingestion: the seaborn 'titanic' table is treated as the full population.
df = sns.load_dataset('titanic')
population_size = len(df)
print(f"Total Population: {population_size}")
print(f"Population Survival Rate: {df['survived'].mean():.4f}")

# 2. Manual shuffle (simulated sampling).
# The legacy global NumPy seed is fixed so the permutation below is the same on
# every run of the lesson; without it each run would draw a different shuffle.
np.random.seed(2026)
# One shuffled row position per row of df (a permutation of 0..population_size-1).
indices = np.random.permutation(population_size)

Total Population: 891
Population Survival Rate: 0.3838


In [5]:
# 3. Cut the deck (80/20 split)
# `indices` is a shuffled array of row *positions* (a permutation of
# range(len(df))), so the first 80% of it is the train sample and the rest is test.
split_point = int(len(df) * 0.8)

# Slicing the shuffled positions
train_idx = indices[:split_point]
test_idx = indices[split_point:]

# Creating the subsets.
# Positions must be resolved with .iloc. A label-based .loc lookup only happens
# to work while df still carries its default 0..n-1 index; after any filtering,
# sorting or dropna it would raise KeyError or silently select the wrong rows.
train_set = df.iloc[train_idx]
test_set = df.iloc[test_idx]

# Sanity check: the two subsets together account for every row of df.
assert len(train_set) + len(test_set) == len(df), "split does not cover the population"

# 4. Bias check (the delta): gap between the survival rates of the two samples.
train_surv = train_set['survived'].mean()
test_surv = test_set['survived'].mean()
delta = abs(train_surv - test_surv)

print(f"Train Survival Rate: {train_surv:.4f}")
print(f"Test Survival Rate:  {test_surv:.4f}")
print(f"Sampling Bias (Delta): {delta:.4f}")

Train Survival Rate: 0.3736
Test Survival Rate:  0.4246
Sampling Bias (Delta): 0.0510


In [7]:
from sklearn.model_selection import train_test_split

# Stratifying on 'pclass' asks train_test_split to keep each passenger class's
# share approximately equal in the train and test partitions (20% held out).
X_train, X_test = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['pclass'],
)

# Relative frequency of each class in the two partitions, for side-by-side comparison.
train_class_dist = X_train['pclass'].value_counts(normalize=True)
test_class_dist = X_test['pclass'].value_counts(normalize=True)

print("\n--- Stratified Split ---")
print("Train Class Dist:\n", train_class_dist)
print("Test Class Dist:\n", test_class_dist)


--- Stratified Split ---
Train Class Dist:
 pclass
3    0.550562
1    0.242978
2    0.206461
Name: proportion, dtype: float64
Test Class Dist:
 pclass
3    0.553073
1    0.240223
2    0.206704
Name: proportion, dtype: float64


In [9]:
# Sample Ratio Mismatch (SRM) Detection using Chi-Square Test
#
# Compares the observed Control/Treatment counts against the planned allocation
# with a chi-square goodness-of-fit test, prints a verdict, and then prints an
# explanation whose numbers are all derived from the test result (so editing
# `observed` / `expected` keeps the narrative consistent).

from scipy.stats import chisquare
import numpy as np

# ================================================================
# A/B Test Forensic Check
# ================================================================

# Significance threshold below which the split is reported as an SRM.
SRM_ALPHA = 0.01

# Observed: What actually happened
observed = np.array([450, 550])  # Control, Treatment

# Expected: What we planned (50/50 split of 1000 users)
expected = np.array([500, 500])  # Control, Treatment

# Perform Chi-Square Test (observed and expected must sum to the same total)
chi2_stat, p_value = chisquare(f_obs=observed, f_exp=expected)
srm_detected = p_value < SRM_ALPHA

print("="*60)
print("A/B TEST FORENSIC ANALYSIS: Sample Ratio Mismatch (SRM)")
print("="*60)
print(f"Observed Distribution:  Control={observed[0]}, Treatment={observed[1]}")
print(f"Expected Distribution:  Control={expected[0]}, Treatment={expected[1]}")
print(f"\nChi-Square Statistic: {chi2_stat:.4f}")
print(f"P-Value: {p_value:.6f}")
print("="*60)

# Conclusion
if srm_detected:
    print("CRITICAL FAILURE: Sample Ratio Mismatch (SRM) Detected.")
    print("   CHECK LOAD BALANCER / RANDOMIZATION LOGIC.")
    # A small p-value means "very unlikely", not "impossible".
    print("   This split is extremely unlikely under random assignment.")
else:
    print("Variance is within natural limits.")
    print("   The observed split is consistent with random chance.")

print("="*60)


# ================================================================
# Why a large imbalance is NOT Just "Bad Luck"
# ================================================================

print("\n" + "="*60)
print("WHY THIS MATTERS: The Statistics Behind SRM")
print("="*60)

# Calculate the deviation (size of the gap in the first group, absolute and relative)
deviation = abs(observed[0] - expected[0])
percentage_off = (deviation / expected[0]) * 100

# With two groups the test has one degree of freedom, so sqrt(chi-square) is the
# equivalent |z| score ("sigma level") of the imbalance.
sigma_level = float(np.sqrt(chi2_stat))
# How many fair experiments it takes, on average, to see a split this extreme once.
one_in_n = 1 / p_value if p_value > 0 else float("inf")

print(f"\nDeviation from Expected: {deviation} users ({percentage_off:.1f}%)")
print("\nUnder true random assignment (fair coin flip):")
print(f"  * Probability of a split at least this uneven \u2248 {p_value:.2%} (p={p_value:.6f})")
print(f"  * This is a ~{sigma_level:.1f}-sigma event (happens about 1 in {one_in_n:,.0f} experiments)")
print("\nINTERPRETATION:")
print("  If you ran this experiment 1000 times with fair randomization,")
print(f"  you would expect this split (or worse) only \u2248{p_value*1000:.1f} times.")

# Only blame the assignment mechanism when the test actually flagged an SRM;
# otherwise this section would contradict the conclusion printed above.
if srm_detected:
    print("\n  This is NOT bad luck. This is a systematic bias.")
    print("\nLIKELY CAUSES:")
    print("  * Load balancer misconfiguration")
    print("  * Hash function collision")
    print("  * Time-based assignment drift")
    print("  * Cache/cookie persistence issues")
else:
    print("\n  This deviation is consistent with chance; no SRM investigation is needed.")
print("="*60)

A/B TEST FORENSIC ANALYSIS: Sample Ratio Mismatch (SRM)
Observed Distribution:  Control=450, Treatment=550
Expected Distribution:  Control=500, Treatment=500

Chi-Square Statistic: 10.0000
P-Value: 0.001565
CRITICAL FAILURE: Sample Ratio Mismatch (SRM) Detected.
   CHECK LOAD BALANCER / RANDOMIZATION LOGIC.
   This split is statistically impossible under random assignment.

WHY THIS MATTERS: The Statistics Behind SRM

Deviation from Expected: 50 users (10.0%)

Under true random assignment (fair coin flip):
  * Probability of 550+ in one group ≈ 0.16% (p=0.001565)
  * This is a ~3-sigma event (happens <1 in 600 experiments)

INTERPRETATION:
  If you ran this experiment 1000 times with fair randomization,
  you would expect this split (or worse) only ≈1.6 times.

  This is NOT bad luck. This is a systematic bias.

LIKELY CAUSES:
  * Load balancer misconfiguration
  * Hash function collision
  * Time-based assignment drift
  * Cache/cookie persistence issues
