# Verification Effects Visualization

This notebook visualizes the effects of suppressing or amplifying bias features.

**Purpose:**
- Verify that the identified bias features actually cause biased predictions
- Compare suppression vs. amplification vs. random control
- Quantify effect sizes across demographics

**Input Data:**
- Verification results (baseline, suppress, amplify, random)
- Logit gaps before and after manipulation

**Output:**
- 3×3 grid of bar charts showing effects per demographic
- Effect size summary statistics
- Success-criteria evaluation and descriptive summary statistics (no formal significance tests are computed in this notebook)

In [None]:
import os
import sys
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Resolve the repository root as two directory levels above the current
# working directory (assumes the notebook is launched from its own folder,
# e.g. notebooks/visualizations — TODO confirm).
PROJECT_ROOT = Path.cwd().parent.parent

# Make the project's `src` package importable. The membership check keeps
# sys.path free of duplicate entries when this cell is re-run.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

from src.visualization import (
    setup_korean_font,
    load_demographics,
    load_verification_results,
    plot_verification_effects,
    get_demographic_labels
)

print(f"Project root: {PROJECT_ROOT}")

In [None]:
# Setup
# Apply the seaborn style first: seaborn's style presets include their own
# `font.family` / `font.sans-serif` rcParams, so calling set_style() after the
# font setup would overwrite the font selected for the Korean labels.
# NOTE(review): assumes setup_korean_font() configures matplotlib rcParams — confirm.
sns.set_style('whitegrid')
setup_korean_font()

In [None]:
# Configuration: input and output locations, all relative to the project root.
DATA_DIR = PROJECT_ROOT.joinpath("data")
RESULTS_DIR = PROJECT_ROOT.joinpath("results")
ASSETS_DIR = PROJECT_ROOT.joinpath("notebooks", "visualizations", "assets")

# Create the assets directory (and any missing parents) if it is not there yet.
ASSETS_DIR.mkdir(parents=True, exist_ok=True)

# Stage label: passed to the results loader and embedded in output file names.
STAGE = "mock"

print(f"Stage: {STAGE}")

## Load Data

In [None]:
# Demographic definitions plus their display labels
# (presumably Korean and English, judging by the `_ko` / `_en` names — verify
# against get_demographic_labels).
demographics_dict = load_demographics(DATA_DIR)
demographic_labels_ko, demographic_labels_en = get_demographic_labels(
    demographics_dict
)

# Verification results for the configured stage; later cells look entries up
# by the `_ko` demographic label.
verification_results = load_verification_results(
    RESULTS_DIR,
    stage=STAGE,
)

print(f"Loaded verification results for {len(verification_results)} demographics")

## Plot Verification Effects

In [None]:
# Destination of the rendered figure; the stage name is part of the file name.
effects_png = ASSETS_DIR / f"verification_effects_{STAGE}.png"

# Draw the per-demographic verification figure via the project helper,
# handing it the save location, then display it inline.
fig = plot_verification_effects(
    verification_results=verification_results,
    demographic_labels_ko=demographic_labels_ko,
    demographic_labels_en=demographic_labels_en,
    save_path=effects_png,
    figsize=(18, 15),
)

plt.show()

## Effect Size Analysis

In [None]:
# Compute effect sizes


def _relative_change(value, baseline):
    """Return the change of *value* relative to *baseline* as a fraction.

    The difference is divided by ``abs(baseline)`` so the sign always gives
    the direction of the change (negative: the gap went down), including for
    a negative baseline. A zero baseline has no defined relative change and
    yields NaN; NaN inputs (missing metrics) propagate to a NaN result.
    """
    if baseline == 0:
        return np.nan
    return (value - baseline) / abs(baseline)


# Fixed column layout, so the table keeps its columns even when no
# demographic has results (later cells index these columns by name).
EFFECT_COLUMNS = [
    'Demographic',
    'Baseline Gap',
    'Suppress Effect (%)',
    'Amplify Effect (%)',
    'Random Effect (%)',
    'Suppress Std',
    'Amplify Std',
]

effect_sizes = []

for demo_ko in demographic_labels_ko:
    if demo_ko not in verification_results:
        continue

    results = verification_results[demo_ko]

    # Missing metrics become NaN rather than 0: a 0 default would show up as a
    # spurious -100% effect (or as a "perfect" 0% random control).
    baseline = results.get('baseline_gap_mean', np.nan)
    suppress_effect = _relative_change(results.get('suppress_gap_mean', np.nan), baseline)
    amplify_effect = _relative_change(results.get('amplify_gap_mean', np.nan), baseline)
    random_effect = _relative_change(results.get('random_gap_mean', np.nan), baseline)

    effect_sizes.append({
        'Demographic': demo_ko,
        'Baseline Gap': baseline,
        'Suppress Effect (%)': suppress_effect * 100,
        'Amplify Effect (%)': amplify_effect * 100,
        'Random Effect (%)': random_effect * 100,
        'Suppress Std': results.get('suppress_gap_std', np.nan),
        'Amplify Std': results.get('amplify_gap_std', np.nan),
    })

df_effects = pd.DataFrame(effect_sizes, columns=EFFECT_COLUMNS)

print("\nVerification Effect Sizes:")
print("=" * 100)
print(df_effects.to_string(index=False))

# Save to CSV (path built once and reused for the confirmation message)
effects_csv = ASSETS_DIR / f"verification_effect_sizes_{STAGE}.csv"
df_effects.to_csv(effects_csv, index=False)
print(f"\nSaved to {effects_csv}")

## Comparative Analysis

In [None]:
# Compare effect sizes across demographics: one horizontal-bar panel per
# condition, every panel listing the demographics in the same row order.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
x = np.arange(len(df_effects))

# (effect column, bar colour, panel title), left to right.
# The random control panel is the one expected to stay near zero.
panels = [
    ('Suppress Effect (%)', 'blue', '억제 효과\nSuppression Effects'),
    ('Amplify Effect (%)', 'red', '증폭 효과\nAmplification Effects'),
    ('Random Effect (%)', 'orange', '무작위 대조군\nRandom Control'),
]

for ax, (column, color, title) in zip(axes, panels):
    ax.barh(x, df_effects[column], color=color, alpha=0.7)
    ax.set_yticks(x)
    ax.set_yticklabels(df_effects['Demographic'])
    ax.set_xlabel('Effect (%)', fontsize=12)
    ax.set_title(title, fontsize=14, pad=10)
    # Zero reference line separates decreases (left) from increases (right).
    ax.axvline(0, color='black', linewidth=0.8)
    ax.grid(axis='x', alpha=0.3)

plt.tight_layout()
plt.savefig(ASSETS_DIR / f"effect_sizes_comparison_{STAGE}.png", dpi=300, bbox_inches='tight')
plt.show()

## Success Criteria Evaluation

In [None]:
# Evaluation criteria (all relative to the baseline gap):
# - Suppress should reduce the gap by more than THRESHOLD percent
# - Amplify should increase the gap by more than THRESHOLD percent
# - Random should change the gap by less than THRESHOLD percent in magnitude
# A NaN effect fails every comparison and therefore never counts as a success.

THRESHOLD = 5.0  # percent


def _mark(ok):
    """Return the pass/fail symbol used in the report lines."""
    return '✓' if ok else '✗'


def _format_rate(count, total):
    """Format ``count/total (xx.x%)``; the percentage reads ``n/a`` when total is 0."""
    if total == 0:
        # No demographics were evaluated; avoid a ZeroDivisionError.
        return f"{count}/{total} (n/a)"
    return f"{count}/{total} ({count / total * 100:.1f}%)"


success_counts = {
    'suppress_success': 0,
    'amplify_success': 0,
    'random_minimal': 0,
    'total': len(df_effects)
}

print("\nSuccess Criteria Evaluation:")
print("=" * 100)
print(f"Threshold: ±{THRESHOLD}%\n")

for _, row in df_effects.iterrows():
    demo = row['Demographic']
    suppress = row['Suppress Effect (%)']
    amplify = row['Amplify Effect (%)']
    # Not named `random`, to avoid shadowing the standard-library module name.
    random_pct = row['Random Effect (%)']

    suppress_ok = suppress < -THRESHOLD
    amplify_ok = amplify > THRESHOLD
    random_ok = abs(random_pct) < THRESHOLD

    # Booleans add as 0/1, so each criterion is tallied independently.
    success_counts['suppress_success'] += int(suppress_ok)
    success_counts['amplify_success'] += int(amplify_ok)
    success_counts['random_minimal'] += int(random_ok)

    # A demographic passes overall only if all three criteria hold.
    status = _mark(suppress_ok and amplify_ok and random_ok)
    print(f"{status} {demo:15s} | Suppress: {suppress:+6.1f}% {_mark(suppress_ok)} | "
          f"Amplify: {amplify:+6.1f}% {_mark(amplify_ok)} | "
          f"Random: {random_pct:+6.1f}% {_mark(random_ok)}")

total = success_counts['total']

print("\n" + "=" * 100)
print("\nOverall Success Rates:")
print(f"  Suppress success: {_format_rate(success_counts['suppress_success'], total)}")
print(f"  Amplify success:  {_format_rate(success_counts['amplify_success'], total)}")
print(f"  Random minimal:   {_format_rate(success_counts['random_minimal'], total)}")

## Statistical Summary

In [None]:
# Descriptive statistics of each effect column, taken across demographics.
print("\nEffect Size Statistics:")
print("=" * 80)

effect_columns = ['Suppress Effect (%)', 'Amplify Effect (%)', 'Random Effect (%)']

# Selecting the three columns and iterating with .items() yields each column
# name together with its Series of per-demographic values.
for effect_type, values in df_effects[effect_columns].items():
    print(f"\n{effect_type}:")
    print(f"  Mean:   {values.mean():+7.2f}%")
    print(f"  Median: {values.median():+7.2f}%")
    print(f"  Std:    {values.std():7.2f}%")
    print(f"  Min:    {values.min():+7.2f}%")
    print(f"  Max:    {values.max():+7.2f}%")

## Interpretation

### Expected Results:

1. **Suppression (Blue bars):**
   - Should show negative effect (decrease in logit gap)
   - Target: >5% reduction
   - Indicates features contribute to bias

2. **Amplification (Red bars):**
   - Should show positive effect (increase in logit gap)
   - Target: >5% increase
   - Confirms features are causal for bias

3. **Random Control (Orange bars):**
   - Should be near zero (<5% change)
   - Validates specificity of identified features
   - Rules out non-specific effects

### What to Look For:

- **Strong Effects:** Which demographics show largest suppression/amplification?
- **Asymmetry:** Are suppress/amplify effects symmetric?
- **Controls:** Are random effects truly minimal?
- **Consistency:** Do effects align with IG² scores?

### Next Steps:

1. Investigate demographics with weak effects
2. Test different feature thresholds
3. Examine individual feature contributions
4. Correlate with real-world bias metrics