### Bootstrap

In [None]:
# --- repo bootstrap ---------------------------------------------------------
from pathlib import Path
from dotenv import load_dotenv
import os, sys

def repo_root(start: Path) -> Path:
    """Return the nearest ancestor of *start* that looks like the repo root.

    A directory qualifies when it contains a ``.env`` or a ``.git`` entry.
    The search begins at ``start.resolve()`` and walks upward.

    Args:
        start: Path to begin searching from (the bootstrap passes the
            current working directory).

    Returns:
        The first directory, searching upward, that holds ``.env`` or ``.git``.

    Raises:
        RuntimeError: If no directory up to and including the filesystem
            root contains either marker.
    """
    cur = start.resolve()
    # ``cur.parents`` ends with the filesystem root, so the root itself is
    # examined too (a ``while cur != cur.parent`` loop stops one level short).
    for candidate in (cur, *cur.parents):
        if (candidate / ".env").exists() or (candidate / ".git").exists():
            return candidate
    raise RuntimeError("repo root not found")

# Resolve the repository root from the working directory, then wire up
# secrets, the helper import path and the shared data/output locations.
ROOT = repo_root(Path.cwd())
load_dotenv(ROOT / ".env")             # loads secrets

# Re-running this cell in the same kernel must not pile up duplicate entries.
_src_dir = str(ROOT / "src")
if _src_dir not in sys.path:
    sys.path.append(_src_dir)          # optional helpers

DATA_DIR = ROOT / "data"
OUT_DIR  = ROOT / "outputs"
FIG_DIR  = OUT_DIR / "figs"
# parents=True: when ``outputs/`` does not exist yet, a plain mkdir() of
# ``outputs/figs`` raises FileNotFoundError.
FIG_DIR.mkdir(parents=True, exist_ok=True)

print("Repo root:", ROOT)

## Claude Haiku 3.5 Headline Scores

### Generate Visualizations

In [None]:
# ╔══════════════════════════════════════════════════════════════════════╗
# ║  ANALYZE & VISUALIZE Ukraine War Escalation Scores Over Time          ║
# ╚══════════════════════════════════════════════════════════════════════╝
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Set style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# ── Load data ──────────────────────────────────────────────────────────
# Per-headline scores are read from outputs/; plots go to outputs/analysis_plots/.
SCORES_CSV = ROOT / "outputs" / "headline_scores_partial.csv"
OUTPUT_DIR = ROOT / "outputs" / "analysis_plots"
OUTPUT_DIR.mkdir(exist_ok=True)

print("📊 Loading scored headlines...")
df = pd.read_csv(SCORES_CSV, parse_dates=['date'])
df = df[df['score'].notna()]  # Remove any NaN scores
# Put rows in chronological order. Later sections take df['month'].unique(),
# which returns values in first-appearance order; without this sort the
# monthly boxplots and the "last 12 months" slice are only chronological if
# the CSV already happens to be date-sorted.
df = df.sort_values('date', kind='stable')

print(f"✅ Loaded {len(df):,} scored headlines")
print(f"📅 Date range: {df['date'].min().date()} to {df['date'].max().date()}")

# ── Basic statistics ───────────────────────────────────────────────────
# Whole-corpus summary of the per-headline score column.
print("\n📈 Basic Statistics:")
print(f"   Mean escalation score: {df['score'].mean():.2f}")
print(f"   Median score: {df['score'].median():.0f}")
print(f"   Std deviation: {df['score'].std():.2f}")

# ── Calculate daily averages ───────────────────────────────────────────
# One row per calendar day: mean / median / std / number of scored headlines,
# rounded to two decimals. The grouping key is the date part of ``date``.
daily_avg = (
    df.groupby(df['date'].dt.date)['score']
      .agg(mean_score='mean', median_score='median', std_dev='std', count='count')
      .round(2)
      .reset_index()
)
# The group keys are plain ``datetime.date`` objects; convert back to datetime64.
daily_avg['date'] = pd.to_datetime(daily_avg['date'])

# Save daily averages
daily_avg_file = ROOT / "outputs" / "daily_escalation_scores.csv"
daily_avg.to_csv(daily_avg_file, index=False)
print(f"\n💾 Saved daily averages to: {daily_avg_file.name}")

# ── Create visualizations ──────────────────────────────────────────────

# 1. Score Distribution Histogram
# Eleven unit-wide bins centred on the integers 0..10, each annotated with
# the exact number of headlines carrying that score.
hist_fig, hist_ax = plt.subplots(figsize=(10, 6))
hist_ax.hist(df['score'], bins=11, range=(-0.5, 10.5), edgecolor='black', alpha=0.7)
hist_ax.set_xlabel('Escalation Score')
hist_ax.set_ylabel('Number of Headlines')
hist_ax.set_title('Distribution of Escalation Scores (0-10)')
hist_ax.set_xticks(range(11))
for i in range(11):
    count = (df['score'] == i).sum()
    # Label sits a fixed 200 data units above the bar top.
    hist_ax.text(i, count + 200, str(count), ha='center', va='bottom')
hist_fig.tight_layout()
hist_fig.savefig(OUTPUT_DIR / 'score_distribution.png', dpi=300, bbox_inches='tight')
plt.close(hist_fig)

# 2. Daily Average Escalation Score Over Time
# Daily mean as a line, a ±1 std-dev band around it, and a 30-day
# time-based rolling mean drawn on top.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(daily_avg['date'], daily_avg['mean_score'], linewidth=1.5, alpha=0.8)
band_low = daily_avg['mean_score'] - daily_avg['std_dev']
band_high = daily_avg['mean_score'] + daily_avg['std_dev']
ax.fill_between(daily_avg['date'], band_low, band_high,
                alpha=0.2, label='±1 std dev')

# Add 30-day rolling average ('30D' is a time-based window over the date index)
rolling_30 = daily_avg.set_index('date')['mean_score'].rolling('30D').mean()
ax.plot(rolling_30.index, rolling_30.values, 'r-', linewidth=2, label='30-day average')

ax.set(
    xlabel='Date',
    ylabel='Average Escalation Score',
    title='Daily Average Escalation Score Over Time',
)
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
fig.savefig(OUTPUT_DIR / 'daily_escalation_trend.png', dpi=300, bbox_inches='tight')
plt.close(fig)

# 3. Weekly aggregated view
# Bucket headlines into weekly periods and plot the weekly mean and median.
df['week'] = df['date'].dt.to_period('W')
weekly_stats = (
    df.groupby('week')['score']
      .agg(mean_score='mean', median_score='median', count='count')
      .round(2)
      .reset_index()
)
# Plot each week at the timestamp where its period starts.
weekly_stats['week_start'] = weekly_stats['week'].dt.start_time

week_fig, week_ax = plt.subplots(figsize=(14, 7))
for column, marker, legend_name in (
    ('mean_score', 'o-', 'Mean'),
    ('median_score', 's-', 'Median'),
):
    week_ax.plot(weekly_stats['week_start'], weekly_stats[column], marker,
                 label=legend_name, markersize=4)
week_ax.set_xlabel('Week')
week_ax.set_ylabel('Escalation Score')
week_ax.set_title('Weekly Average Escalation Scores')
week_ax.legend()
week_ax.grid(True, alpha=0.3)
week_fig.tight_layout()
week_fig.savefig(OUTPUT_DIR / 'weekly_escalation_trend.png', dpi=300, bbox_inches='tight')
plt.close(week_fig)

# 4. Heatmap of scores by day of week and hour
# Mean score for every (hour-of-day, weekday) cell.
# NOTE(review): the hour comes from the ``date`` column; if that column holds
# dates without a time component every row lands in hour 0 — confirm the CSV
# carries timestamps.
df['hour'] = df['date'].dt.hour
df['day_of_week'] = df['date'].dt.day_name()
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

pivot_heatmap = df.pivot_table(values='score', index='hour', columns='day_of_week', aggfunc='mean')
# reindex (rather than pivot_heatmap[day_order]) so a weekday with no
# headlines becomes an empty NaN column instead of raising KeyError.
pivot_heatmap = pivot_heatmap.reindex(columns=day_order)

plt.figure(figsize=(10, 8))
sns.heatmap(pivot_heatmap, cmap='YlOrRd', annot=True, fmt='.2f', cbar_kws={'label': 'Average Score'})
plt.title('Average Escalation Score by Hour and Day of Week')
plt.ylabel('Hour of Day')
plt.xlabel('Day of Week')
plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'hourly_weekly_heatmap.png', dpi=300, bbox_inches='tight')
plt.close()

# 5. Monthly trends with box plots
# One box per calendar month, in the order the months occur in ``df``.
df['month'] = df['date'].dt.to_period('M')
monthly_data = [
    df.loc[df['month'] == month_period, 'score']
    for month_period in df['month'].unique()
]

box_fig, box_ax = plt.subplots(figsize=(14, 7))
box_positions = range(len(df['month'].unique()))
box_ax.boxplot(monthly_data, positions=box_positions, widths=0.6)
box_ax.set_xticks(box_positions)
box_ax.set_xticklabels([str(m) for m in df['month'].unique()], rotation=45, ha='right')
box_ax.set_xlabel('Month')
box_ax.set_ylabel('Escalation Score')
box_ax.set_title('Distribution of Escalation Scores by Month')
box_ax.grid(True, alpha=0.3, axis='y')
box_fig.tight_layout()
box_fig.savefig(OUTPUT_DIR / 'monthly_boxplots.png', dpi=300, bbox_inches='tight')
plt.close(box_fig)

# 6. Proportion of high-escalation headlines over time
# A headline is "high escalation" when its score is 7 or above. Per day we
# keep both the number of such headlines and their share of that day's total.
df['high_escalation'] = df['score'] >= 7
daily_high = (
    df.groupby(df['date'].dt.date)['high_escalation']
      .agg(count='sum', proportion='mean')
      .reset_index()
)
daily_high['date'] = pd.to_datetime(daily_high['date'])

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10), sharex=True)

# Top plot: Count of high-escalation headlines
high_dates = daily_high['date']
high_counts = daily_high['count']
ax1.plot(high_dates, high_counts, 'r-', alpha=0.7)
ax1.fill_between(high_dates, 0, high_counts, alpha=0.3, color='red')
ax1.set_ylabel('Count of High-Escalation Headlines (≥7)')
ax1.set_title('High-Escalation Headlines Over Time')
ax1.grid(True, alpha=0.3)

# Bottom plot: Proportion, shown as a percentage
high_pct = daily_high['proportion'] * 100
ax2.plot(high_dates, high_pct, 'b-', alpha=0.7)
ax2.fill_between(high_dates, 0, high_pct, alpha=0.3, color='blue')
ax2.set_xlabel('Date')
ax2.set_ylabel('Percentage of High-Escalation Headlines')
ax2.grid(True, alpha=0.3)

fig.tight_layout()
fig.savefig(OUTPUT_DIR / 'high_escalation_trends.png', dpi=300, bbox_inches='tight')
plt.close(fig)

# 7. Key events analysis (identify days with highest average scores)
top_days = daily_avg.nlargest(10, 'mean_score')[['date', 'mean_score', 'count']]
print("\n🔥 Top 10 Days with Highest Average Escalation:")
for _, row in top_days.iterrows():
    print(f"   {row['date'].date()}: {row['mean_score']:.2f} (n={int(row['count'])})")

# Save this analysis
top_days.to_csv(ROOT / "outputs" / "top_escalation_days.csv", index=False)

# 8. Source analysis
# Per-source mean / count / std of the headline score, highest mean first.
source_stats = df.groupby('source').agg({
    'score': ['mean', 'count', 'std']
}).round(2)
source_stats.columns = ['mean_score', 'count', 'std_dev']
source_stats = source_stats.sort_values('mean_score', ascending=False)
source_stats.to_csv(ROOT / "outputs" / "source_escalation_scores.csv")

print("\n📰 Top 5 Sources by Average Escalation Score:")
for source, row in source_stats.head().iterrows():
    # Every column here is numeric, so iterrows() hands back a float row;
    # cast the count back to int so it prints as "n=123", not "n=123.0".
    print(f"   {source}: {row['mean_score']:.2f} (n={int(row['count'])})")

# Final summary plot
# One 16x12 figure on a 3x2 grid: histogram and daily trend share the top
# row; the monthly boxplot and the source ranking each span a full row.
fig = plt.figure(figsize=(16, 12))
gs = fig.add_gridspec(3, 2, hspace=0.3, wspace=0.3)

# Subplot 1: Score distribution
ax1 = fig.add_subplot(gs[0, 0])
ax1.hist(df['score'], bins=11, range=(-0.5, 10.5), edgecolor='black', alpha=0.7)
ax1.set_xlabel('Score')
ax1.set_ylabel('Count')
ax1.set_title('Score Distribution')

# Subplot 2: Daily trend
ax2 = fig.add_subplot(gs[0, 1])
ax2.plot(daily_avg['date'], daily_avg['mean_score'], linewidth=1)
ax2.set_xlabel('Date')
ax2.set_ylabel('Avg Score')
ax2.set_title('Daily Average Score')
ax2.tick_params(axis='x', rotation=45)

# Subplot 3: Monthly boxplot
# Reuses ``monthly_data`` built in section 5; labels and data are both sliced
# to their last 12 entries, so they line up only because both follow the
# order of df['month'].unique().
ax3 = fig.add_subplot(gs[1, :])
monthly_labels = [str(m)[-7:] for m in df['month'].unique()][-12:]  # Last 12 months
monthly_data_recent = monthly_data[-12:]
# NOTE(review): newer Matplotlib releases rename boxplot's ``labels`` keyword
# to ``tick_labels`` — confirm against the installed version.
ax3.boxplot(monthly_data_recent, labels=monthly_labels)
ax3.set_xlabel('Month')
ax3.set_ylabel('Score')
ax3.set_title('Monthly Score Distribution (Last 12 Months)')
ax3.tick_params(axis='x', rotation=45)

# Subplot 4: Source comparison (top 10)
# ``source_stats`` is already sorted by mean score descending (section 8).
ax4 = fig.add_subplot(gs[2, :])
top_sources = source_stats.head(10)
ax4.barh(range(len(top_sources)), top_sources['mean_score'])
ax4.set_yticks(range(len(top_sources)))
ax4.set_yticklabels(top_sources.index)
ax4.set_xlabel('Average Escalation Score')
ax4.set_title('Top 10 Sources by Average Escalation Score')

plt.suptitle('Ukraine War Headlines Escalation Analysis Summary', fontsize=16, y=0.98)
plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'analysis_summary.png', dpi=300, bbox_inches='tight')
plt.close()

# Recap of every artefact this cell writes (plots under OUTPUT_DIR, CSVs
# under outputs/).
print(f"\n✅ Analysis complete! All plots saved to: {OUTPUT_DIR.name}/")
print("\n📊 Generated visualizations:")
print("   1. score_distribution.png - Histogram of all scores")
print("   2. daily_escalation_trend.png - Daily averages with 30-day rolling mean")
print("   3. weekly_escalation_trend.png - Weekly aggregated view")
print("   4. hourly_weekly_heatmap.png - Patterns by hour and day of week")
print("   5. monthly_boxplots.png - Monthly distribution boxplots")
print("   6. high_escalation_trends.png - Tracking headlines with scores ≥7")
print("   7. analysis_summary.png - Combined summary dashboard")
print("\n📄 Generated data files:")
print("   - daily_escalation_scores.csv")
print("   - top_escalation_days.csv")
print("   - source_escalation_scores.csv")

### 7-Day Rolling

In [None]:
# --- 7-day rolling mean of *headline* escalation --------------------------
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

try:
    # Use the bootstrap cell's resolver so this cell reads from the same
    # outputs/ directory the headline-analysis cell wrote to, and does not
    # overwrite the shared ROOT with a different path.
    ROOT = repo_root(Path.cwd())
except NameError:
    # Bootstrap cell not run in this kernel: keep the previous assumption
    # that the notebook's working directory is one level below the repo root.
    ROOT = Path.cwd().resolve().parents[0]
DAILY_CSV = ROOT / "outputs" / "daily_escalation_scores.csv"  # produced earlier

daily = pd.read_csv(DAILY_CSV, parse_dates=["date"])
daily = daily.set_index("date").sort_index()

# 7-day centred rolling mean
# window=7 counts rows, not calendar days, and yields NaN until a full
# window is available.
# NOTE(review): this equals seven calendar days only if no day is missing
# from the CSV — confirm.
daily["roll7"] = daily["mean_score"].rolling(window=7, center=True).mean()

plt.figure(figsize=(12,4))
plt.plot(daily.index, daily["roll7"], lw=2, color="crimson", label="7-day mean")
plt.scatter(daily.index, daily["mean_score"], s=8, alpha=0.3, label="daily mean")
plt.title("Headline escalation index – 7-day rolling mean")
plt.ylabel("escalation score (0-10)")
plt.legend()
plt.tight_layout()
plt.show()

## Claude Haiku 3.5 Truth Social Scores

In [None]:
# ╔══════════════════════════════════════════════════════════════════════╗
# ║  ANALYZE & VISUALIZE Truth Social Escalation Scores Over Time         ║
# ╚══════════════════════════════════════════════════════════════════════╝
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Set style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Configuration
try:
    # Resolve the root the same way the bootstrap cell does, so the headline
    # daily CSV is looked up where the headline-analysis cell saved it.
    ROOT = repo_root(Path.cwd())
except NameError:
    # Bootstrap cell not run in this kernel: keep the previous assumption
    # that the notebook's working directory is one level below the repo root.
    ROOT = Path.cwd().resolve().parents[0]
TRUTH_CSV = ROOT / "outputs" / "truth_scores_anthropic_3-5-haiku-20241022.csv"
HEADLINE_DAILY_CSV = ROOT / "outputs" / "daily_escalation_scores.csv"
OUTPUT_DIR = ROOT / "outputs" / "truth_analysis_plots"
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# ── Load Truth Social data ─────────────────────────────────────────────
print("📊 Loading scored Truth Social posts...")
df = pd.read_csv(TRUTH_CSV)

# Convert created_at to datetime with flexible format handling
# (format='mixed' parses each value individually; utc=True makes the column
# timezone-aware in UTC).
df['created_at'] = pd.to_datetime(df['created_at'], format='mixed', utc=True)

# Remove any rows with NaN scores
df = df[df['escalation_score'].notna()]

print(f"✅ Loaded {len(df):,} scored posts")
print(f"📅 Date range: {df['created_at'].min().date()} to {df['created_at'].max().date()}")

# ── Basic statistics ───────────────────────────────────────────────────
print("\n📈 Basic Statistics:")
print(f"   Mean escalation score: {df['escalation_score'].mean():.2f}")
print(f"   Median score: {df['escalation_score'].median():.0f}")
print(f"   Std deviation: {df['escalation_score'].std():.2f}")

# Blame direction stats
blame_labels = {-1: "No clear blame", 0: "Ukraine/NATO/West", 1: "Russia/Putin"}
blame_counts = df['blame_direction'].value_counts().sort_index()
print("\n🎯 Blame Direction Distribution:")
for direction, count in blame_counts.items():
    # .get() with a fallback: a code outside {-1, 0, 1} in the scored file is
    # reported instead of aborting the whole cell with a KeyError.
    label = blame_labels.get(direction, f"Unrecognized code ({direction})")
    print(f"   {label}: {count:,} ({count/len(df)*100:.1f}%)")

# Call-to-action stats
cta_pct = df['has_cta'].mean() * 100
print(f"\n📢 Call-to-Action: {df['has_cta'].sum():,} posts ({cta_pct:.1f}%) have CTAs")

# ── Calculate daily averages ───────────────────────────────────────────
# One row per calendar day (date part of ``created_at``): score summary
# statistics plus the daily share of posts with blame_direction == 0 and the
# daily mean of has_cta, all rounded to two decimals.
daily_avg = (
    df.groupby(df['created_at'].dt.date)
      .agg(
          mean_score=('escalation_score', 'mean'),
          median_score=('escalation_score', 'median'),
          std_dev=('escalation_score', 'std'),
          count=('escalation_score', 'count'),
          pct_blame_west=('blame_direction', lambda x: (x == 0).mean()),  # % blaming Ukraine/West
          pct_cta=('has_cta', 'mean'),  # % with CTA
      )
      .round(2)
      .reset_index()
)
daily_avg['created_at'] = pd.to_datetime(daily_avg['created_at'])

# Save daily averages
daily_avg_file = ROOT / "outputs" / "truth_daily_escalation_scores.csv"
daily_avg.to_csv(daily_avg_file, index=False)
print(f"\n💾 Saved daily averages to: {daily_avg_file.name}")

# ── Create visualizations ──────────────────────────────────────────────

# 1. Score distribution histogram (Truth Social posts only)
# Eleven unit-wide bins centred on 0..10, each annotated with its exact count.
hist_fig, hist_ax = plt.subplots(figsize=(12, 6))
hist_ax.hist(df['escalation_score'], bins=11, range=(-0.5, 10.5),
             edgecolor='black', alpha=0.7, label='Truth Social')
hist_ax.set_xlabel('Escalation Score')
hist_ax.set_ylabel('Number of Posts')
hist_ax.set_title('Distribution of Truth Social Escalation Scores (0-10)')
hist_ax.set_xticks(range(11))
for i in range(11):
    count = (df['escalation_score'] == i).sum()
    # Label sits a fixed 50 data units above the bar top.
    hist_ax.text(i, count + 50, str(count), ha='center', va='bottom', fontsize=9)
hist_ax.legend()
hist_fig.tight_layout()
hist_fig.savefig(OUTPUT_DIR / 'truth_score_distribution.png', dpi=300, bbox_inches='tight')
plt.close(hist_fig)

# 2. Daily Average with 7-day and 30-day rolling averages
# Grey dots are the raw daily means; the two lines are time-based rolling
# means over the date index (7-day centred, 30-day not centred).
fig, ax = plt.subplots(figsize=(14, 7))
ax.scatter(daily_avg['created_at'], daily_avg['mean_score'],
           alpha=0.3, s=20, color='gray', label='Daily mean')

# Calculate rolling averages
daily_avg_indexed = daily_avg.set_index('created_at').sort_index()
indexed_mean = daily_avg_indexed['mean_score']
rolling_7 = indexed_mean.rolling('7D', center=True).mean()
rolling_30 = indexed_mean.rolling('30D').mean()

for series, line_style, legend_name in (
    (rolling_7, 'b-', '7-day rolling mean'),
    (rolling_30, 'r-', '30-day rolling mean'),
):
    ax.plot(series.index, series.values, line_style, linewidth=2, label=legend_name)

ax.set(
    xlabel='Date',
    ylabel='Average Escalation Score',
    title='Truth Social: Daily Average Escalation Score Over Time',
)
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
fig.savefig(OUTPUT_DIR / 'truth_daily_escalation_trend.png', dpi=300, bbox_inches='tight')
plt.close(fig)

# 3. OVERLAY PLOT: Truth Social vs Headlines (7-day rolling)
# Draws the headline and Truth Social 7-day rolling means on one axis, with
# faint scatter points for each series' raw daily means.
print("\n📊 Creating overlay comparison plot...")

# Load headline data
# (daily headline averages saved by the headline-analysis cell)
headline_daily = pd.read_csv(HEADLINE_DAILY_CSV, parse_dates=['date'])
headline_daily = headline_daily.set_index('date').sort_index()
# Row-count window of 7 here, whereas the Truth Social series below uses a
# time-based '7D' window — the two smoothers are not computed identically.
headline_roll7 = headline_daily['mean_score'].rolling(window=7, center=True).mean()

# Prepare Truth Social 7-day rolling
truth_roll7 = rolling_7

# Create overlay plot
fig, ax = plt.subplots(figsize=(14, 8))

# Plot both series
ax.plot(headline_roll7.index, headline_roll7.values, 
        'crimson', linewidth=2.5, label='Headlines (News Media)', alpha=0.8)
ax.plot(truth_roll7.index, truth_roll7.values, 
        'navy', linewidth=2.5, label='Truth Social Posts', alpha=0.8)

# Add scatter points for daily values
ax.scatter(headline_daily.index, headline_daily['mean_score'], 
           alpha=0.15, s=10, color='crimson')
ax.scatter(daily_avg['created_at'], daily_avg['mean_score'], 
           alpha=0.15, s=10, color='navy')

ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Escalation Score (0-10)', fontsize=12)
ax.set_title('Ukraine War Escalation: Headlines vs Truth Social (7-day rolling mean)', fontsize=14)
ax.legend(fontsize=12)
ax.grid(True, alpha=0.3)

# Add annotations for key differences
# NOTE(review): the arrow target (2023-06-01, y=2.2) and the "lower baseline"
# wording are hard-coded, not derived from the data — re-check after
# re-scoring or when the date range changes.
ax.annotate('Truth Social\nlower baseline', 
            xy=(pd.Timestamp('2023-06-01'), 2.2), 
            xytext=(pd.Timestamp('2023-06-01'), 1.0),
            arrowprops=dict(arrowstyle='->', color='navy', alpha=0.5),
            fontsize=10, ha='center', color='navy')

plt.tight_layout()
# Save before show/close so the file is written from the live figure.
plt.savefig(OUTPUT_DIR / 'overlay_headlines_vs_truth.png', dpi=300, bbox_inches='tight')
plt.show()  # Display in notebook
plt.close()

# 4. Blame direction over time
# Two stacked panels sharing the date axis; both show a centred 7-day
# time-based rolling mean of a daily share, scaled to percent.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10), sharex=True)

# Top: Percentage blaming West/Ukraine
rolling_blame = daily_avg_indexed['pct_blame_west'].rolling('7D', center=True).mean() * 100
ax1.plot(rolling_blame.index, rolling_blame.values, 'orange', linewidth=2)
ax1.fill_between(rolling_blame.index, 0, rolling_blame.values, alpha=0.3, color='orange')
ax1.set(
    ylabel='% Posts Blaming Ukraine/West',
    title='Truth Social: Blame Attribution Over Time (7-day rolling)',
)
ax1.grid(True, alpha=0.3)

# Bottom: Percentage with Call-to-Action
rolling_cta = daily_avg_indexed['pct_cta'].rolling('7D', center=True).mean() * 100
ax2.plot(rolling_cta.index, rolling_cta.values, 'green', linewidth=2)
ax2.fill_between(rolling_cta.index, 0, rolling_cta.values, alpha=0.3, color='green')
ax2.set(xlabel='Date', ylabel='% Posts with Call-to-Action')
ax2.grid(True, alpha=0.3)

fig.tight_layout()
fig.savefig(OUTPUT_DIR / 'truth_blame_cta_trends.png', dpi=300, bbox_inches='tight')
plt.close(fig)

# 5. Correlation analysis between dimensions
# Pairwise correlation (DataFrame.corr defaults) of the three scoring
# columns, rendered as an annotated heatmap centred on zero.
scoring_dims = ['escalation_score', 'blame_direction', 'has_cta']
corr_matrix = df[scoring_dims].corr()
corr_fig, corr_ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0,
            square=True, linewidths=1, cbar_kws={"shrink": .8}, ax=corr_ax)
corr_ax.set_title('Correlation Between Truth Social Scoring Dimensions')
corr_fig.tight_layout()
corr_fig.savefig(OUTPUT_DIR / 'truth_correlation_matrix.png', dpi=300, bbox_inches='tight')
plt.close(corr_fig)

# 6. Monthly comparison
# Per-month mean / std / count of the escalation score plus the monthly mean
# of has_cta, rounded to two decimals.
df['month'] = df['created_at'].dt.to_period('M')
monthly_stats = (
    df.groupby('month')
      .agg(
          mean_score=('escalation_score', 'mean'),
          std_score=('escalation_score', 'std'),
          count=('escalation_score', 'count'),
          pct_cta=('has_cta', 'mean'),
      )
      .round(2)
      .reset_index()
)

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10), sharex=True)

# Top: Monthly mean scores with error bars (±1 std)
months = [str(m) for m in monthly_stats['month']]
x_pos = range(len(months))
ax1.errorbar(x_pos, monthly_stats['mean_score'], yerr=monthly_stats['std_score'],
             marker='o', capsize=5, capthick=2, linewidth=2)
ax1.set(
    ylabel='Mean Escalation Score',
    title='Truth Social: Monthly Average Escalation Scores',
)
ax1.grid(True, alpha=0.3, axis='y')

# Bottom: Monthly CTA percentage
ax2.bar(x_pos, monthly_stats['pct_cta'] * 100, alpha=0.7, color='green')
ax2.set(xlabel='Month', ylabel='% Posts with CTA')
ax2.set_xticks(x_pos)
ax2.set_xticklabels(months, rotation=45, ha='right')
ax2.grid(True, alpha=0.3, axis='y')

fig.tight_layout()
fig.savefig(OUTPUT_DIR / 'truth_monthly_trends.png', dpi=300, bbox_inches='tight')
plt.close(fig)

# 7. High-escalation analysis (scores >= 7)
# Per day: how many posts scored 7 or above and what share of the day's
# posts they represent; the plot shows a centred 7-day rolling mean of that
# share in percent.
df['high_escalation'] = df['escalation_score'] >= 7
daily_high = (
    df.groupby(df['created_at'].dt.date)['high_escalation']
      .agg(count='sum', proportion='mean')
      .reset_index()
)
daily_high['created_at'] = pd.to_datetime(daily_high['created_at'])

high_fig, high_ax = plt.subplots(figsize=(14, 7))
daily_high_indexed = daily_high.set_index('created_at')
rolling_high = daily_high_indexed['proportion'].rolling('7D', center=True).mean() * 100

high_ax.plot(rolling_high.index, rolling_high.values, 'red', linewidth=2)
high_ax.fill_between(rolling_high.index, 0, rolling_high.values, alpha=0.3, color='red')
high_ax.set_xlabel('Date')
high_ax.set_ylabel('% High-Escalation Posts (≥7)')
high_ax.set_title('Truth Social: Proportion of High-Escalation Posts (7-day rolling)')
high_ax.grid(True, alpha=0.3)
high_fig.tight_layout()
high_fig.savefig(OUTPUT_DIR / 'truth_high_escalation_trend.png', dpi=300, bbox_inches='tight')
plt.close(high_fig)

# 8. Top escalation days
top_days = daily_avg.nlargest(10, 'mean_score')[['created_at', 'mean_score', 'count']]
print("\n🔥 Top 10 Days with Highest Average Escalation (Truth Social):")
for _, row in top_days.iterrows():
    print(f"   {row['created_at'].date()}: {row['mean_score']:.2f} (n={row['count']})")

# Save top days
top_days.to_csv(ROOT / "outputs" / "truth_top_escalation_days.csv", index=False)

# 9. Summary statistics comparison
print("\n📊 COMPARATIVE SUMMARY: Truth Social vs Headlines")
print("=" * 60)

# Calculate headline stats for comparison
# Guard on the file that is actually read below. Checking HEADLINE_DAILY_CSV
# (the daily aggregate) says nothing about whether the per-headline scores
# file exists.
headline_scores_csv = ROOT / "outputs" / "headline_scores_partial.csv"
if headline_scores_csv.exists():
    headline_scores = pd.read_csv(headline_scores_csv)
    headline_mean = headline_scores['score'].mean()
    truth_mean = df['escalation_score'].mean()

    print(f"\nMean Escalation Score:")
    print(f"   Headlines: {headline_mean:.2f}")
    print(f"   Truth Social: {truth_mean:.2f}")
    print(f"   Difference: {truth_mean - headline_mean:.2f}")

    print(f"\nStandard Deviation:")
    print(f"   Headlines: {headline_scores['score'].std():.2f}")
    print(f"   Truth Social: {df['escalation_score'].std():.2f}")
else:
    # Say why the comparison is missing instead of skipping it silently.
    print(f"\n(skipped: {headline_scores_csv.name} not found)")

# Final summary dashboard
# 3x3 grid: three small panels on top, a full-width trend comparison in the
# middle, monthly means plus a text panel of key metrics at the bottom.
fig = plt.figure(figsize=(16, 12))
gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)

# Subplot 1: Score distribution
ax1 = fig.add_subplot(gs[0, 0])
ax1.hist(df['escalation_score'], bins=11, range=(-0.5, 10.5),
         edgecolor='black', alpha=0.7, color='navy')
ax1.set_xlabel('Score')
ax1.set_ylabel('Count')
ax1.set_title('Escalation Score Distribution')

# Subplot 2: Blame direction pie chart
# value_counts() orders categories by frequency, so look each code up
# explicitly; otherwise the fixed labels/colours can land on the wrong wedge
# (and the call fails outright when fewer than three codes are present).
ax2 = fig.add_subplot(gs[0, 1])
blame_counts = df['blame_direction'].value_counts()
blame_codes = [-1, 0, 1]
labels = ['No blame', 'Ukraine/West', 'Russia']
colors = ['gray', 'orange', 'red']
blame_sizes = [blame_counts.get(code, 0) for code in blame_codes]
wedges, texts, autotexts = ax2.pie(blame_sizes, labels=labels, colors=colors,
                                    autopct='%1.1f%%', startangle=90)
ax2.set_title('Blame Attribution')

# Subplot 3: CTA distribution
# Build the two bars explicitly in label order rather than relying on the
# frequency ordering of value_counts(). Counts ignore missing has_cta values.
ax3 = fig.add_subplot(gs[0, 2])
cta_yes = int(df['has_cta'].sum())
cta_no = int(df['has_cta'].notna().sum()) - cta_yes
cta_counts = [cta_no, cta_yes]
ax3.bar(['No CTA', 'Has CTA'], cta_counts, color=['lightgray', 'green'])
ax3.set_ylabel('Count')
ax3.set_title('Call-to-Action Distribution')

# Subplot 4: Daily trend (full width)
ax4 = fig.add_subplot(gs[1, :])
ax4.plot(rolling_7.index, rolling_7.values, 'navy', linewidth=2, label='Truth Social')
if 'headline_roll7' in locals():
    ax4.plot(headline_roll7.index, headline_roll7.values, 'crimson',
             linewidth=2, label='Headlines', alpha=0.7)
ax4.set_xlabel('Date')
ax4.set_ylabel('7-day Rolling Mean')
ax4.set_title('Escalation Score Trends Comparison')
ax4.legend()
ax4.grid(True, alpha=0.3)

# Subplot 5: Monthly averages (every third month gets a tick label)
ax5 = fig.add_subplot(gs[2, :2])
ax5.plot(range(len(monthly_stats)), monthly_stats['mean_score'], 'o-', linewidth=2)
ax5.set_xticks(range(0, len(monthly_stats), 3))
ax5.set_xticklabels([str(m) for m in monthly_stats['month']][::3], rotation=45)
ax5.set_xlabel('Month')
ax5.set_ylabel('Mean Score')
ax5.set_title('Monthly Average Escalation')
ax5.grid(True, alpha=0.3)

# Subplot 6: Key metrics
ax6 = fig.add_subplot(gs[2, 2])
ax6.axis('off')
metrics_text = f"""Key Metrics:

Total Posts: {len(df):,}
Date Range: {df['created_at'].min().date()} to {df['created_at'].max().date()}

Mean Score: {df['escalation_score'].mean():.2f}
Median: {df['escalation_score'].median():.0f}
Std Dev: {df['escalation_score'].std():.2f}

High Escalation (≥7): {(df['escalation_score'] >= 7).sum():,} ({(df['escalation_score'] >= 7).mean()*100:.1f}%)
Has CTA: {df['has_cta'].sum():,} ({df['has_cta'].mean()*100:.1f}%)
Blames West: {(df['blame_direction'] == 0).sum():,} ({(df['blame_direction'] == 0).mean()*100:.1f}%)
"""
ax6.text(0.1, 0.9, metrics_text, transform=ax6.transAxes, fontsize=10,
         verticalalignment='top', fontfamily='monospace')

plt.suptitle('Truth Social Ukraine War Posts: Comprehensive Analysis', fontsize=16, y=0.98)
plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'truth_analysis_summary.png', dpi=300, bbox_inches='tight')
plt.close()

# Recap of every artefact this cell writes.
print(f"\n✅ Analysis complete! All plots saved to: {OUTPUT_DIR.name}/")
print("\n📊 Generated visualizations:")
print("   1. truth_score_distribution.png - Histogram of escalation scores")
print("   2. truth_daily_escalation_trend.png - Daily averages with rolling means")
print("   3. overlay_headlines_vs_truth.png - COMPARISON WITH HEADLINES")
print("   4. truth_blame_cta_trends.png - Blame and CTA patterns over time")
print("   5. truth_correlation_matrix.png - Correlation between dimensions")
print("   6. truth_monthly_trends.png - Monthly aggregated view")
print("   7. truth_high_escalation_trend.png - High escalation posts tracking")
print("   8. truth_analysis_summary.png - Comprehensive dashboard")
print("\n📄 Generated data files:")
print("   - truth_daily_escalation_scores.csv")
print("   - truth_top_escalation_days.csv")

## Anthropic Model Agreement Analysis

In [None]:
# ╔══════════════════════════════════════════════════════════════════════╗
# ║  THREE-MODEL COMPARISON: Haiku 3.5 vs Sonnet 4 vs Opus 4             ║
# ╚══════════════════════════════════════════════════════════════════════╝
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy import stats
from itertools import combinations

# Configuration
try:
    # Resolve the root the same way the bootstrap cell does, so all cells
    # agree on where outputs/ lives.
    ROOT = repo_root(Path.cwd())
except NameError:
    # Bootstrap cell not run in this kernel: keep the previous assumption
    # that the notebook's working directory is one level below the repo root.
    ROOT = Path.cwd().resolve().parents[0]
HAIKU_CSV = ROOT / "outputs" / "truth_scores_anthropic_3-5-haiku-20241022.csv"
SONNET_CSV = ROOT / "outputs" / "truth_scores_anthropic_claude-sonnet-4-20250514.csv"
OPUS_CSV = ROOT / "outputs" / "truth_scores_anthropic_claude-opus-4-20250514.csv"
OUTPUT_DIR = ROOT / "outputs" / "three_model_comparison"
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

print("📊 Loading model outputs...")
# Load all three datasets (one scored CSV per model)
haiku_df = pd.read_csv(HAIKU_CSV)
sonnet_df = pd.read_csv(SONNET_CSV)
opus_df = pd.read_csv(OPUS_CSV)

# Merge on common identifier columns
# Inner-join the three score tables on the post identity columns; each
# model's three scoring columns end up suffixed _haiku / _sonnet / _opus.
merge_cols = ['created_at', 'account', 'id', 'text']
score_cols = ['escalation_score', 'blame_direction', 'has_cta']
keep_cols = merge_cols + score_cols

# Haiku and Sonnet share column names, so the merge suffixes disambiguate them.
haiku_sonnet = haiku_df[keep_cols].merge(
    sonnet_df[keep_cols],
    on=merge_cols,
    how='inner',
    suffixes=('_haiku', '_sonnet'),
)

# Opus columns would not collide after the first merge, so name them up front.
opus_scores = opus_df[keep_cols].rename(
    columns={col: f'{col}_opus' for col in score_cols}
)
comparison_df = haiku_sonnet.merge(opus_scores, on=merge_cols, how='inner')

print(f"✅ Matched {len(comparison_df)} posts scored by all three models")

# Calculate pairwise differences
# diff_<a>_<b> = escalation score of model a minus that of model b.
model_pairs = [('haiku', 'sonnet'), ('haiku', 'opus'), ('sonnet', 'opus')]
for m1, m2 in model_pairs:
    comparison_df[f'diff_{m1}_{m2}'] = (
        comparison_df[f'escalation_score_{m1}'] - comparison_df[f'escalation_score_{m2}']
    )

# Agreement metrics
# Per pair: correlation of escalation scores, and exact-match rates for the
# blame and CTA columns.
print("\n📈 Pairwise Agreement Statistics:")
for m1, m2 in model_pairs:
    first, second = m1.capitalize(), m2.capitalize()
    esc_corr = comparison_df[f'escalation_score_{m1}'].corr(comparison_df[f'escalation_score_{m2}'])
    blame_agree = (comparison_df[f'blame_direction_{m1}'] == comparison_df[f'blame_direction_{m2}']).mean()
    cta_agree = (comparison_df[f'has_cta_{m1}'] == comparison_df[f'has_cta_{m2}']).mean()

    print(f"\n{first} vs {second}:")
    print(f"   Escalation correlation: {esc_corr:.3f}")
    print(f"   Blame agreement: {blame_agree*100:.1f}%")
    print(f"   CTA agreement: {cta_agree*100:.1f}%")

# Overall statistics by model
print("\n📊 Model Statistics:")
for model in ['haiku', 'sonnet', 'opus']:
    model_scores = comparison_df[f'escalation_score_{model}']
    model_blame = comparison_df[f'blame_direction_{model}']
    esc_mean = model_scores.mean()
    esc_std = model_scores.std()
    blame_west = (model_blame == 0).mean() * 100
    blame_russia = (model_blame == 1).mean() * 100
    has_cta = comparison_df[f'has_cta_{model}'].mean() * 100

    print(f"\n{model.capitalize()}:")
    print(f"   Escalation: mean={esc_mean:.2f}, std={esc_std:.2f}")
    print(f"   Blames West: {blame_west:.1f}%")
    print(f"   Blames Russia: {blame_russia:.1f}%")
    print(f"   Has CTA: {has_cta:.1f}%")

# Create visualizations
# 4x3 dashboard; this part creates the figure and fills the first two rows.
fig = plt.figure(figsize=(20, 16))
gs = fig.add_gridspec(4, 3, hspace=0.3, wspace=0.3)
fig.suptitle('Three-Model Comparison: Haiku 3.5 vs Sonnet 4 vs Opus 4', fontsize=18)

# Row 1: Pairwise escalation scatter plots
# One panel per model pair, with the y = x diagonal as the perfect-agreement
# reference and the pair's correlation boxed in the top-left corner.
for i, (m1, m2) in enumerate(model_pairs):
    x_scores = comparison_df[f'escalation_score_{m1}']
    y_scores = comparison_df[f'escalation_score_{m2}']

    ax = fig.add_subplot(gs[0, i])
    ax.scatter(x_scores, y_scores, alpha=0.3, s=10)
    ax.plot([0, 10], [0, 10], 'r--', alpha=0.5)
    ax.set_xlabel(f'{m1.capitalize()} Score')
    ax.set_ylabel(f'{m2.capitalize()} Score')
    ax.set_title(f'{m1.capitalize()} vs {m2.capitalize()}')
    ax.grid(True, alpha=0.3)

    # Add correlation text
    corr = x_scores.corr(y_scores)
    ax.text(0.05, 0.95, f'r = {corr:.3f}', transform=ax.transAxes,
            verticalalignment='top', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

# Row 2: Escalation score distributions
# Grouped bars: for each score 0..10, one bar per model, offset by ``width``.
ax = fig.add_subplot(gs[1, :])
models = ['haiku', 'sonnet', 'opus']
positions = np.arange(11)
width = 0.25

for i, model in enumerate(models):
    counts = (
        comparison_df[f'escalation_score_{model}']
        .value_counts()
        .sort_index()
        .reindex(range(11), fill_value=0)  # scores nobody assigned count as 0
    )
    ax.bar(positions + i*width, counts.values, width, label=model.capitalize(), alpha=0.7)

ax.set_xlabel('Escalation Score')
ax.set_ylabel('Count')
ax.set_title('Escalation Score Distributions by Model')
ax.set_xticks(positions + width)  # centre the tick under the middle bar
ax.set_xticklabels(positions)
ax.legend()
ax.grid(True, alpha=0.3, axis='y')

# Row 3: Blame direction comparison
# One pie per model. Wedge sizes are looked up code by code (-1, 0, 1) so
# each label and colour always matches its category; a code a model never
# produced contributes 0.
for i, model in enumerate(models):
    ax = fig.add_subplot(gs[2, i])
    blame_counts = comparison_df[f'blame_direction_{model}'].value_counts()
    label_map = {-1: 'No blame', 0: 'West/NATO', 1: 'Russia'}
    labels = list(label_map.values())
    sizes = [blame_counts.get(j, 0) for j in label_map]
    colors = ['gray', 'orange', 'red']

    wedges, texts, autotexts = ax.pie(sizes, labels=labels, colors=colors,
                                       autopct='%1.1f%%', startangle=90)
    ax.set_title(f'{model.capitalize()} - Blame Attribution')

# Row 4: Three-way agreement analysis
ax1 = fig.add_subplot(gs[3, 0])
# Calculate where all three models agree within 1 point
all_agree_esc = ((abs(comparison_df['diff_haiku_sonnet']) <= 1) &
                 (abs(comparison_df['diff_haiku_opus']) <= 1) &
                 (abs(comparison_df['diff_sonnet_opus']) <= 1)).mean() * 100

# Blame and CTA count as agreement only on an exact three-way match.
all_agree_blame = ((comparison_df['blame_direction_haiku'] == comparison_df['blame_direction_sonnet']) &
                   (comparison_df['blame_direction_haiku'] == comparison_df['blame_direction_opus'])).mean() * 100

all_agree_cta = ((comparison_df['has_cta_haiku'] == comparison_df['has_cta_sonnet']) &
                 (comparison_df['has_cta_haiku'] == comparison_df['has_cta_opus'])).mean() * 100

agreement_data = [all_agree_esc, all_agree_blame, all_agree_cta]
agreement_labels = ['Escalation\n(within ±1)', 'Blame\nDirection', 'Call to\nAction']

bars = ax1.bar(agreement_labels, agreement_data, color=['blue', 'orange', 'green'], alpha=0.7)
ax1.set_ylabel('Agreement Rate (%)')
ax1.set_title('Three-Way Agreement Rates')
ax1.set_ylim(0, 100)

# Add value labels on bars
for bar, value in zip(bars, agreement_data):
    height = bar.get_height()
    ax1.text(bar.get_x() + bar.get_width()/2., height + 1,
             f'{value:.1f}%', ha='center', va='bottom')

# Systematic bias analysis
ax2 = fig.add_subplot(gs[3, 1])
mean_scores = [comparison_df[f'escalation_score_{m}'].mean() for m in models]
ax2.bar(models, mean_scores, color=['lightblue', 'lightgreen', 'lightcoral'], alpha=0.7)
ax2.set_ylabel('Mean Escalation Score')
ax2.set_title('Average Escalation by Model')
# Keep the 0-3 range as the minimum, but grow it when a model's mean is
# higher so neither the bar nor its value label gets clipped.
ax2.set_ylim(0, max(3, max(mean_scores) * 1.15))

for i, (model, score) in enumerate(zip(models, mean_scores)):
    ax2.text(i, score + 0.05, f'{score:.2f}', ha='center', va='bottom')

# Variance in scoring
# Row-wise variance of the three models' escalation scores for each post.
ax3 = fig.add_subplot(gs[3, 2])
variance_data = comparison_df[['escalation_score_haiku', 'escalation_score_sonnet', 'escalation_score_opus']].var(axis=1)
ax3.hist(variance_data, bins=30, edgecolor='black', alpha=0.7)
ax3.set_xlabel('Variance in Scores')
ax3.set_ylabel('Number of Posts')
ax3.set_title('Distribution of Score Variance Across Models')
ax3.axvline(variance_data.mean(), color='red', linestyle='--', label=f'Mean: {variance_data.mean():.2f}')
ax3.legend()

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'three_model_comparison_dashboard.png', dpi=300, bbox_inches='tight')
plt.show()

# Identify posts with high disagreement
# "High variance" = cross-model score variance strictly above the 95th
# percentile of all posts.
variance_cutoff = variance_data.quantile(0.95)
is_high_variance = variance_data > variance_cutoff
high_variance = comparison_df[is_high_variance].copy()
high_variance['score_variance'] = variance_data[is_high_variance]
print(f"\n⚠️  Found {len(high_variance)} posts with high variance (top 5%)")

# Save comparison data (full table plus a slim export of the disputed posts)
comparison_df.to_csv(OUTPUT_DIR / 'three_model_comparison_full.csv', index=False)
export_cols = ['text', 'escalation_score_haiku', 'escalation_score_sonnet',
               'escalation_score_opus', 'score_variance']
high_variance[export_cols].to_csv(OUTPUT_DIR / 'high_variance_posts.csv', index=False)

print(f"\n💾 Results saved to: {OUTPUT_DIR}")

# Recommended model selection
# NOTE(review): the per-model statements below are fixed text, not derived
# from the statistics computed above — re-check them whenever the inputs
# change.
for guidance_line in (
    "\n🎯 Model Selection Guidance:",
    "\nBased on the analysis:",
    "- Haiku 3.5: Highest escalation scores, strongest West-blame attribution",
    "- Sonnet 4: Lowest blame attribution, very low escalation",
    "- Opus 4: Middle ground on blame, lowest escalation scores",
    "\nRecommendation: Validate a sample from high-variance posts to determine",
    "which model best aligns with human judgment.",
):
    print(guidance_line)

### High Variance Posts

In [None]:
# ╔══════════════════════════════════════════════════════════════════════╗
# ║  EXTRACT FOCUSED VALIDATION SAMPLE FROM HIGH-VARIANCE POSTS           ║
# ╚══════════════════════════════════════════════════════════════════════╝
import pandas as pd
from pathlib import Path
import numpy as np

# Paths for this cell. ROOT is re-derived here as the parent of the current
# working directory.
# NOTE(review): this replaces any ROOT set by an earlier cell — confirm the
# notebook is always launched from a direct subfolder of the repo.
ROOT = Path.cwd().resolve().parents[0]
_outputs_root = ROOT.joinpath("outputs")
HIGH_VAR_CSV = _outputs_root.joinpath("three_model_comparison", "high_variance_posts.csv")
OUTPUT_DIR = _outputs_root.joinpath("focused_validation")
# Creates only the leaf directory; "outputs" itself must already exist.
OUTPUT_DIR.mkdir(exist_ok=True)

# Read the high-variance posts into `df`.
print("📊 Loading high-variance posts...")
df = pd.read_csv(HIGH_VAR_CSV)
print(f"✅ Loaded {len(df)} high-variance posts")

# Per-post spread across the three model scores.
_model_scores = df[['escalation_score_haiku', 'escalation_score_sonnet', 'escalation_score_opus']]
df['max_score'] = _model_scores.max(axis=1)
df['min_score'] = _model_scores.min(axis=1)
df['score_range'] = df['max_score'] - df['min_score']

_haiku = df['escalation_score_haiku']
_sonnet = df['escalation_score_sonnet']
_opus = df['escalation_score_opus']

# haiku_outlier: Haiku is more than 2 points above BOTH other models.
df['haiku_outlier'] = (_haiku > _sonnet + 2) & (_haiku > _opus + 2)

# sonnet_opus_agree: Sonnet and Opus differ by at most 1 point.
df['sonnet_opus_agree'] = (_sonnet - _opus).abs() <= 1

# Build the validation categories. Each category keeps the first rows of its
# filtered frame (in file order): 10 per category, 5 for military content.
_haiku = df['escalation_score_haiku']
_sonnet = df['escalation_score_sonnet']
_opus = df['escalation_score_opus']

# Category 1: Haiku scores high (4+) while both others score low (<= 1).
_haiku_high_others_low = (_haiku >= 4) & (_sonnet <= 1) & (_opus <= 1)
cat1 = df[_haiku_high_others_low]

# Category 2: wide overall spread (range >= 4) and Sonnet/Opus do not agree.
_wide_and_split = (df['score_range'] >= 4) & (~df['sonnet_opus_agree'])
cat2 = df[_wide_and_split]

# Category 3: Sonnet and Opus agree, but Haiku is 3+ points away from Opus.
_haiku_far_from_opus = (_haiku - _opus).abs() >= 3
cat3 = df[df['sonnet_opus_agree'] & _haiku_far_from_opus]

# Category 4: US-politics keywords (case-insensitive; missing text never
# matches), restricted to score variance above 3.
_mentions_politics = df['text'].str.contains(
    'Biden|Trump|Democrat|Republican|MAGA', case=False, na=False)
political_keywords = df[_mentions_politics]
cat4 = political_keywords[political_keywords['score_variance'] > 3]

# Category 5: explicit war/military vocabulary, same variance threshold.
_mentions_military = df['text'].str.contains(
    'nuclear|missile|weapon|bomb|attack|strike', case=False, na=False)
military_keywords = df[_mentions_military]
cat5 = military_keywords[military_keywords['score_variance'] > 3]

# Insertion order sets the order in which categories are combined later.
validation_samples = {
    'haiku_high_others_low': cat1.head(10),
    'all_disagree': cat2.head(10),
    'haiku_outlier_sonnet_opus_agree': cat3.head(10),
    'political_content': cat4.head(10),
    'military_content': cat5.head(5),
}

# Combine all samples: tag every sampled row with the category it came from
# and stack the per-category frames into one table.
all_validation = [
    sample_df.assign(validation_category=category)
    for category, sample_df in validation_samples.items()
]
validation_df = pd.concat(all_validation, ignore_index=True)

# A post can match several categories; keep only its first occurrence.
# drop_duplicates leaves gaps in the index, and the index is later used to
# number posts in the batch sheets, so renumber rows 0..n-1 to keep those
# numbers contiguous and in step with the CSV row order.
validation_df = validation_df.drop_duplicates(subset=['text']).reset_index(drop=True)

# Human-readable output: category, text, the three model scores, variance.
output_df = validation_df[[
    'validation_category',
    'text',
    'escalation_score_haiku',
    'escalation_score_sonnet',
    'escalation_score_opus',
    'score_variance'
]].copy()

# Blank columns for the human annotator to fill in.
for _blank_column in ('human_escalation', 'human_blame', 'human_cta', 'human_notes'):
    output_df[_blank_column] = ''

# Save full validation set
output_df.to_csv(OUTPUT_DIR / 'focused_validation_sample.csv', index=False)

# Create simplified scoring sheets: one plain-text file per batch of posts.
print("\n📝 Creating simplified scoring sheets...")

# Split into manageable chunks (10 posts per sheet)
chunk_size = 10
for i, chunk_start in enumerate(range(0, len(output_df), chunk_size)):
    chunk = output_df.iloc[chunk_start:chunk_start + chunk_size]

    # Create a text file for easier reading
    with open(OUTPUT_DIR / f'validation_batch_{i+1}.txt', 'w', encoding='utf-8') as f:
        f.write(f"VALIDATION BATCH {i+1}\n")
        f.write("=" * 80 + "\n\n")

        # Number posts by their 1-based position in output_df, not by index
        # label, so numbering is contiguous and matches the CSV row order
        # even when the frame's index has gaps.
        for post_number, (_, row) in enumerate(chunk.iterrows(), start=chunk_start + 1):
            # pandas reads missing text as NaN (a float); treat it as empty
            # instead of failing on the slice below.
            text = row['text'] if isinstance(row['text'], str) else ''
            preview = text[:500] + ('...' if len(text) > 500 else '')
            f.writelines([
                f"POST #{post_number}\n",
                f"Category: {row['validation_category']}\n",
                f"Text: {preview}\n",
                "\nModel Scores:\n",
                f"  Haiku:  {row['escalation_score_haiku']}\n",
                f"  Sonnet: {row['escalation_score_sonnet']}\n",
                f"  Opus:   {row['escalation_score_opus']}\n",
                "\nYour Scores:\n",
                "  Escalation (0-10): _____\n",
                "  Blame (-1/0/1): _____\n",
                "  CTA (0/1): _____\n",
                "  Notes: _______________________________________________\n",
                "\n" + "-" * 80 + "\n\n",
            ])

# Summary: total posts kept after de-duplication, then the size of each
# category sample as drawn (i.e. before de-duplication).
print(f"\n✅ Extracted {len(validation_df)} posts for focused validation")
print("\n📊 Sample distribution:")
for _category_name, _category_frame in validation_samples.items():
    print(f"   {_category_name}: {len(_category_frame)} posts")

# Number of the last batch file: ceil(len(output_df) / chunk_size).
_last_batch = (len(output_df) - 1) // chunk_size + 1
print(f"\n💾 Files saved to: {OUTPUT_DIR}")
print("   - focused_validation_sample.csv (full data)")
print(f"   - validation_batch_1.txt through validation_batch_{_last_batch}.txt (readable format)")

# Show one concrete example for two of the disagreement categories.
def _print_example(row):
    """Print the first 150 characters of a post and its three model scores."""
    print(f"   Text: {row['text'][:150]}...")
    print(f"   Scores - Haiku: {row['escalation_score_haiku']}, Sonnet: {row['escalation_score_sonnet']}, Opus: {row['escalation_score_opus']}")


print("\n🔍 Example disagreement patterns:")

# The heading is always printed; the example only when the category has rows.
print("\n1. Haiku sees escalation, others don't:")
example1 = cat1.iloc[0] if len(cat1) else None
if example1 is not None:
    _print_example(example1)

print("\n2. Political content scoring:")
example2 = cat4.iloc[0] if len(cat4) else None
if example2 is not None:
    _print_example(example2)

# Aggregate view of the high-variance set: counts of the two boolean flags,
# then the mean score of each model.
_haiku_outlier_count = int(df['haiku_outlier'].sum())
_sonnet_opus_agree_count = int(df['sonnet_opus_agree'].sum())

print("\n📈 Score Pattern Analysis:")
print(f"   Posts where Haiku > both others by 3+: {_haiku_outlier_count}")
print(f"   Posts where Sonnet & Opus agree (±1): {_sonnet_opus_agree_count}")
for _model_label in ("Haiku", "Sonnet", "Opus"):
    _model_mean = df[f"escalation_score_{_model_label.lower()}"].mean()
    print(f"   Average {_model_label} score in high-variance set: {_model_mean:.2f}")

In [None]:
# ╔══════════════════════════════════════════════════════════════════════╗
# ║  PLOT Escalation Scores with Major Events Timeline Overlay            ║
# ╚══════════════════════════════════════════════════════════════════════╝
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from pathlib import Path
import json
import numpy as np
from datetime import datetime

# Paths for this cell. ROOT is re-derived as the parent of the current
# working directory.
ROOT = Path.cwd().resolve().parents[0]
_outputs_root = ROOT / "outputs"
HEADLINE_DAILY_CSV = _outputs_root / "daily_escalation_scores.csv"
TRUTH_DAILY_CSV = _outputs_root / "truth_daily_escalation_scores.csv"
TIMELINE_JSON = ROOT / "src" / "ukraine-war-timeline.json"
OUTPUT_DIR = _outputs_root / "timeline_analysis"
# Creates only the leaf directory; "outputs" itself must already exist.
OUTPUT_DIR.mkdir(exist_ok=True)

# Daily headline scores, indexed and sorted by date.
print("📊 Loading escalation scores...")
headline_daily = (
    pd.read_csv(HEADLINE_DAILY_CSV, parse_dates=['date'])
    .set_index('date')
    .sort_index()
)
# Centred rolling mean over 7 rows (observations, not calendar days).
headline_roll7 = headline_daily['mean_score'].rolling(window=7, center=True).mean()

# Daily Truth Social scores; the date column arrives as 'created_at' and is
# renamed so both frames share a 'date' index.
truth_daily = (
    pd.read_csv(TRUTH_DAILY_CSV, parse_dates=['created_at'])
    .rename(columns={'created_at': 'date'})
    .set_index('date')
    .sort_index()
)
truth_roll7 = truth_daily['mean_score'].rolling(window=7, center=True).mean()

# Load timeline events
print("📅 Loading timeline events...")
# The file is parsed as JSON Lines: one JSON object per non-blank line.
# Decode explicitly as UTF-8 instead of relying on the platform default
# encoding, so non-ASCII event labels load the same on every OS.
with open(TIMELINE_JSON, 'r', encoding='utf-8') as f:
    events = [json.loads(line) for line in f if line.strip()]

# Convert events to DataFrame and parse the event dates.
events_df = pd.DataFrame(events)
events_df['date'] = pd.to_datetime(events_df['date'])

# Filter only major events. The explicit comparison (rather than using the
# column directly as a mask) also copes with missing or non-boolean flags.
major_events = events_df[events_df['major'] == True].copy()

# Create the main plot: 7-row rolling means for both sources, faint daily
# points underneath, and a labelled vertical line per major event.
fig, ax = plt.subplots(figsize=(16, 9))

# Plot rolling averages
ax.plot(headline_roll7.index, headline_roll7.values,
        'crimson', linewidth=2.5, label='News Headlines', alpha=0.9)
ax.plot(truth_roll7.index, truth_roll7.values,
        'navy', linewidth=2.5, label='Truth Social', alpha=0.9)

# Add daily scatter points with lower opacity
ax.scatter(headline_daily.index, headline_daily['mean_score'],
           alpha=0.15, s=15, color='crimson')
ax.scatter(truth_daily.index, truth_daily['mean_score'],
           alpha=0.15, s=15, color='navy')

# Fix the y-axis BEFORE placing event labels. Label heights are fractions of
# the axis top, so they must be computed against the final limit rather than
# the autoscaled one (which also accounts for the daily scatter points).
y_top = max(headline_roll7.max(), truth_roll7.max()) * 1.1
ax.set_ylim(0, y_top)

# Date span covered by either rolling series (loop-invariant).
data_start = min(headline_roll7.index.min(), truth_roll7.index.min())
data_end = max(headline_roll7.index.max(), truth_roll7.index.max())

# Add major event vertical lines and labels
for position, (_, event) in enumerate(major_events.iterrows()):
    event_date = event['date']

    # Only plot if within data range
    if not (data_start <= event_date <= data_end):
        continue

    ax.axvline(x=event_date, color='red', alpha=0.3, linestyle='--', linewidth=1)

    # Alternate label heights by the event's position among the major events
    # to reduce overlap between neighbouring labels.
    y_position = y_top * (0.95 if position % 2 == 0 else 0.85)

    ax.text(event_date, y_position, event['label'],
            rotation=45, fontsize=8, ha='right', va='top',
            bbox=dict(boxstyle='round,pad=0.3', facecolor='yellow', alpha=0.5))

# Formatting
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Escalation Score (0-10)', fontsize=12)
ax.set_title('Ukraine War Escalation: News Headlines vs Truth Social with Major Events\n(7-day rolling mean)',
             fontsize=14, pad=20)
ax.legend(fontsize=12, loc='upper left')
ax.grid(True, alpha=0.3)

# Format x-axis: one tick every two months, labelled YYYY-MM.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
plt.xticks(rotation=45)

# Add annotations for key patterns (positions are hand-picked constants).
ax.annotate('Truth Social\nconsistently lower',
            xy=(pd.Timestamp('2023-06-01'), 2.2),
            xytext=(pd.Timestamp('2023-08-01'), 1.0),
            arrowprops=dict(arrowstyle='->', color='navy', alpha=0.5),
            fontsize=10, ha='center', color='navy')

# The arrow tip uses the rolling value on a fixed date. Skip the annotation
# when that date is absent or has no rolling value, instead of raising.
spike_date = pd.Timestamp('2023-06-04')
spike_value = headline_roll7.get(spike_date)
if np.isscalar(spike_value) and pd.notna(spike_value):
    ax.annotate('Headlines spike\nwith major events',
                xy=(spike_date, spike_value),
                xytext=(pd.Timestamp('2023-04-01'), 5.5),
                arrowprops=dict(arrowstyle='->', color='crimson', alpha=0.5),
                fontsize=10, ha='center', color='crimson')

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'escalation_timeline_overlay.png', dpi=300, bbox_inches='tight')
plt.show()

# Focused plots for specific periods of interest.
print("\n📊 Creating focused analysis plots...")


def plot_period(start_date, end_date, title_suffix):
    """Save a zoomed-in comparison plot for one date window.

    Draws the headline and Truth Social rolling means between ``start_date``
    and ``end_date`` (both inclusive), marks every major event dated inside
    the window, and writes the figure to OUTPUT_DIR as
    ``escalation_period_<YYYYMM>_<YYYYMM>.png``. The figure is closed after
    saving and is not displayed.

    Reads the module-level ``headline_roll7``, ``truth_roll7``,
    ``major_events`` and ``OUTPUT_DIR``.

    Args:
        start_date: First day of the window; must provide ``strftime``.
        end_date: Last day of the window; must provide ``strftime``.
        title_suffix: Text appended to the plot title.
    """
    fig, ax = plt.subplots(figsize=(12, 7))

    # One line per source, restricted to the requested window.
    sources = (
        (headline_roll7, 'crimson', 'News Headlines'),
        (truth_roll7, 'navy', 'Truth Social'),
    )
    for series, line_color, line_label in sources:
        window = series.loc[start_date:end_date]
        ax.plot(window.index, window.values,
                line_color, linewidth=2.5, label=line_label)

    # Major events dated inside the window (bounds inclusive).
    in_window = major_events['date'].between(start_date, end_date)
    for _, event in major_events[in_window].iterrows():
        ax.axvline(x=event['date'], color='red', alpha=0.4, linestyle='--')
        # Labels sit at 90% of the current y-axis top.
        label_height = ax.get_ylim()[1] * 0.9
        ax.text(event['date'], label_height, event['label'],
                rotation=45, fontsize=9, ha='right', va='top',
                bbox=dict(boxstyle='round,pad=0.3', facecolor='yellow', alpha=0.6))

    ax.set(
        xlabel='Date',
        ylabel='Escalation Score (0-10)',
        title=f'Escalation Patterns: {title_suffix}',
    )
    ax.legend()
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    start_tag = start_date.strftime('%Y%m')
    end_tag = end_date.strftime('%Y%m')
    filename = f"escalation_period_{start_tag}_{end_tag}.png"
    plt.savefig(OUTPUT_DIR / filename, dpi=300, bbox_inches='tight')
    plt.close()

# Create period-specific plots: (window start, window end, title) per period.
_focus_periods = (
    ('2023-05-01', '2023-07-31', 'Ukrainian Counteroffensive Period'),
    ('2024-01-01', '2024-03-31', 'Russian Winter Offensive 2024'),
    ('2024-11-01', '2025-02-28', 'Trump Election and Policy Shift'),
)
for _period_start, _period_end, _period_title in _focus_periods:
    plot_period(pd.Timestamp(_period_start), pd.Timestamp(_period_end), _period_title)

# Generate event impact analysis
print("\n📈 Analyzing event impacts...")

# Length in days of the comparison window on each side of an event. The
# event day itself belongs to neither window.
IMPACT_WINDOW_DAYS = 14
_window = pd.Timedelta(days=IMPACT_WINDOW_DAYS)
_one_day = pd.Timedelta(days=1)

# Mean daily score before vs. after each major event, for both sources.
event_impacts = []
for _, event in major_events.iterrows():
    event_date = event['date']
    event_label = event['label']

    before_start = event_date - _window
    before_end = event_date - _one_day
    after_start = event_date + _one_day
    after_end = event_date + _window

    # A window containing no rows yields NaN (no exception), so such events
    # are still recorded. Only genuine lookup failures are skipped, and they
    # are reported rather than silently swallowed.
    try:
        headline_before = headline_daily.loc[before_start:before_end]['mean_score'].mean()
        headline_after = headline_daily.loc[after_start:after_end]['mean_score'].mean()
        truth_before = truth_daily.loc[before_start:before_end]['mean_score'].mean()
        truth_after = truth_daily.loc[after_start:after_end]['mean_score'].mean()
    except (KeyError, TypeError, ValueError) as err:
        print(f"   Skipping event {event_label!r} ({event_date}): {err!r}")
        continue

    event_impacts.append({
        'event': event_label,
        'date': event_date,
        'headline_change': headline_after - headline_before,
        'truth_change': truth_after - truth_before,
        'headline_before': headline_before,
        'headline_after': headline_after,
        'truth_before': truth_before,
        'truth_after': truth_after
    })

# Save event impact analysis. Columns are named explicitly so the frame (and
# the CSV header) keep their shape even when no event produced a row; a bare
# DataFrame([]) has no columns and the summary selection below would fail.
_impact_columns = [
    'event', 'date',
    'headline_change', 'truth_change',
    'headline_before', 'headline_after',
    'truth_before', 'truth_after',
]
impact_df = pd.DataFrame(event_impacts, columns=_impact_columns)
impact_df.to_csv(OUTPUT_DIR / 'event_impact_analysis.csv', index=False)

print("\n📊 Event Impact Summary:")
if impact_df.empty:
    print("   (no major events to summarise)")
else:
    print(impact_df[['event', 'headline_change', 'truth_change']].to_string(index=False))

print(f"\n✅ All visualizations saved to: {OUTPUT_DIR}")
print("\n📁 Generated files:")
print("   - escalation_timeline_overlay.png (main comparison with all events)")
print("   - Period-specific analysis plots")
print("   - event_impact_analysis.csv (quantitative impact measures)")