# 🔬 QEPC Enhanced Backtest with Real Results

This notebook:
1. Uses **actual game results** from TeamStatistics.csv
2. Compares QEPC predictions to real outcomes
3. Calculates detailed accuracy metrics
4. Identifies patterns in prediction errors
5. Generates calibration insights

---

## 🔧 Setup

In [None]:
# Notebook bootstrap. `project_root` (printed below and used by later cells)
# is not defined in this cell, so it presumably comes from this star import
# -- TODO confirm in notebook_context.
from notebook_context import *
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns

# QEPC imports: team strengths, per-team lambda, and the game simulator used
# by the prediction loop later in this notebook.
from qepc.sports.nba.strength import calculate_advanced_strengths
from qepc.sports.nba.lambda_calc import compute_lambda
from qepc.sports.nba.sim import run_qepc_simulation

# Confirm the environment resolved and show which project root is in use.
print("‚úÖ Setup complete")
print(f"üìÅ Project root: {project_root}")

---

## 📊 Load Actual Game Results

In [None]:
# Read the raw per-team game log that carries the real final scores.
team_stats_path = project_root / "data" / "raw" / "TeamStatistics.csv"
team_stats = pd.read_csv(team_stats_path)

print(f"üì¶ Loaded {len(team_stats):,} rows from TeamStatistics.csv")

# format='mixed' lets pandas infer the date format of each value individually.
team_stats['gameDate'] = pd.to_datetime(team_stats['gameDate'], format='mixed')

# Keep rows dated in calendar year 2025 and attach "<city> <name>" labels for
# both the row's team and its opponent.
in_2025 = team_stats['gameDate'].dt.year == 2025
games_2025 = team_stats.loc[in_2025].assign(
    Team=lambda df: df['teamCity'] + ' ' + df['teamName'],
    Opponent=lambda df: df['opponentTeamCity'] + ' ' + df['opponentTeamName'],
)

# Each game is kept once by taking only the rows flagged home == 1.
home_games = games_2025.loc[games_2025['home'].eq(1)].copy()

season_dates = games_2025['gameDate']
print(f"\nüìä 2025 Season:")
print(f"   Date range: {season_dates.min().date()} to {season_dates.max().date()}")
print(f"   Total games: {len(home_games)}")

# Show the five most recent home rows as "away @ home - awayScore-homeScore".
print(f"\nüèÄ Recent Games (sample):")
sample_columns = ['gameDate', 'Opponent', 'Team', 'opponentScore', 'teamScore', 'win']
sample = home_games.nlargest(5, 'gameDate')[sample_columns]
for _, game in sample.iterrows():
    result = "L" if game['win'] != 1 else "W"
    score = f"{int(game['opponentScore'])}-{int(game['teamScore'])}"
    date = game['gameDate'].strftime('%Y-%m-%d')
    print(f"   {date}: {game['Opponent']} @ {game['Team']} - {score} ({result})")

---

## 🎯 Set Backtest Parameters

In [None]:
# BACKTEST DATE RANGE
# Use dates you have actual results for!
# Both bounds are calendar dates and both are inclusive.

BACKTEST_START = pd.Timestamp("2025-10-22")  # Season start
BACKTEST_END = pd.Timestamp("2025-11-17")    # Latest available data

print(f"üéØ Backtest Configuration:")
print(f"   Start: {BACKTEST_START.date()}")
print(f"   End:   {BACKTEST_END.date()}")
print(f"   Days:  {(BACKTEST_END - BACKTEST_START).days}")

# Filter games to backtest window.
# BACKTEST_END is midnight at the start of the last day, so a plain
# `gameDate <= BACKTEST_END` would drop every game on that day whose timestamp
# carries a time of day. Compare against the following midnight (exclusive)
# so the whole final day is included; date-only data is unaffected.
window_end_exclusive = BACKTEST_END.normalize() + pd.Timedelta(days=1)
in_window = (
    (home_games['gameDate'] >= BACKTEST_START)
    & (home_games['gameDate'] < window_end_exclusive)
)
backtest_games = home_games[in_window].copy()

print(f"\nüìä Games in backtest window: {len(backtest_games)}")

if backtest_games.empty:
    # Tell the user which dates actually have results so the window can be fixed.
    print("\n‚ùå No games found in this date range!")
    print(f"   Available range: {home_games['gameDate'].min().date()} to {home_games['gameDate'].max().date()}")
else:
    print(f"   ‚úÖ Ready to backtest!")

---

## 🚀 Run QEPC Predictions

In [None]:
print("üîÆ Running QEPC predictions on historical games...\n")

predictions = []
errors = []

for idx, game in backtest_games.iterrows():
    home_team = game['Team']
    away_team = game['Opponent']
    game_date = game['gameDate']
    
    try:
        # Calculate team strengths
        strengths = calculate_advanced_strengths()
        
        # Get lambdas
        home_lambda = compute_lambda(home_team, away_team, is_home=True, strengths=strengths)
        away_lambda = compute_lambda(away_team, home_team, is_home=False, strengths=strengths)
        
        # Run simulation
        sim_results = run_qepc_simulation(
            home_team=home_team,
            away_team=away_team,
            home_lambda=home_lambda,
            away_lambda=away_lambda,
            n_sims=10000
        )
        
        # Extract predictions
        pred_home = sim_results['home_score_mean']
        pred_away = sim_results['away_score_mean']
        
        # Actual scores
        actual_home = game['teamScore']
        actual_away = game['opponentScore']
        
        # Store result
        predictions.append({
            'Date': game_date,
            'Home_Team': home_team,
            'Away_Team': away_team,
            
            # Predictions
            'Pred_Home_Score': pred_home,
            'Pred_Away_Score': pred_away,
            'Pred_Total': pred_home + pred_away,
            'Pred_Spread': pred_home - pred_away,
            'Pred_Winner': home_team if pred_home > pred_away else away_team,
            
            # Actuals
            'Actual_Home_Score': actual_home,
            'Actual_Away_Score': actual_away,
            'Actual_Total': actual_home + actual_away,
            'Actual_Spread': actual_home - actual_away,
            'Actual_Winner': home_team if game['win'] == 1 else away_team,
            
            # Errors
            'Error_Home': abs(pred_home - actual_home),
            'Error_Away': abs(pred_away - actual_away),
            'Error_Total': abs((pred_home + pred_away) - (actual_home + actual_away)),
            'Error_Spread': abs((pred_home - pred_away) - (actual_home - actual_away)),
            
            # Correct?
            'Winner_Correct': (pred_home > pred_away) == (game['win'] == 1),
            
            # Confidence metrics
            'Home_Win_Prob': sim_results.get('home_win_prob', 0.5),
            'Confidence': abs(pred_home - pred_away)
        })
        
        # Progress
        if len(predictions) % 10 == 0:
            print(f"   Processed {len(predictions)}/{len(backtest_games)} games...")
            
    except Exception as e:
        errors.append({
            'Date': game_date,
            'Home_Team': home_team,
            'Away_Team': away_team,
            'Error': str(e)
        })
        continue

# Convert to DataFrame
results_df = pd.DataFrame(predictions)

print(f"\n‚úÖ Predictions complete!")
print(f"   Successful: {len(predictions)}")
print(f"   Errors: {len(errors)}")

if len(errors) > 0:
    print(f"\n‚ö†Ô∏è  Errors encountered:")
    for err in errors[:5]:
        print(f"   {err['Date'].date()}: {err['Away_Team']} @ {err['Home_Team']} - {err['Error']}")

---

## 📈 Calculate Accuracy Metrics

In [None]:
# Print the headline accuracy numbers for the backtest, then the five best and
# five worst games ranked by absolute error on the combined score.
if len(results_df) == 0:
    print("‚ùå No predictions to analyze")
else:
    banner = "="*60
    print(banner)
    print("üìä QEPC BACKTEST RESULTS")
    print(banner)

    # Share of games where the predicted winner matched the actual winner.
    # `win_accuracy` is also read by the save cell further down.
    winner_hits = results_df['Winner_Correct']
    win_accuracy = winner_hits.mean()
    print(f"\nüéØ Win Prediction Accuracy: {win_accuracy:.1%}")
    print(f"   Correct: {winner_hits.sum()}/{len(results_df)}")

    # Mean absolute error per predicted quantity.
    total_error = results_df['Error_Total']
    print(f"\nüìä Score Prediction Accuracy:")
    print(f"   Avg Home Error:   {results_df['Error_Home'].mean():.2f} points")
    print(f"   Avg Away Error:   {results_df['Error_Away'].mean():.2f} points")
    print(f"   Avg Total Error:  {total_error.mean():.2f} points")
    print(f"   Avg Spread Error: {results_df['Error_Spread'].mean():.2f} points")

    # Spread of the total-score error.
    print(f"\nüìâ Error Distribution:")
    print(f"   Total Error Median: {total_error.median():.2f}")
    print(f"   Total Error Std:    {total_error.std():.2f}")
    print(f"   Total Error Max:    {total_error.max():.2f}")

    # Games where the predicted margin was at least five points.
    high_conf = results_df[results_df['Confidence'] >= 5]
    if len(high_conf) > 0:
        print(f"\nüéØ High Confidence Games (spread >= 5):")
        print(f"   Count: {len(high_conf)}")
        print(f"   Win Accuracy: {high_conf['Winner_Correct'].mean():.1%}")

    # Best and worst games share one column set and one print layout.
    ranking_columns = [
        'Date', 'Away_Team', 'Home_Team',
        'Pred_Total', 'Actual_Total', 'Error_Total'
    ]
    best = results_df.nsmallest(5, 'Error_Total')[ranking_columns]
    worst = results_df.nlargest(5, 'Error_Total')[ranking_columns]
    rankings = (
        (f"\nüèÜ Best Predictions (smallest total error):", best),
        (f"\n‚ö†Ô∏è  Worst Predictions (largest total error):", worst),
    )
    for heading, ranked in rankings:
        print(heading)
        for _, row in ranked.iterrows():
            print(f"   {row['Date'].date()}: {row['Away_Team']} @ {row['Home_Team']}")
            print(f"      Predicted: {row['Pred_Total']:.1f} | Actual: {row['Actual_Total']:.0f} | Error: {row['Error_Total']:.1f}")

    print("\n" + banner)

---

## 📊 Visualize Results

In [None]:
# Draw a 2x2 dashboard: predicted-vs-actual totals, the error histogram,
# error over time, and win accuracy grouped by predicted margin.
if len(results_df) > 0:
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    total_error = results_df['Error_Total']
    mean_total_error = total_error.mean()

    # 1. Predicted vs Actual Total
    ax1 = axes[0, 0]
    ax1.scatter(results_df['Actual_Total'], results_df['Pred_Total'], alpha=0.6)
    # Span the y = x reference line over the observed data instead of a fixed
    # 200-250 window, so it always covers every plotted point.
    diag_lo = min(results_df['Actual_Total'].min(), results_df['Pred_Total'].min())
    diag_hi = max(results_df['Actual_Total'].max(), results_df['Pred_Total'].max())
    ax1.plot([diag_lo, diag_hi], [diag_lo, diag_hi], 'r--', label='Perfect prediction')
    ax1.set_xlabel('Actual Total')
    ax1.set_ylabel('Predicted Total')
    ax1.set_title('Predicted vs Actual Total Score')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # 2. Error Distribution
    ax2 = axes[0, 1]
    ax2.hist(total_error, bins=20, edgecolor='black', alpha=0.7)
    ax2.axvline(mean_total_error, color='r', linestyle='--', label='Mean')
    ax2.set_xlabel('Total Error (points)')
    ax2.set_ylabel('Frequency')
    ax2.set_title('Distribution of Total Score Error')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    # 3. Error Over Time
    ax3 = axes[1, 0]
    results_df_sorted = results_df.sort_values('Date')
    ax3.plot(results_df_sorted['Date'], results_df_sorted['Error_Total'], marker='o', alpha=0.6)
    ax3.axhline(mean_total_error, color='r', linestyle='--', label='Mean Error')
    ax3.set_xlabel('Date')
    ax3.set_ylabel('Total Error (points)')
    ax3.set_title('Error Over Time')
    ax3.legend()
    ax3.grid(True, alpha=0.3)
    ax3.tick_params(axis='x', rotation=45)

    # 4. Win Accuracy by Confidence
    ax4 = axes[1, 1]
    # include_lowest keeps a predicted margin of exactly 0 in the first bin and
    # the open-ended last bin keeps any margin above 9; previously values
    # outside (0, 100] became NaN and silently dropped out of the chart.
    confidence_bins = pd.cut(
        results_df['Confidence'],
        bins=[0, 3, 6, 9, np.inf],
        labels=['0-3', '3-6', '6-9', '9+'],
        include_lowest=True,
    )
    # observed=False keeps empty bins on the axis and makes the categorical
    # grouping behaviour explicit rather than relying on the pandas default.
    accuracy_by_conf = results_df.groupby(confidence_bins, observed=False)['Winner_Correct'].mean()
    accuracy_by_conf.plot(kind='bar', ax=ax4, color='steelblue')
    ax4.axhline(0.5, color='r', linestyle='--', label='Coin flip')
    ax4.set_xlabel('Confidence (Predicted Spread)')
    ax4.set_ylabel('Win Accuracy')
    ax4.set_title('Win Accuracy by Confidence Level')
    ax4.legend()
    ax4.grid(True, alpha=0.3)
    ax4.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

    print("‚úÖ Visualizations complete")
else:
    print("‚ùå No data to visualize")

---

## 💾 Save Results

In [None]:
# Persist the per-game results as CSV plus a short key/value text summary,
# both named after the backtest window.
if len(results_df) > 0:
    # Save detailed results
    output_dir = project_root / "data" / "results" / "backtests"
    output_dir.mkdir(parents=True, exist_ok=True)

    filename = f"Enhanced_Backtest_{BACKTEST_START.date()}_to_{BACKTEST_END.date()}.csv"
    output_path = output_dir / filename

    results_df.to_csv(output_path, index=False)
    print(f"üíæ Saved detailed results to: {output_path}")

    # Recompute the win accuracy from results_df here instead of reading the
    # value left behind by the metrics cell: that cell may have been skipped
    # (NameError) or run against an older results_df (stale number).
    win_accuracy = results_df['Winner_Correct'].mean()

    # Save summary
    summary = {
        'Backtest_Period': f"{BACKTEST_START.date()} to {BACKTEST_END.date()}",
        'Games_Analyzed': len(results_df),
        'Win_Accuracy': f"{win_accuracy:.1%}",
        'Avg_Total_Error': f"{results_df['Error_Total'].mean():.2f}",
        'Avg_Spread_Error': f"{results_df['Error_Spread'].mean():.2f}",
        'Median_Total_Error': f"{results_df['Error_Total'].median():.2f}"
    }

    summary_path = output_dir / f"Summary_{BACKTEST_START.date()}_to_{BACKTEST_END.date()}.txt"
    # Explicit encoding so the file does not depend on the platform default.
    with open(summary_path, 'w', encoding='utf-8') as f:
        f.writelines(f"{key}: {value}\n" for key, value in summary.items())

    print(f"üíæ Saved summary to: {summary_path}")
    print("\n‚úÖ All results saved!")
else:
    print("‚ùå No results to save")

---

## 🎯 Next Steps

### Improve QEPC Based on Results:

1. **If Win Accuracy < 55%:**
   - Adjust lambda calculations
   - Add recency weighting
   - Include rest day adjustments

2. **If Total Error > 15 points:**
   - Calibrate offensive/defensive ratings
   - Add pace adjustments
   - Consider team form metrics

3. **If High Confidence Games Underperform:**
   - Review spread calculations
   - Add upset probability
   - Consider situational factors

### Additional Backtests:

- Split by home/away
- Analyze by team
- Test on different date ranges
- Compare to betting lines

### Integration:

- Use results to calibrate QEPC
- Build confidence intervals
- Create ensemble models
- Develop betting strategies

---

**Your backtest is complete! Use these insights to improve QEPC.** 🚀