# 🔬 QEPC Simple Backtest

This notebook works with your **current project structure** without needing any file modifications.

---

In [None]:
# CELL 1: SETUP
# Locates the project root, makes it importable, and loads the common
# libraries used by the remaining cells.
import sys
from pathlib import Path

# Walk upward from the working directory and take the first folder that
# contains both "data" and "qepc"; if none qualifies, fall back to the
# directory two levels above the working directory.
current = Path.cwd()
project_root = next(
    (
        candidate
        for candidate in (current, *current.parents)
        if (candidate / "data").exists() and (candidate / "qepc").exists()
    ),
    current.parent.parent,
)

print(f"üìÅ Project root: {project_root}")

# Put the project root at the front of the import path (only once)
root_entry = str(project_root)
if root_entry not in sys.path:
    sys.path.insert(0, root_entry)

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import warnings

# Silence every warning category for the rest of the session
warnings.filterwarnings('ignore')

print("‚úÖ Setup complete!")

In [None]:
# CELL 2: LOAD DATA
# Loads the first readable game file under <project_root>/data and parses its
# date column into a datetime `gameDate` column. `game_data` stays None when
# no candidate file could be loaded.
data_dir = project_root / "data"

# Try to find game data: candidates are tried in order, first readable one wins
game_data = None
for filename in ['raw/TeamStatistics.csv', 'GameResults_2025.csv', 'Games.csv']:
    path = data_dir / filename
    if not path.exists():
        continue
    try:
        game_data = pd.read_csv(path)
    except Exception as exc:
        # Report the unreadable file instead of hiding the failure, then move
        # on to the next candidate.
        print(f"Could not read {filename}: {type(exc).__name__}: {exc}")
        continue
    print(f"‚úÖ Loaded: {filename} ({len(game_data):,} rows)")
    break

if game_data is None:
    print("‚ùå No game data found!")
    print(f"   Looked in: {data_dir}")
else:
    # Parse dates - try multiple column names, first match wins
    date_col = None
    for col in ['gameDate', 'Date', 'date', 'GAME_DATE']:
        if col in game_data.columns:
            date_col = col
            break

    if date_col:
        # Unparseable values become NaT so they can be counted and dropped
        game_data['gameDate'] = pd.to_datetime(game_data[date_col], errors='coerce')

        # Drop rows with invalid dates
        valid_dates = game_data['gameDate'].notna()
        invalid_count = (~valid_dates).sum()

        if invalid_count > 0:
            print(f"‚ö†Ô∏è Dropped {invalid_count} rows with invalid dates")
            game_data = game_data[valid_dates].copy()

        if len(game_data) > 0:
            print(f"üìÖ Date range: {game_data['gameDate'].min().date()} to {game_data['gameDate'].max().date()}")
        else:
            print("‚ùå No valid dates found in data")
    else:
        # Without a date column no `gameDate` column is created
        print(f"‚ö†Ô∏è No date column found. Columns: {list(game_data.columns)[:10]}")

    print(f"üìã Columns: {list(game_data.columns)[:8]}...")

In [None]:
# CELL 3: IMPORT QEPC MODULES
# Attempts to import the three QEPC entry points used by the backtest and
# records the outcome in USE_QEPC (True when all imports succeed, False when
# any of them raises ImportError).
try:
    from qepc.sports.nba.strengths_v2 import calculate_advanced_strengths
    from qepc.core.lambda_engine import compute_lambda
    from qepc.core.simulator import run_qepc_simulation
except ImportError as e:
    print(f"‚ö†Ô∏è QEPC import error: {e}")
    print("   Will use simple fallback prediction...")
    USE_QEPC = False
else:
    USE_QEPC = True
    print("‚úÖ QEPC modules loaded!")

In [None]:
# CELL 4: RUN BACKTEST
# Replays recent home games through the QEPC pipeline (or a constant baseline
# when QEPC is unavailable) and records predicted vs. actual outcomes in
# `results` / `results_df` for the reporting cells that follow.

BACKTEST_WINDOW_DAYS = 30               # window measured back from the latest game date
PROGRESS_EVERY = 20                     # progress line after every N recorded games
FALLBACK_PREDICTION = (112, 109, 0.55)  # (home pts, away pts, home win prob) baseline


def _first_present(row, keys):
    """Return the first non-missing value of `row` among `keys`, or None.

    Args:
        row: A pandas Series holding one game.
        keys: Column names to try, in priority order.

    Returns:
        The first value that exists and is not None/NaN, otherwise None.
    """
    for key in keys:
        value = row.get(key)
        if value is not None and not pd.isna(value):
            return value
    return None


# Always rebind the outputs so later cells never read stale results left over
# from a previous run of this cell.
results = []
results_df = pd.DataFrame()

if game_data is None:
    print("‚ùå No game data to backtest")
elif 'gameDate' not in game_data.columns:
    # Happens when the load cell found no date column to parse.
    print("Cannot backtest: game data has no parsed 'gameDate' column")
else:
    print("\nüöÄ Running backtest...")

    # Get home games only (when the data marks them), so each game is counted once
    if 'home' in game_data.columns:
        home_games = game_data[game_data['home'] == 1].copy()
    else:
        home_games = game_data.copy()

    # Limit to the most recent window of data
    latest = home_games['gameDate'].max()
    cutoff = latest - timedelta(days=BACKTEST_WINDOW_DAYS)
    backtest_games = home_games[home_games['gameDate'] >= cutoff].copy()

    print(f"üìä Backtesting {len(backtest_games)} games from last {BACKTEST_WINDOW_DAYS} days")

    # Team strengths do not depend on the individual game, so compute them once
    # instead of once per game.
    # NOTE(review): assumes compute_lambda does not mutate `strengths` - confirm.
    # NOTE(review): strengths are computed without a date cutoff here, so they
    # may include the games being backtested (look-ahead) - confirm.
    strengths = None
    if USE_QEPC and len(backtest_games) > 0:
        try:
            strengths = calculate_advanced_strengths(verbose=False)
        except Exception as exc:
            print(f"QEPC strengths failed ({type(exc).__name__}: {exc}); using baseline prediction")

    skipped_no_scores = 0    # games without usable actual scores
    skipped_errors = 0       # games dropped because of an unexpected error
    qepc_failures = 0        # games where QEPC failed and the baseline was used
    first_game_error = None
    first_qepc_error = None

    for _, game in backtest_games.iterrows():
        try:
            # Get team names
            if 'teamName' in game:
                home_team = f"{game.get('teamCity', '')} {game['teamName']}".strip()
                away_team = f"{game.get('opponentTeamCity', '')} {game.get('opponentTeamName', '')}".strip()
            else:
                home_team = game.get('Home Team', 'Home')
                away_team = game.get('Away Team', 'Away')

            # Get actual scores; a game without real scores cannot be scored,
            # so skip it rather than inventing a result.
            home_raw = _first_present(game, ('teamScore', 'Home_Score'))
            away_raw = _first_present(game, ('opponentScore', 'Away_Score'))
            if home_raw is None or away_raw is None:
                skipped_no_scores += 1
                continue
            actual_home = pd.to_numeric(home_raw, errors='coerce')
            actual_away = pd.to_numeric(away_raw, errors='coerce')
            if pd.isna(actual_home) or pd.isna(actual_away):
                skipped_no_scores += 1
                continue

            # Make prediction: QEPC when available, constant baseline otherwise
            prediction = FALLBACK_PREDICTION
            if strengths is not None:
                try:
                    schedule = pd.DataFrame([{'Home Team': home_team, 'Away Team': away_team}])
                    schedule_lambda = compute_lambda(schedule, strengths)
                    predictions = run_qepc_simulation(schedule_lambda, num_trials=2000)
                    sim = predictions.iloc[0]
                    prediction = (
                        sim.get('Sim_Home_Score', 110),
                        sim.get('Sim_Away_Score', 108),
                        sim.get('Home_Win_Prob', 0.5),
                    )
                except Exception as exc:
                    # Keep the game, but make the substitution visible in the summary
                    qepc_failures += 1
                    if first_qepc_error is None:
                        first_qepc_error = f"{type(exc).__name__}: {exc}"
            pred_home, pred_away, home_win_prob = prediction

            # Determine outcomes
            actual_home_won = actual_home > actual_away
            pred_home_won = home_win_prob > 0.5

            results.append({
                'Date': game['gameDate'],
                'Home_Team': home_team,
                'Away_Team': away_team,
                'Pred_Home': pred_home,
                'Pred_Away': pred_away,
                'Actual_Home': actual_home,
                'Actual_Away': actual_away,
                'Home_Win_Prob': home_win_prob,
                'Correct': actual_home_won == pred_home_won,
                'Total_Error': abs((pred_home + pred_away) - (actual_home + actual_away)),
            })
        except Exception as exc:
            skipped_errors += 1
            if first_game_error is None:
                first_game_error = f"{type(exc).__name__}: {exc}"
            continue

        # Only reached after a successful append, so each milestone prints once
        if len(results) % PROGRESS_EVERY == 0:
            print(f"   Processed {len(results)} games...")

    results_df = pd.DataFrame(results)
    print(f"\n‚úÖ Processed {len(results_df)} games")
    if skipped_no_scores:
        print(f"   Skipped {skipped_no_scores} games with missing scores")
    if skipped_errors:
        print(f"   Skipped {skipped_errors} games due to errors (first: {first_game_error})")
    if qepc_failures:
        print(f"   {qepc_failures} games used the baseline prediction after a QEPC error (first: {first_qepc_error})")

In [None]:
# CELL 5: SHOW RESULTS
# Prints headline accuracy figures plus the three smallest and three largest
# total-points errors. Leaves `win_acc` and `avg_error` defined when results exist.
if 'results_df' not in dir() or len(results_df) == 0:
    print("‚ùå No results to show")
else:
    win_acc = results_df['Correct'].mean()
    avg_error = results_df['Total_Error'].mean()

    divider = "="*50
    print("\n" + divider)
    print("üìä BACKTEST RESULTS")
    print(divider)
    print(f"Games Analyzed:  {len(results_df)}")
    print(f"Win Accuracy:    {win_acc:.1%}")
    print(f"Avg Total Error: {avg_error:.1f} pts")
    print(divider)

    # Each section pairs its heading with the selector that picks its rows;
    # the selector is only invoked after the heading has been printed.
    sections = (
        ("\nüèÜ Best predictions:", results_df.nsmallest),
        ("\n‚ö†Ô∏è Worst predictions:", results_df.nlargest),
    )
    for heading, pick_rows in sections:
        print(heading)
        for _, r in pick_rows(3, 'Total_Error').iterrows():
            print(f"   {r['Away_Team'][:20]:20} @ {r['Home_Team'][:20]:20} | Error: {r['Total_Error']:.0f}")

In [None]:
# CELL 6: SAVE RESULTS
# Writes the backtest results to a timestamped CSV under
# <project_root>/data/results/backtests and prints a short summary.
if 'results_df' in dir() and len(results_df) > 0:
    output_dir = project_root / "data" / "results" / "backtests"
    output_dir.mkdir(parents=True, exist_ok=True)

    # Minute-resolution timestamp in the file name
    filename = f"Backtest_{datetime.now():%Y%m%d_%H%M}.csv"
    output_path = output_dir / filename

    results_df.to_csv(output_path, index=False)
    print(f"üíæ Saved to: {output_path}")

    # Recompute the summary from the frame being saved so this cell neither
    # fails nor reports stale numbers when the results cell was not (re)run.
    win_acc = results_df['Correct'].mean()
    avg_error = results_df['Total_Error'].mean()

    print(f"\nüìã SUMMARY:")
    print(f"   Win Accuracy: {win_acc:.1%}")
    print(f"   Avg Error: {avg_error:.1f} pts")
    print(f"   Games: {len(results_df)}")

print("\nüèÅ Done!")