# QEPC NBA – Leakage-free Eoin totals backtest
This notebook runs end-to-end using the shared project-root detector and avoids
any future info leakage by rebuilding strengths before each game date.


In [1]:
from pathlib import Path
import sys

# 1) Find repo root (the folder that contains qepc/__init__.py) by checking
#    the current working directory and then each of its parents in turn.
_start_dir = Path.cwd().resolve()
PROJECT_ROOT = next(
    (
        p for p in (_start_dir, *_start_dir.parents)
        if (p / "qepc" / "__init__.py").exists()
    ),
    None,
)
if PROJECT_ROOT is None:
    # Without a default, next() would raise a bare StopIteration here, which
    # gives no hint about what went wrong when the notebook is started from
    # outside the repository.
    raise FileNotFoundError(
        f"Could not find qepc/__init__.py in {_start_dir} or any of its "
        "parent directories; start the notebook from inside the QEPC repo."
    )

# 2) Make qepc importable (must happen before the qepc imports below)
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

# 3) Now this works: re-derive PROJECT_ROOT through the project's own helpers
#    so later cells use whatever root they report.
from qepc.notebook_bootstrap import ensure_project_root
PROJECT_ROOT = ensure_project_root()

from qepc.utils.paths import get_project_root
PROJECT_ROOT = get_project_root(PROJECT_ROOT)

print("PROJECT_ROOT:", PROJECT_ROOT)


PROJECT_ROOT: C:\Users\wdorsey\qepc_project


In [2]:
from qepc.nba.eoin_data_source import load_eoin_games, load_eoin_team_boxes
from qepc.nba.entanglement import build_team_state_vectors, attach_team_state_to_games

# Load the game-level and team-box-score tables from the project root.
games = load_eoin_games(PROJECT_ROOT)
team_boxes = load_eoin_team_boxes(PROJECT_ROOT)
# NOTE(review): CONFIG is not defined anywhere in the visible notebook; the
# recorded output of this cell is "NameError: name 'CONFIG' is not defined".
# A CONFIG mapping with at least 'rolling_windows' (and, for later cells,
# 'backtest_start', 'backtest_end', 'modern_cutoff') must be defined before
# this line runs.
# NOTE(review): team_state is built once from all of team_boxes — confirm the
# rolling windows exclude each game's own box score, otherwise attaching it to
# games would leak same-game information.
team_state = build_team_state_vectors(team_boxes, windows=CONFIG['rolling_windows'])
games = attach_team_state_to_games(games, team_state)

# Quick sanity check on the sizes of the loaded/derived tables.
print('games shape', games.shape)
print('team_state shape', team_state.shape)


NameError: name 'CONFIG' is not defined

In [None]:
from qepc.nba.matchups_eoin import build_matchups_for_date

def find_score_columns(df, prefix):
    """Return the first score-like column name in ``df`` for ``prefix``.

    The candidates are tried in this order: ``<prefix>_score``,
    ``<prefix>score``, ``<prefix>_points``, ``<prefix>points``.

    Args:
        df: Object exposing a ``columns`` container that supports ``in``.
        prefix: Leading part of the column name, e.g. ``'home'``.

    Returns:
        The first candidate name present in ``df.columns``, or ``None`` when
        none of them is present.
    """
    suffixes = ('_score', 'score', '_points', 'points')
    candidates = (f'{prefix}{suffix}' for suffix in suffixes)
    return next((name for name in candidates if name in df.columns), None)

def filter_range(df):
    """Return a copy of ``df`` restricted to the configured backtest window.

    Rows are kept when ``game_date`` is on or after ``CONFIG['backtest_start']``
    and on or before ``CONFIG['backtest_end']``. A bound whose configured
    value is falsy (e.g. ``None``) is not applied.

    Args:
        df: Frame with a ``game_date`` column.

    Returns:
        A new frame; the input is not modified.
    """
    window = df.copy()

    lower = CONFIG['backtest_start']
    if lower:
        on_or_after = window['game_date'] >= lower
        window = window[on_or_after]

    upper = CONFIG['backtest_end']
    if upper:
        on_or_before = window['game_date'] <= upper
        window = window[on_or_before]

    return window

# pandas is used below (pd.concat / pd.DataFrame) but is not imported anywhere
# else in the visible notebook, so bring it into scope here.
import pandas as pd

# Restrict the games table to the configured backtest window.
games_bt = filter_range(games)
results = []

# Every per-date group has the same columns as games_bt, so the score column
# names only need to be resolved once rather than once per game date.
home_col = find_score_columns(games_bt, 'home')
away_col = find_score_columns(games_bt, 'away')

# Walk the backtest one game date at a time, building predictions for that
# date and joining the actual scores onto them.
for gdate, day_games in games_bt.groupby('game_date'):
    # NOTE(review): cutoff_date=gdate is presumably what keeps the run
    # leakage-free; confirm the cutoff is exclusive so that results from
    # gdate itself are not used when building that day's matchups.
    matchups = build_matchups_for_date(
        gdate,
        modern_cutoff=CONFIG['modern_cutoff'],
        cutoff_date=gdate,
    )
    if matchups.empty:
        continue

    # Collect the actual scores for this date, keyed by game_id. A side whose
    # score column could not be found is simply left out.
    actuals = day_games[['game_id']].copy()
    if home_col:
        actuals['actual_home_pts'] = day_games[home_col]
    if away_col:
        actuals['actual_away_pts'] = day_games[away_col]

    # Left join keeps every predicted matchup even when no actual is found.
    matchups = matchups.merge(actuals, on='game_id', how='left')

    # Absolute errors are only computed when both actual columns are present.
    if {'actual_home_pts', 'actual_away_pts'} <= set(matchups.columns):
        matchups['home_abs_err'] = (matchups['actual_home_pts'] - matchups['exp_home_pts']).abs()
        matchups['away_abs_err'] = (matchups['actual_away_pts'] - matchups['exp_away_pts']).abs()
    results.append(matchups)

# Stack the per-date frames; fall back to an empty frame when nothing was built.
backtest_df = pd.concat(results, ignore_index=True) if results else pd.DataFrame()
backtest_df.head()


In [None]:
# Summarise the backtest: row count plus mean absolute error per side.
if backtest_df.empty:
    print('No backtest rows built. Check date filters or source data.')
elif not {'home_abs_err', 'away_abs_err'} <= set(backtest_df.columns):
    # The error columns are only created when both actual score columns were
    # found, so a non-empty frame can lack them; report that instead of
    # failing with a KeyError.
    print('Backtest rows:', len(backtest_df))
    print('No absolute-error columns found. Check that the games table has home/away score columns.')
else:
    mae_home = backtest_df['home_abs_err'].mean()
    mae_away = backtest_df['away_abs_err'].mean()
    print('Backtest rows:', len(backtest_df))
    print('MAE home/away:', mae_home, mae_away)
