In [1]:
import sys
from pathlib import Path

PROJECT_ROOT = Path.cwd().resolve().parents[1]

if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

print("PROJECT_ROOT:", PROJECT_ROOT)
print("qepc in root?", (PROJECT_ROOT / "qepc").exists())


PROJECT_ROOT: C:\Users\wdors\qepc_project
qepc in root? True


In [2]:
from qepc.brain.games_loader import fetch_league_games, build_games_table
from qepc.brain.scripts import label_game_scripts_by_total_points
from qepc.brain.lambda_builder import build_script_level_lambdas

season = "2023-24"

team_games = fetch_league_games(season)
games_df = build_games_table(team_games)

print("games_df rows:", len(games_df))
display(games_df.head())

scripts_df = label_game_scripts_by_total_points(
    games_df,
    low_quantile=0.25,
    high_quantile=0.75,
)

print("scripts_df rows:", len(scripts_df))
display(scripts_df.head())

script_lambdas = build_script_level_lambdas(games_df, scripts_df)

print("Script-level lambdas:")
display(script_lambdas)


[games_loader] Fetching LeagueGameLog for season=2023-24, season_type=Regular Season...
[games_loader] Retrieved 2460 team-games.
games_df rows: 1230


Unnamed: 0,GAME_ID,GAME_DATE,SEASON_ID,HOME_TEAM_ID,HOME_TEAM_NAME,HOME_TEAM_ABBREVIATION,HOME_PTS,AWAY_TEAM_ID,AWAY_TEAM_NAME,AWAY_TEAM_ABBREVIATION,AWAY_PTS,HOME_RESULT,MARGIN,TOTAL_POINTS
0,22300061,2023-10-24,22023,1610612743,Denver Nuggets,DEN,119,1610612747,Los Angeles Lakers,LAL,107,W,12,226
1,22300062,2023-10-24,22023,1610612744,Golden State Warriors,GSW,104,1610612756,Phoenix Suns,PHX,108,L,-4,212
2,22300070,2023-10-25,22023,1610612741,Chicago Bulls,CHI,104,1610612760,Oklahoma City Thunder,OKC,124,L,-20,228
3,22300074,2023-10-25,22023,1610612746,LA Clippers,LAC,123,1610612757,Portland Trail Blazers,POR,111,W,12,234
4,22300068,2023-10-25,22023,1610612748,Miami Heat,MIA,103,1610612765,Detroit Pistons,DET,102,W,1,205


scripts_df rows: 1230


Unnamed: 0,GAME_ID,GAME_DATE,TOTAL_POINTS,TOTAL_Q,SCRIPT_LABEL,SCRIPT_INDEX,TOTAL_Q_LOW,TOTAL_Q_HIGH
0,22300061,2023-10-24,226,0.472358,BALANCED,1,214.0,242.0
1,22300062,2023-10-24,212,0.213821,GRIND,0,214.0,242.0
2,22300070,2023-10-25,228,0.510976,BALANCED,1,214.0,242.0
3,22300074,2023-10-25,234,0.623171,BALANCED,1,214.0,242.0
4,22300068,2023-10-25,205,0.114634,GRIND,0,214.0,242.0


Script-level lambdas:


Unnamed: 0,SCRIPT_LABEL,mean_total_pts,std_total_pts,count_games
0,BALANCED,227.651391,7.661779,611
1,CHAOS,254.773463,11.196611,309
2,GRIND,203.677419,9.17534,310


In [3]:
from qepc.brain.teams_loader import fetch_league_team_season_stats
import pandas as pd

# Advanced team stats (OFF_RATING, DEF_RATING, NET_RATING, PACE, PIE)
team_stats_adv = fetch_league_team_season_stats(
    season,
    measure_type="Advanced",
)

adv_cols_keep = [
    "TEAM_ID",
    "TEAM_NAME",
    "TEAM_ABBREVIATION",
    "GP",
    "W",
    "L",
    "W_PCT",
    "MIN",
    "OFF_RATING",
    "DEF_RATING",
    "NET_RATING",
    "PACE",
    "PIE",
]
adv_cols_keep = [c for c in adv_cols_keep if c in team_stats_adv.columns]
team_adv_small = team_stats_adv[adv_cols_keep].copy()

home_adv = team_adv_small.add_prefix("HOME_")
away_adv = team_adv_small.add_prefix("AWAY_")

games_feat = games_df.copy()

games_feat = games_feat.merge(
    home_adv,
    left_on="HOME_TEAM_ID",
    right_on="HOME_TEAM_ID",
    how="left",
)

games_feat = games_feat.merge(
    away_adv,
    left_on="AWAY_TEAM_ID",
    right_on="AWAY_TEAM_ID",
    how="left",
)

print("games_feat shape:", games_feat.shape)
display(games_feat.head())


games_feat shape: (1230, 36)


Unnamed: 0,GAME_ID,GAME_DATE,SEASON_ID,HOME_TEAM_ID,HOME_TEAM_NAME_x,HOME_TEAM_ABBREVIATION,HOME_PTS,AWAY_TEAM_ID,AWAY_TEAM_NAME_x,AWAY_TEAM_ABBREVIATION,...,AWAY_GP,AWAY_W,AWAY_L,AWAY_W_PCT,AWAY_MIN,AWAY_OFF_RATING,AWAY_DEF_RATING,AWAY_NET_RATING,AWAY_PACE,AWAY_PIE
0,22300061,2023-10-24,22023,1610612743,Denver Nuggets,DEN,119,1610612747,Los Angeles Lakers,LAL,...,82,47,35,0.573,3971.0,115.4,114.8,0.6,101.38,0.518
1,22300062,2023-10-24,22023,1610612744,Golden State Warriors,GSW,104,1610612756,Phoenix Suns,PHX,...,82,49,33,0.598,3956.0,116.8,113.7,3.1,99.0,0.527
2,22300070,2023-10-25,22023,1610612741,Chicago Bulls,CHI,104,1610612760,Oklahoma City Thunder,OKC,...,82,57,25,0.695,3961.0,118.3,111.0,7.3,100.85,0.538
3,22300074,2023-10-25,22023,1610612746,LA Clippers,LAC,123,1610612757,Portland Trail Blazers,POR,...,82,21,61,0.256,3976.0,107.6,116.6,-9.0,97.88,0.434
4,22300068,2023-10-25,22023,1610612748,Miami Heat,MIA,103,1610612765,Detroit Pistons,DET,...,82,14,68,0.171,3951.0,109.0,118.0,-9.0,100.45,0.449


In [4]:
games_with_scripts = games_feat.merge(
    scripts_df[["GAME_ID", "SCRIPT_LABEL", "SCRIPT_INDEX"]],
    on="GAME_ID",
    how="left",
)

print("games_with_scripts shape:", games_with_scripts.shape)
display(games_with_scripts.head())


games_with_scripts shape: (1230, 38)


Unnamed: 0,GAME_ID,GAME_DATE,SEASON_ID,HOME_TEAM_ID,HOME_TEAM_NAME_x,HOME_TEAM_ABBREVIATION,HOME_PTS,AWAY_TEAM_ID,AWAY_TEAM_NAME_x,AWAY_TEAM_ABBREVIATION,...,AWAY_L,AWAY_W_PCT,AWAY_MIN,AWAY_OFF_RATING,AWAY_DEF_RATING,AWAY_NET_RATING,AWAY_PACE,AWAY_PIE,SCRIPT_LABEL,SCRIPT_INDEX
0,22300061,2023-10-24,22023,1610612743,Denver Nuggets,DEN,119,1610612747,Los Angeles Lakers,LAL,...,35,0.573,3971.0,115.4,114.8,0.6,101.38,0.518,BALANCED,1
1,22300062,2023-10-24,22023,1610612744,Golden State Warriors,GSW,104,1610612756,Phoenix Suns,PHX,...,33,0.598,3956.0,116.8,113.7,3.1,99.0,0.527,GRIND,0
2,22300070,2023-10-25,22023,1610612741,Chicago Bulls,CHI,104,1610612760,Oklahoma City Thunder,OKC,...,25,0.695,3961.0,118.3,111.0,7.3,100.85,0.538,BALANCED,1
3,22300074,2023-10-25,22023,1610612746,LA Clippers,LAC,123,1610612757,Portland Trail Blazers,POR,...,61,0.256,3976.0,107.6,116.6,-9.0,97.88,0.434,BALANCED,1
4,22300068,2023-10-25,22023,1610612748,Miami Heat,MIA,103,1610612765,Detroit Pistons,DET,...,68,0.171,3951.0,109.0,118.0,-9.0,100.45,0.449,GRIND,0


In [5]:
import joblib

model_dir = PROJECT_ROOT / "data" / "processed" / "nba" / "models"
model_path = model_dir / f"script_classifier_rf_{season}.joblib"

print("Loading classifier from:", model_path)
clf = joblib.load(model_path)

print("Loaded classifier:", clf)
print("Classes:", clf.classes_)


Loading classifier from: C:\Users\wdors\qepc_project\data\processed\nba\models\script_classifier_rf_2023-24.joblib
Loaded classifier: RandomForestClassifier(n_estimators=300, n_jobs=-1, random_state=42)
Classes: [0 1 2]


In [6]:
import numpy as np

feature_cols = [
    "HOME_OFF_RATING",
    "HOME_DEF_RATING",
    "HOME_NET_RATING",
    "HOME_PACE",
    "HOME_PIE",
    "AWAY_OFF_RATING",
    "AWAY_DEF_RATING",
    "AWAY_NET_RATING",
    "AWAY_PACE",
    "AWAY_PIE",
]

feature_cols = [c for c in feature_cols if c in games_with_scripts.columns]
print("Using feature columns:", feature_cols)

model_df = games_with_scripts.dropna(subset=["SCRIPT_INDEX"]).copy()
X_all = model_df[feature_cols].values.astype(float)

print("model_df shape:", model_df.shape)


Using feature columns: ['HOME_OFF_RATING', 'HOME_DEF_RATING', 'HOME_NET_RATING', 'HOME_PACE', 'HOME_PIE', 'AWAY_OFF_RATING', 'AWAY_DEF_RATING', 'AWAY_NET_RATING', 'AWAY_PACE', 'AWAY_PIE']
model_df shape: (1230, 38)


In [7]:
# Pick one game to demo
row_idx = 0  # you can change this to inspect a different game

game_row = model_df.iloc[row_idx]
x_row = X_all[row_idx : row_idx + 1]  # 2D slice for sklearn

print("Available columns in game_row:")
print(list(game_row.index))

# Try to show the most informative ones that exist
candidate_cols = [
    "GAME_ID",
    "GAME_DATE",
    "HOME_TEAM_NAME",
    "AWAY_TEAM_NAME",
    "HOME_TEAM",
    "AWAY_TEAM",
    "HOME_TEAM_ABBREVIATION",
    "AWAY_TEAM_ABBREVIATION",
    "HOME_TEAM_TRICODE",
    "AWAY_TEAM_TRICODE",
    "TOTAL_POINTS",
    "SCRIPT_LABEL",
]

preview_cols = [c for c in candidate_cols if c in game_row.index]

print("\nDemo game (subset of columns):")
display(game_row[preview_cols])


Available columns in game_row:
['GAME_ID', 'GAME_DATE', 'SEASON_ID', 'HOME_TEAM_ID', 'HOME_TEAM_NAME_x', 'HOME_TEAM_ABBREVIATION', 'HOME_PTS', 'AWAY_TEAM_ID', 'AWAY_TEAM_NAME_x', 'AWAY_TEAM_ABBREVIATION', 'AWAY_PTS', 'HOME_RESULT', 'MARGIN', 'TOTAL_POINTS', 'HOME_TEAM_NAME_y', 'HOME_GP', 'HOME_W', 'HOME_L', 'HOME_W_PCT', 'HOME_MIN', 'HOME_OFF_RATING', 'HOME_DEF_RATING', 'HOME_NET_RATING', 'HOME_PACE', 'HOME_PIE', 'AWAY_TEAM_NAME_y', 'AWAY_GP', 'AWAY_W', 'AWAY_L', 'AWAY_W_PCT', 'AWAY_MIN', 'AWAY_OFF_RATING', 'AWAY_DEF_RATING', 'AWAY_NET_RATING', 'AWAY_PACE', 'AWAY_PIE', 'SCRIPT_LABEL', 'SCRIPT_INDEX']

Demo game (subset of columns):


GAME_ID                            0022300061
GAME_DATE                 2023-10-24 00:00:00
HOME_TEAM_ABBREVIATION                    DEN
AWAY_TEAM_ABBREVIATION                    LAL
TOTAL_POINTS                              226
SCRIPT_LABEL                         BALANCED
Name: 0, dtype: object

In [8]:
probs = clf.predict_proba(x_row)[0]  # shape (3,)
class_order = list(clf.classes_)     # e.g. [0, 1, 2]

# Map class indices to GRIND/BALANCED/CHAOS
i_grind = class_order.index(0)
i_bal   = class_order.index(1)
i_chaos = class_order.index(2)

p_grind = float(probs[i_grind])
p_bal   = float(probs[i_bal])
p_chaos = float(probs[i_chaos])

print("Script probabilities for this game:")
print(f"P_GRIND   = {p_grind:.3f}")
print(f"P_BALANCED= {p_bal:.3f}")
print(f"P_CHAOS   = {p_chaos:.3f}")
print("Sum:", p_grind + p_bal + p_chaos)


Script probabilities for this game:
P_GRIND   = 0.033
P_BALANCED= 0.880
P_CHAOS   = 0.087
Sum: 0.9999999999999998


In [9]:
from qepc.brain.lambda_builder import expected_total_from_script_mix

expected_total = expected_total_from_script_mix(
    script_lambdas=script_lambdas,
    p_grind=p_grind,
    p_balanced=p_bal,
    p_chaos=p_chaos,
)

print("\nScript-level lambdas:")
display(script_lambdas)

print("\nFor this specific game:")
actual_total = float(game_row["TOTAL_POINTS"])
print(f"Actual final total:         {actual_total:.1f}")
print(f"Script-mixture expected total: {expected_total:.1f}")



Script-level lambdas:


Unnamed: 0,SCRIPT_LABEL,mean_total_pts,std_total_pts,count_games
0,BALANCED,227.651391,7.661779,611
1,CHAOS,254.773463,11.196611,309
2,GRIND,203.677419,9.17534,310



For this specific game:
Actual final total:         226.0
Script-mixture expected total: 229.2


In [10]:
import numpy as np

# Global league-average total for the season
league_avg_total = games_df["TOTAL_POINTS"].mean()

print(f"League average total for {season}: {league_avg_total:.2f}")


League average total for 2023-24: 228.42


In [11]:
from qepc.brain.lambda_builder import expected_total_from_script_mix

# 1) Script probabilities for all games
probs_all = clf.predict_proba(X_all)  # shape (n_games, 3)
class_order = list(clf.classes_)      # e.g. [0, 1, 2]

i_grind = class_order.index(0)
i_bal   = class_order.index(1)
i_chaos = class_order.index(2)

model_df_with_preds = model_df.copy()

model_df_with_preds["P_GRIND"]    = probs_all[:, i_grind]
model_df_with_preds["P_BALANCED"] = probs_all[:, i_bal]
model_df_with_preds["P_CHAOS"]    = probs_all[:, i_chaos]

# 2) Use script mixture to get expected total for each game
def _mix_expected_total(row):
    return expected_total_from_script_mix(
        script_lambdas=script_lambdas,
        p_grind=row["P_GRIND"],
        p_balanced=row["P_BALANCED"],
        p_chaos=row["P_CHAOS"],
    )

model_df_with_preds["EXPECTED_TOTAL_QEPC"] = model_df_with_preds.apply(
    _mix_expected_total,
    axis=1,
)

# 3) Baseline prediction (same number for every game)
model_df_with_preds["EXPECTED_TOTAL_BASELINE"] = league_avg_total

print("model_df_with_preds shape:", model_df_with_preds.shape)
display(model_df_with_preds.head())


model_df_with_preds shape: (1230, 43)


Unnamed: 0,GAME_ID,GAME_DATE,SEASON_ID,HOME_TEAM_ID,HOME_TEAM_NAME_x,HOME_TEAM_ABBREVIATION,HOME_PTS,AWAY_TEAM_ID,AWAY_TEAM_NAME_x,AWAY_TEAM_ABBREVIATION,...,AWAY_NET_RATING,AWAY_PACE,AWAY_PIE,SCRIPT_LABEL,SCRIPT_INDEX,P_GRIND,P_BALANCED,P_CHAOS,EXPECTED_TOTAL_QEPC,EXPECTED_TOTAL_BASELINE
0,22300061,2023-10-24,22023,1610612743,Denver Nuggets,DEN,119,1610612747,Los Angeles Lakers,LAL,...,0.6,101.38,0.518,BALANCED,1,0.032611,0.88,0.087389,229.239741,228.422764
1,22300062,2023-10-24,22023,1610612744,Golden State Warriors,GSW,104,1610612756,Phoenix Suns,PHX,...,3.1,99.0,0.527,GRIND,0,0.4275,0.530833,0.041667,218.532605,228.422764
2,22300070,2023-10-25,22023,1610612741,Chicago Bulls,CHI,104,1610612760,Oklahoma City Thunder,OKC,...,7.3,100.85,0.538,BALANCED,1,0.044167,0.9025,0.053333,228.039051,228.422764
3,22300074,2023-10-25,22023,1610612746,LA Clippers,LAC,123,1610612757,Portland Trail Blazers,POR,...,-9.0,97.88,0.434,BALANCED,1,0.038,0.513007,0.448993,238.918011,228.422764
4,22300068,2023-10-25,22023,1610612748,Miami Heat,MIA,103,1610612765,Detroit Pistons,DET,...,-9.0,100.45,0.449,GRIND,0,0.090556,0.863889,0.045556,226.715976,228.422764


In [12]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

actual = model_df_with_preds["TOTAL_POINTS"].values
pred_qepc = model_df_with_preds["EXPECTED_TOTAL_QEPC"].values
pred_base = model_df_with_preds["EXPECTED_TOTAL_BASELINE"].values

# QEPC script-mixture model
mae_qepc = mean_absolute_error(actual, pred_qepc)
rmse_qepc = np.sqrt(mean_squared_error(actual, pred_qepc))

# Baseline: constant league average
mae_base = mean_absolute_error(actual, pred_base)
rmse_base = np.sqrt(mean_squared_error(actual, pred_base))

print(f"QEPC script-mixture total model ({season})")
print(f"  MAE  (QEPC)     : {mae_qepc:.3f}")
print(f"  RMSE (QEPC)     : {rmse_qepc:.3f}")
print()
print("Baseline: always predict league average total")
print(f"  MAE  (baseline) : {mae_base:.3f}")
print(f"  RMSE (baseline) : {rmse_base:.3f}")
print()

improve_mae = mae_base - mae_qepc
improve_rmse = rmse_base - rmse_qepc
print(f"Improvement in MAE  (baseline - QEPC): {improve_mae:.3f}")
print(f"Improvement in RMSE (baseline - QEPC): {improve_rmse:.3f}")


QEPC script-mixture total model (2023-24)
  MAE  (QEPC)     : 10.636
  RMSE (QEPC)     : 14.236

Baseline: always predict league average total
  MAE  (baseline) : 16.155
  RMSE (baseline) : 20.266

Improvement in MAE  (baseline - QEPC): 5.519
Improvement in RMSE (baseline - QEPC): 6.030
