In [None]:
import sys
from pathlib import Path

# Project root is taken to be the grandparent of the current working directory
# (parents[0] is the parent, parents[1] is one level above that).
PROJECT_ROOT = Path.cwd().resolve().parents[1]

# Register the root on sys.path exactly once so re-running the cell does not
# pile up duplicate entries.
_root_entry = str(PROJECT_ROOT)
if _root_entry not in sys.path:
    sys.path.append(_root_entry)

# Quick sanity output: where the root resolved to and whether the package
# directory is actually there.
print(f"PROJECT_ROOT: {PROJECT_ROOT}")
print(f"qepc in root? {(PROJECT_ROOT / 'qepc').exists()}")


In [None]:
from qepc.brain.games_loader import fetch_league_games, build_games_table
from qepc.brain.scripts import label_game_scripts_by_total_points
from qepc.brain.lambda_builder import build_script_level_lambdas

# Season identifier passed to the loaders and reused by later cells.
season = "2023-24"

# Load the season's games through the project loaders.
team_games = fetch_league_games(season)
games_df = build_games_table(team_games)

print(f"games_df rows: {len(games_df)}")
display(games_df.head())

# Label each game's script from its total points, using the 25% / 75%
# quantiles as the low / high cut-offs.
scripts_df = label_game_scripts_by_total_points(games_df, low_quantile=0.25, high_quantile=0.75)

print(f"scripts_df rows: {len(scripts_df)}")
display(scripts_df.head())

# Script-level lambdas derived from the games and their script labels.
script_lambdas = build_script_level_lambdas(games_df, scripts_df)

print("Script-level lambdas:")
display(script_lambdas)


In [None]:
from qepc.brain.teams_loader import fetch_league_team_season_stats
import pandas as pd

# Advanced team stats (OFF_RATING, DEF_RATING, NET_RATING, PACE, PIE)
team_stats_adv = fetch_league_team_season_stats(
    season,
    measure_type="Advanced",
)

# Columns we would like to carry onto each game row. Any column the fetched
# frame does not provide is skipped instead of raising.
adv_cols_keep = [
    "TEAM_ID",
    "TEAM_NAME",
    "TEAM_ABBREVIATION",
    "GP",
    "W",
    "L",
    "W_PCT",
    "MIN",
    "OFF_RATING",
    "DEF_RATING",
    "NET_RATING",
    "PACE",
    "PIE",
]
adv_cols_keep = [c for c in adv_cols_keep if c in team_stats_adv.columns]

# TEAM_ID is the join key for both merges below. Without this check the
# filter above would drop it silently and the merge would fail with a much
# less helpful KeyError on "HOME_TEAM_ID".
if "TEAM_ID" not in adv_cols_keep:
    raise KeyError(
        "TEAM_ID column missing from advanced team stats; cannot join to games_df"
    )

team_adv_small = team_stats_adv[adv_cols_keep].copy()

# Two views of the same stats, prefixed so the key columns become
# HOME_TEAM_ID / AWAY_TEAM_ID and the stat columns do not collide.
home_adv = team_adv_small.add_prefix("HOME_")
away_adv = team_adv_small.add_prefix("AWAY_")

# validate="many_to_one" makes pandas raise a MergeError if a team id appears
# more than once in the stats frame; without it, duplicate team rows would
# silently duplicate game rows and skew everything computed downstream.
# merge() returns a new frame, so games_df itself is left untouched.
games_feat = games_df.merge(
    home_adv,
    on="HOME_TEAM_ID",
    how="left",
    validate="many_to_one",
)

games_feat = games_feat.merge(
    away_adv,
    on="AWAY_TEAM_ID",
    how="left",
    validate="many_to_one",
)

print("games_feat shape:", games_feat.shape)
display(games_feat.head())


In [None]:
# Attach each game's script label and index to the feature table. A left join
# keeps every row of games_feat; games without a script entry get NaN in the
# two script columns.
games_with_scripts = games_feat.merge(
    scripts_df.loc[:, ["GAME_ID", "SCRIPT_LABEL", "SCRIPT_INDEX"]],
    how="left",
    on="GAME_ID",
)

print(f"games_with_scripts shape: {games_with_scripts.shape}")
display(games_with_scripts.head())


In [None]:
import joblib

# Location of the saved script classifier for the selected season.
model_dir = PROJECT_ROOT / "data" / "processed" / "nba" / "models"
model_path = model_dir / f"script_classifier_rf_{season}.joblib"

# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files produced by this project.
print(f"Loading classifier from: {model_path}")
clf = joblib.load(model_path)

# Show what was loaded and the class labels it predicts.
print(f"Loaded classifier: {clf}")
print(f"Classes: {clf.classes_}")


In [None]:
import numpy as np

# Candidate model inputs: home and away advanced ratings.
feature_cols = [
    "HOME_OFF_RATING",
    "HOME_DEF_RATING",
    "HOME_NET_RATING",
    "HOME_PACE",
    "HOME_PIE",
    "AWAY_OFF_RATING",
    "AWAY_DEF_RATING",
    "AWAY_NET_RATING",
    "AWAY_PACE",
    "AWAY_PIE",
]

# Keep only the features present in the merged frame, but report any that are
# skipped so a feature-count mismatch with the classifier is easy to trace.
missing_feature_cols = [c for c in feature_cols if c not in games_with_scripts.columns]
feature_cols = [c for c in feature_cols if c in games_with_scripts.columns]
if missing_feature_cols:
    print("WARNING: feature columns not found and skipped:", missing_feature_cols)
print("Using feature columns:", feature_cols)

# Rows must have a script label AND a complete feature vector. The team stats
# were attached with left joins, so a game whose team id found no match carries
# NaN features; those rows are dropped here (and counted) so NaN never reaches
# clf.predict_proba.
has_label = games_with_scripts["SCRIPT_INDEX"].notna()
has_features = games_with_scripts[feature_cols].notna().all(axis=1)

n_incomplete = int((has_label & ~has_features).sum())
if n_incomplete:
    print(f"Dropped {n_incomplete} labelled game(s) with missing feature values")

model_df = games_with_scripts.loc[has_label & has_features].copy()

# Row i of X_all corresponds to model_df.iloc[i].
X_all = model_df[feature_cols].to_numpy(dtype=float)

print("model_df shape:", model_df.shape)


In [None]:
# Positional index (into model_df / X_all) of the game to inspect.
# Change this to look at a different game.
row_idx = 0

game_row = model_df.iloc[row_idx]
# Slice rather than index so the result stays 2D, as sklearn expects.
x_row = X_all[row_idx : row_idx + 1]

print("Available columns in game_row:")
print(list(game_row.index))

# Column names that may or may not exist depending on how the games table was
# built; only those actually present are shown.
candidate_cols = [
    "GAME_ID", "GAME_DATE",
    "HOME_TEAM_NAME", "AWAY_TEAM_NAME",
    "HOME_TEAM", "AWAY_TEAM",
    "HOME_TEAM_ABBREVIATION", "AWAY_TEAM_ABBREVIATION",
    "HOME_TEAM_TRICODE", "AWAY_TEAM_TRICODE",
    "TOTAL_POINTS",
    "SCRIPT_LABEL",
]

preview_cols = [col for col in candidate_cols if col in game_row.index]

print("\nDemo game (subset of columns):")
display(game_row[preview_cols])


In [None]:
# Class probabilities for the single demo game (first and only row).
probs = clf.predict_proba(x_row)[0]
class_order = list(clf.classes_)  # e.g. [0, 1, 2]

# Positions of class labels 0 / 1 / 2 within the classifier's own ordering;
# this notebook reads them as GRIND / BALANCED / CHAOS respectively.
i_grind, i_bal, i_chaos = (class_order.index(label) for label in (0, 1, 2))

# Plain Python floats for printing and for the mixture computation.
p_grind, p_bal, p_chaos = (float(probs[pos]) for pos in (i_grind, i_bal, i_chaos))

print("Script probabilities for this game:")
print(f"P_GRIND   = {p_grind:.3f}")
print(f"P_BALANCED= {p_bal:.3f}")
print(f"P_CHAOS   = {p_chaos:.3f}")
print(f"Sum: {p_grind + p_bal + p_chaos}")


In [None]:
from qepc.brain.lambda_builder import expected_total_from_script_mix

# Combine the script-level lambdas with this game's script probabilities.
expected_total = expected_total_from_script_mix(
    script_lambdas=script_lambdas, p_grind=p_grind, p_balanced=p_bal, p_chaos=p_chaos
)

print("\nScript-level lambdas:")
display(script_lambdas)

# Side-by-side: what actually happened vs. what the mixture expects.
print("\nFor this specific game:")
actual_total = float(game_row["TOTAL_POINTS"])
print(f"Actual final total:         {actual_total:.1f}")
print(f"Script-mixture expected total: {expected_total:.1f}")


In [None]:
import numpy as np

# Mean of TOTAL_POINTS over every row of games_df; used later as the constant
# baseline prediction.
# NOTE(review): this averages over games_df, while the evaluation runs on
# model_df — confirm both cover the same set of games.
league_avg_total = games_df["TOTAL_POINTS"].mean()

print(
    f"League average total for {season}: {league_avg_total:.2f}"
)


In [None]:
from qepc.brain.lambda_builder import expected_total_from_script_mix

# 1) Script probabilities for all games: one row per game, one column per class
probs_all = clf.predict_proba(X_all)
class_order = list(clf.classes_)  # e.g. [0, 1, 2]

# Column positions of class labels 0 / 1 / 2 in the classifier's ordering.
i_grind, i_bal, i_chaos = (class_order.index(label) for label in (0, 1, 2))

# assign() returns a new frame, so model_df itself is left unchanged.
model_df_with_preds = model_df.assign(
    P_GRIND=probs_all[:, i_grind],
    P_BALANCED=probs_all[:, i_bal],
    P_CHAOS=probs_all[:, i_chaos],
)

# 2) Use script mixture to get expected total for each game
def _mix_expected_total(row):
    """Return the script-mixture expected total for one game row.

    Reads the row's ``P_GRIND``, ``P_BALANCED`` and ``P_CHAOS`` entries and
    passes them, together with the notebook-level ``script_lambdas``, to
    ``expected_total_from_script_mix``.
    """
    script_mix = {
        "p_grind": row["P_GRIND"],
        "p_balanced": row["P_BALANCED"],
        "p_chaos": row["P_CHAOS"],
    }
    return expected_total_from_script_mix(script_lambdas=script_lambdas, **script_mix)

# Row-wise evaluation: each game's probabilities are turned into one expected total.
model_df_with_preds["EXPECTED_TOTAL_QEPC"] = model_df_with_preds.apply(_mix_expected_total, axis=1)

# 3) Baseline prediction (same number for every game)
model_df_with_preds["EXPECTED_TOTAL_BASELINE"] = league_avg_total

print(f"model_df_with_preds shape: {model_df_with_preds.shape}")
display(model_df_with_preds.head())


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Observed totals and the two competing predictions, as plain arrays.
actual = model_df_with_preds["TOTAL_POINTS"].values
pred_qepc = model_df_with_preds["EXPECTED_TOTAL_QEPC"].values
pred_base = model_df_with_preds["EXPECTED_TOTAL_BASELINE"].values

# Mean absolute error for both predictors.
mae_qepc = mean_absolute_error(actual, pred_qepc)
mae_base = mean_absolute_error(actual, pred_base)

# Root mean squared error for both predictors.
rmse_qepc = np.sqrt(mean_squared_error(actual, pred_qepc))
rmse_base = np.sqrt(mean_squared_error(actual, pred_base))

# Positive values mean the script-mixture model beats the constant baseline.
improve_mae = mae_base - mae_qepc
improve_rmse = rmse_base - rmse_qepc

print(f"QEPC script-mixture total model ({season})")
print(f"  MAE  (QEPC)     : {mae_qepc:.3f}")
print(f"  RMSE (QEPC)     : {rmse_qepc:.3f}")
print()
print("Baseline: always predict league average total")
print(f"  MAE  (baseline) : {mae_base:.3f}")
print(f"  RMSE (baseline) : {rmse_base:.3f}")
print()

print(f"Improvement in MAE  (baseline - QEPC): {improve_mae:.3f}")
print(f"Improvement in RMSE (baseline - QEPC): {improve_rmse:.3f}")
