In [None]:
import sys
from pathlib import Path

# Locate the project root by walking up from the working directory to the
# nearest directory (the working directory itself included) that contains
# ``qepc``.  A fixed "two levels up" lookup raises IndexError in a shallow
# directory and selects the wrong folder whenever the kernel is not started
# exactly two levels below the root.
_start_dir = Path.cwd().resolve()
PROJECT_ROOT = next(
    (d for d in (_start_dir, *_start_dir.parents) if (d / "qepc").exists()),
    None,
)
if PROJECT_ROOT is None:
    # Nothing found: keep the historical "two levels up" guess when it exists
    # so the diagnostic prints below still show where the lookup ended up.
    PROJECT_ROOT = (
        _start_dir.parents[1] if len(_start_dir.parents) > 1 else _start_dir
    )

# Make the project importable without adding duplicate sys.path entries when
# the cell is re-run.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

print("PROJECT_ROOT:", PROJECT_ROOT)
print("qepc in root?", (PROJECT_ROOT / "qepc").exists())


In [None]:
import pandas as pd

from qepc.brain.games_loader import fetch_league_games, build_games_table
from qepc.brain.scripts import label_game_scripts_by_total_points
from qepc.brain.lambda_builder import build_script_level_lambdas

# Season identifier handed to every loader in this notebook.
season = "2023-24"

# Team-level game log first, then the per-game table derived from it.
team_games = fetch_league_games(season)
games_df = build_games_table(team_games)
print(f"games_df rows: {len(games_df)}")
display(games_df.head())

# Label each game with a script using the 25% / 75% quantile cut-offs.
scripts_df = label_game_scripts_by_total_points(
    games_df, low_quantile=0.25, high_quantile=0.75
)
print(f"scripts_df rows: {len(scripts_df)}")
display(scripts_df.head())

# Per-script scoring parameters used by the simulations further down.
script_lambdas = build_script_level_lambdas(games_df, scripts_df)
print("Script-level lambdas:")
display(script_lambdas)


In [None]:
from qepc.brain.teams_loader import fetch_league_team_season_stats
import numpy as np

# Advanced team stats for the season (one table for the whole league).
team_stats_adv = fetch_league_team_season_stats(
    season,
    measure_type="Advanced",
)

# Keep only the columns that are wanted AND actually present in the response.
adv_cols_keep = [
    "TEAM_ID",
    "TEAM_NAME",
    "TEAM_ABBREVIATION",
    "GP",
    "W",
    "L",
    "W_PCT",
    "MIN",
    "OFF_RATING",
    "DEF_RATING",
    "NET_RATING",
    "PACE",
    "PIE",
]
adv_cols_keep = [c for c in adv_cols_keep if c in team_stats_adv.columns]
if "TEAM_ID" not in adv_cols_keep:
    # Without the key the joins below cannot work; say so explicitly instead
    # of failing inside merge with a bare column name.
    raise KeyError(
        "team_stats_adv has no 'TEAM_ID' column; cannot join team stats onto games."
    )
team_adv_small = team_stats_adv[adv_cols_keep].copy()

# Two prefixed copies so the same stats can be attached once per side.
home_adv = team_adv_small.add_prefix("HOME_")
away_adv = team_adv_small.add_prefix("AWAY_")

# validate="many_to_one" makes pandas raise MergeError if the right-hand side
# has duplicate keys, which would otherwise silently duplicate game rows and
# skew every downstream metric.
games_feat = games_df.merge(
    home_adv,
    on="HOME_TEAM_ID",
    how="left",
    validate="many_to_one",
)

games_feat = games_feat.merge(
    away_adv,
    on="AWAY_TEAM_ID",
    how="left",
    validate="many_to_one",
)

# Attach the script label/index; games without a label keep NaN (left join).
games_with_scripts = games_feat.merge(
    scripts_df[["GAME_ID", "SCRIPT_LABEL", "SCRIPT_INDEX"]],
    on="GAME_ID",
    how="left",
    validate="many_to_one",
)

print("games_with_scripts shape:", games_with_scripts.shape)
display(games_with_scripts.head())


In [None]:
import joblib

model_dir = PROJECT_ROOT / "data" / "processed" / "nba" / "models"
model_path = model_dir / f"script_classifier_rf_{season}.joblib"

print("Loading classifier from:", model_path)
# NOTE: joblib.load unpickles arbitrary Python objects; only load model files
# that were produced by this project / a trusted source.
clf = joblib.load(model_path)
print("Loaded classifier. Classes:", clf.classes_)

# Candidate model inputs; only the ones present in games_with_scripts are used.
feature_cols = [
    "HOME_OFF_RATING",
    "HOME_DEF_RATING",
    "HOME_NET_RATING",
    "HOME_PACE",
    "HOME_PIE",
    "AWAY_OFF_RATING",
    "AWAY_DEF_RATING",
    "AWAY_NET_RATING",
    "AWAY_PACE",
    "AWAY_PIE",
]
_missing_feature_cols = [
    c for c in feature_cols if c not in games_with_scripts.columns
]
feature_cols = [c for c in feature_cols if c in games_with_scripts.columns]
print("Using feature columns:", feature_cols)

# Fail here, with the missing column names, rather than later inside
# predict_proba with a bare shape-mismatch error.  n_features_in_ is only
# checked when the loaded estimator exposes it.
_n_expected_features = getattr(clf, "n_features_in_", None)
if _n_expected_features is not None and len(feature_cols) != _n_expected_features:
    raise ValueError(
        f"Classifier expects {_n_expected_features} features but "
        f"{len(feature_cols)} are available; missing columns: {_missing_feature_cols}"
    )

# Only games that received a script index are kept for modelling.
model_df = games_with_scripts.dropna(subset=["SCRIPT_INDEX"]).copy()
X_all = model_df[feature_cols].values.astype(float)

print("model_df shape:", model_df.shape)


In [None]:
from qepc.brain.lambda_builder import build_team_script_lambdas

# Scoring parameters broken down further than the script-level table, built
# from the team game log, the game table and the script labels.
team_script_lambdas = build_team_script_lambdas(
    team_games_df=team_games, games_df=games_df, scripts_df=scripts_df
)

print(f"team_script_lambdas shape: {team_script_lambdas.shape}")
display(team_script_lambdas.head())


In [None]:
import numpy as np

def build_team_script_param_dict(team_script_lambdas_df: pd.DataFrame):
    """
    Build a dict mapping (TEAM_ID, TEAM_ROLE, SCRIPT_LABEL) -> (mean_team_pts, std_team_pts).

    Parameters
    ----------
    team_script_lambdas_df : pd.DataFrame
        Must contain the columns TEAM_ID, TEAM_ROLE, SCRIPT_LABEL,
        mean_team_pts and std_team_pts.

    Returns
    -------
    dict
        Keys are ``(int(TEAM_ID), TEAM_ROLE, SCRIPT_LABEL)`` tuples, values are
        ``(mean, std)`` float pairs.  A missing std (NaN, None or pd.NA) is
        stored as 0.0.  If a key occurs more than once the last row wins.

    Raises
    ------
    KeyError
        If one of the required columns is absent.
    """
    params = {}
    columns = (
        team_script_lambdas_df["TEAM_ID"],
        team_script_lambdas_df["TEAM_ROLE"],
        team_script_lambdas_df["SCRIPT_LABEL"],
        team_script_lambdas_df["mean_team_pts"],
        team_script_lambdas_df["std_team_pts"],
    )
    # Iterating the columns in lock-step avoids building a Series per row.
    for team_id, role, label, mean_pts, std_pts in zip(*columns):
        # pd.isna also covers None / pd.NA, on which np.isnan raises TypeError.
        std_val = 0.0 if pd.isna(std_pts) else float(std_pts)
        params[(int(team_id), role, label)] = (float(mean_pts), std_val)
    return params


# Materialise the lookup once and show a handful of entries as a sanity check.
team_script_params = build_team_script_param_dict(team_script_lambdas)
print("Sample entries from team_script_params (first 5):")
for key, params in list(team_script_params.items())[:5]:
    print(key, "->", params)


In [None]:
# Fallback: global script-level means for team points (approx half total).
# Used when a (team, role, script) combination has no entry of its own.
# NOTE(review): the std is halved as well. If the two teams' scores were
# independent with equal variance, the per-team std would be
# std_total / sqrt(2); halving understates the spread in that case.
# Confirm which approximation is intended.
script_fallback = {}
for label, mean_total, std_total in zip(
    script_lambdas["SCRIPT_LABEL"],
    script_lambdas["mean_total_pts"],
    script_lambdas["std_total_pts"],
):
    # pd.isna also covers None / pd.NA, on which np.isnan raises TypeError.
    std_total = 0.0 if pd.isna(std_total) else float(std_total)
    script_fallback[label] = (float(mean_total) / 2.0, std_total / 2.0)

# The message previously contained the mis-encoded sequence "Î»" for "λ".
print("Script-level fallback (approx per-team λ):", script_fallback)


In [None]:
def simulate_team_totals_for_row(
    game_row: pd.Series,
    x_row: np.ndarray,
    clf,
    team_script_params: dict,
    script_fallback: dict,
    n_sims: int = 5000,
    random_state: int | None = 42,
):
    """
    Simulate home/away/team totals for a single game using:
      - script probabilities from clf
      - team+script mean/std from team_script_params
      - fallback per-script means if team+script is missing
    """
    rng = np.random.default_rng(random_state)

    # Extract home/away team IDs
    if "HOME_TEAM_ID" not in game_row.index or "AWAY_TEAM_ID" not in game_row.index:
        raise ValueError("game_row must contain HOME_TEAM_ID and AWAY_TEAM_ID")

    home_id = int(game_row["HOME_TEAM_ID"])
    away_id = int(game_row["AWAY_TEAM_ID"])

    # --- 1) Script probabilities from classifier ---
    probs = clf.predict_proba(x_row)[0]
    class_order = list(clf.classes_)  # [0,1,2] mapping to indices

    # Map class index -> script label
    idx_to_label = {0: "GRIND", 1: "BALANCED", 2: "CHAOS"}

    p_grind = float(probs[class_order.index(0)])
    p_bal   = float(probs[class_order.index(1)])
    p_chaos = float(probs[class_order.index(2)])

    p_vec = np.array([p_grind, p_bal, p_chaos])
    labels = np.array(["GRIND", "BALANCED", "CHAOS"])

    # --- 2) Expected totals (analytic, using team-level params) ---

    expected_home = 0.0
    expected_away = 0.0

    for label, p in zip(labels, p_vec):
        # Home
        key_home = (home_id, "HOME", label)
        if key_home in team_script_params:
            mean_home, _ = team_script_params[key_home]
        else:
            mean_home, _ = script_fallback[label]

        # Away
        key_away = (away_id, "AWAY", label)
        if key_away in team_script_params:
            mean_away, _ = team_script_params[key_away]
        else:
            mean_away, _ = script_fallback[label]

        expected_home += p * mean_home
        expected_away += p * mean_away

    expected_total = expected_home + expected_away

    # --- 3) Monte Carlo simulation ---

    # Sample scripts for each universe
    script_indices = rng.choice(len(labels), size=n_sims, p=p_vec)
    sampled_labels = labels[script_indices]

    samples_home = np.empty(n_sims, dtype=float)
    samples_away = np.empty(n_sims, dtype=float)

    for i, label in enumerate(sampled_labels):
        # Home team points
        key_home = (home_id, "HOME", label)
        if key_home in team_script_params:
            mean_home, std_home = team_script_params[key_home]
        else:
            mean_home, std_home = script_fallback[label]

        # Away team points
        key_away = (away_id, "AWAY", label)
        if key_away in team_script_params:
            mean_away, std_away = team_script_params[key_away]
        else:
            mean_away, std_away = script_fallback[label]

        if std_home <= 0:
            samples_home[i] = mean_home
        else:
            samples_home[i] = rng.normal(loc=mean_home, scale=std_home)

        if std_away <= 0:
            samples_away[i] = mean_away
        else:
            samples_away[i] = rng.normal(loc=mean_away, scale=std_away)

    # Clip to keep scores sane
    samples_home = np.clip(samples_home, 70, 150)
    samples_away = np.clip(samples_away, 70, 150)

    samples_total = samples_home + samples_away

    summary_total = {
        "mean_total": float(samples_total.mean()),
        "std_total": float(samples_total.std()),
        "p_over_230": float((samples_total > 230).mean()),
        "p_over_240": float((samples_total > 240).mean()),
        "p_under_220": float((samples_total < 220).mean()),
        "p_between_220_240": float(
            ((samples_total >= 220) & (samples_total <= 240)).mean()
        ),
        "p95_total": float(np.percentile(samples_total, 95)),
        "p05_total": float(np.percentile(samples_total, 5)),
        }

    summary_home = {
        "mean_home": float(samples_home.mean()),
        "std_home": float(samples_home.std()),
        "p_home_over_110": float((samples_home > 110).mean()),
        "p05_home": float(np.percentile(samples_home, 5)),
        "p95_home": float(np.percentile(samples_home, 95)),
        }

    summary_away = {
        "mean_away": float(samples_away.mean()),
        "std_away": float(samples_away.std()),
        "p_away_over_110": float((samples_away > 110).mean()),
        "p05_away": float(np.percentile(samples_away, 5)),
        "p95_away": float(np.percentile(samples_away, 95)),
        }

    return {
        "p_grind": p_grind,
        "p_balanced": p_bal,
        "p_chaos": p_chaos,
        "expected_home": float(expected_home),
        "expected_away": float(expected_away),
        "expected_total": float(expected_total),
        "samples_home": samples_home,
        "samples_away": samples_away,
        "samples_total": samples_total,
        "summary_home": summary_home,
        "summary_away": summary_away,
        "summary_total": summary_total,
    }


In [None]:
import numpy as np

def build_script_param_dict(script_lambdas_df: pd.DataFrame):
    """
    Build a dict:
        label -> (mean_total_pts, std_total_pts)

    Parameters
    ----------
    script_lambdas_df : pd.DataFrame
        Must contain the columns SCRIPT_LABEL, mean_total_pts and
        std_total_pts.

    Returns
    -------
    dict
        ``SCRIPT_LABEL -> (mean, std)`` with both values as floats.  A
        missing std (NaN, None or pd.NA) is stored as 0.0.  If a label occurs
        more than once the last row wins.

    Raises
    ------
    KeyError
        If one of the required columns is absent.
    """
    params = {}
    for label, mean_pts, std_pts in zip(
        script_lambdas_df["SCRIPT_LABEL"],
        script_lambdas_df["mean_total_pts"],
        script_lambdas_df["std_total_pts"],
    ):
        # pd.isna also covers None / pd.NA, on which np.isnan raises TypeError.
        std_val = 0.0 if pd.isna(std_pts) else float(std_pts)
        params[label] = (float(mean_pts), std_val)
    return params


# Script label -> (mean, std) of total points, built from script_lambdas.
script_params = build_script_param_dict(script_lambdas)
print(f"script_params: {script_params}")


In [None]:
def simulate_game_totals_for_row(
    game_row: pd.Series,
    x_row: np.ndarray,
    clf,
    script_params: dict,
    n_sims: int = 5000,
    random_state: int | None = 42,
):
    """
    Simulate total points for a single game using:
      - script probabilities from clf
      - per-script mean/std from script_params
      - Normal approximation for totals in each script

    Returns:
      dict with:
        - p_grind, p_balanced, p_chaos
        - expected_total_qepc
        - samples (np.array of simulated totals)
        - summary stats (mean, std, percentiles)
    """
    rng = np.random.default_rng(random_state)

    # 1) Script probabilities from classifier
    probs = clf.predict_proba(x_row)[0]
    class_order = list(clf.classes_)  # [0,1,2] mapping to [GRIND,BALANCED,CHAOS]

    # Map classes to script labels
    label_for_class = {
        0: "GRIND",
        1: "BALANCED",
        2: "CHAOS",
    }
    # Build a vector of probabilities in GRIND/BALANCED/CHAOS order
    p_grind = float(probs[class_order.index(0)])
    p_bal   = float(probs[class_order.index(1)])
    p_chaos = float(probs[class_order.index(2)])

    p_vec = np.array([p_grind, p_bal, p_chaos])
    labels = np.array(["GRIND", "BALANCED", "CHAOS"])

    # 2) Expected total from mixture (sanity check)
    expected_total_qepc = 0.0
    for label, p in zip(labels, p_vec):
        mean_t, _ = script_params[label]
        expected_total_qepc += p * mean_t

    # 3) Simulate scripts
    script_indices = rng.choice(len(labels), size=n_sims, p=p_vec)
    sampled_labels = labels[script_indices]

    # 4) For each simulation, draw total from Normal(mean_s, std_s)
    samples = np.empty(n_sims, dtype=float)
    for i, label in enumerate(sampled_labels):
        mean_t, std_t = script_params[label]
        if std_t <= 0:
            samples[i] = mean_t
        else:
            samples[i] = rng.normal(loc=mean_t, scale=std_t)

    # (Optional) we can clip unrealistic totals
    samples = np.clip(samples, 120, 320)  # arbitrary, but keeps extremes sane

    # 5) Summaries
    summary = {
        "mean": float(samples.mean()),
        "std": float(samples.std()),
        "p_over_230": float((samples > 230).mean()),
        "p_over_240": float((samples > 240).mean()),
        "p_under_220": float((samples < 220).mean()),
        "p_between_220_240": float(((samples >= 220) & (samples <= 240)).mean()),
        "p95": float(np.percentile(samples, 95)),
        "p05": float(np.percentile(samples, 5)),
    }

    return {
        "p_grind": p_grind,
        "p_balanced": p_bal,
        "p_chaos": p_chaos,
        "expected_total_qepc": float(expected_total_qepc),
        "samples": samples,
        "summary": summary,
    }


In [None]:
# Pick one game by position in model_df (change row_idx to inspect another).
row_idx = 0

game_row = model_df.iloc[row_idx]
# Slice (not index) so the feature row stays 2-D for predict_proba.
x_row = X_all[row_idx : row_idx + 1]

print("Game info (available columns):")
print(game_row.index.tolist())

# Identification columns differ between table versions; show those present.
candidate_cols = [
    "GAME_ID",
    "GAME_DATE",
    "HOME_TEAM_NAME",
    "AWAY_TEAM_NAME",
    "HOME_TEAM",
    "AWAY_TEAM",
    "HOME_TEAM_ABBREVIATION",
    "AWAY_TEAM_ABBREVIATION",
    "TOTAL_POINTS",
    "SCRIPT_LABEL",
]
preview_cols = list(filter(lambda col: col in game_row.index, candidate_cols))

print("\nDemo game:")
display(game_row[preview_cols])

result = simulate_game_totals_for_row(
    game_row=game_row,
    x_row=x_row,
    clf=clf,
    script_params=script_params,
    n_sims=5000,
    random_state=123,
)

# The three script probabilities should add up to 1.
prob_sum = result["p_grind"] + result["p_balanced"] + result["p_chaos"]
print("\nScript probabilities:")
print(f"P_GRIND    = {result['p_grind']:.3f}")
print(f"P_BALANCED = {result['p_balanced']:.3f}")
print(f"P_CHAOS    = {result['p_chaos']:.3f}")
print(f"Sum        = {prob_sum:.3f}")

print(f"\nExpected total from mixture: {result['expected_total_qepc']:.1f}")

print("\nSimulated summary from multiverse:")
for stat_name, stat_value in result["summary"].items():
    print(f"  {stat_name}: {stat_value:.3f}")

# Compare against what actually happened in this game.
actual_total = float(game_row["TOTAL_POINTS"])
print(f"\nActual final total: {actual_total:.1f}")


In [None]:
import numpy as np
import pandas as pd

# Run the total-points simulation for every game in model_df and collect one
# summary record per game.
all_results = []

n_sims_per_game = 2000  # you can tweak this up/down

n_games = len(model_df)
print(f"Simulating {n_games} games with {n_sims_per_game} sims each...")

# Output column -> key in the simulator's "summary" dict.
summary_columns = {
    "MEAN_SIM": "mean",
    "STD_SIM": "std",
    "P_OVER_230": "p_over_230",
    "P_OVER_240": "p_over_240",
    "P_UNDER_220": "p_under_220",
    "P_BETWEEN_220_240": "p_between_220_240",
    "P05": "p05",
    "P95": "p95",
}

for i in range(n_games):
    game_row = model_df.iloc[i]
    x_row = X_all[i : i + 1]  # keep 2D for sklearn

    res = simulate_game_totals_for_row(
        game_row=game_row,
        x_row=x_row,
        clf=clf,
        script_params=script_params,
        n_sims=n_sims_per_game,
        random_state=123 + i,  # different seed per game
    )

    actual_total = float(game_row["TOTAL_POINTS"])
    script_label = game_row.get("SCRIPT_LABEL")  # None when the column is absent
    summary = res["summary"]

    record = {
        "GAME_ID": game_row["GAME_ID"],
        "SCRIPT_LABEL": script_label,
        "ACTUAL_TOTAL": actual_total,
        "P_GRIND": res["p_grind"],
        "P_BALANCED": res["p_balanced"],
        "P_CHAOS": res["p_chaos"],
        "EXPECTED_TOTAL_QEPC": res["expected_total_qepc"],
    }
    record.update({col: summary[key] for col, key in summary_columns.items()})
    all_results.append(record)

    # Progress line every 100 games.
    if not (i + 1) % 100:
        print(f"  ... {i + 1}/{n_games} games simulated")

eval_df = pd.DataFrame(all_results)

print(f"\nEvaluation dataframe shape: {eval_df.shape}")
display(eval_df.head())


In [None]:
# Flag games whose actual total lies within the simulated [P05, P95] band
# (both ends inclusive), then report the share of such games.
eval_df["INSIDE_90"] = eval_df["ACTUAL_TOTAL"].between(
    eval_df["P05"], eval_df["P95"]
)

coverage_90 = eval_df["INSIDE_90"].mean()

print(f"Fraction of games where actual total is inside [P05, P95]: {coverage_90:.3f}")
print("Expected for a well-calibrated 90% interval: ~0.90")


In [None]:
# Same coverage figure, split by the script label attached to each game.
if "SCRIPT_LABEL" in eval_df:
    coverage_by_script = eval_df["INSIDE_90"].groupby(eval_df["SCRIPT_LABEL"]).mean()
    print("Coverage by script label:")
    display(coverage_by_script)


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Point-forecast accuracy when the simulated mean is used as the prediction.
actual = eval_df["ACTUAL_TOTAL"].to_numpy()
pred_sim_mean = eval_df["MEAN_SIM"].to_numpy()

mae_sim = mean_absolute_error(actual, pred_sim_mean)
mse_sim = mean_squared_error(actual, pred_sim_mean)
rmse_sim = np.sqrt(mse_sim)

print("Simulation-based mean forecast:")
print(f"  MAE  : {mae_sim:.3f}")
print(f"  RMSE : {rmse_sim:.3f}")


In [None]:
# Same demo game as the totals-only run, this time with the team-aware
# simulator so the two outputs can be compared (change row_idx to switch).
row_idx = 0

game_row = model_df.iloc[row_idx]
# Slice (not index) so the feature row stays 2-D for predict_proba.
x_row = X_all[row_idx : row_idx + 1]

candidate_cols = [
    "GAME_ID",
    "GAME_DATE",
    "HOME_TEAM_ABBREVIATION",
    "AWAY_TEAM_ABBREVIATION",
    "HOME_TEAM_NAME",
    "AWAY_TEAM_NAME",
    "TOTAL_POINTS",
    "SCRIPT_LABEL",
]
preview_cols = list(filter(lambda col: col in game_row.index, candidate_cols))

print("Game info:")
display(game_row[preview_cols])

result_team = simulate_team_totals_for_row(
    game_row=game_row,
    x_row=x_row,
    clf=clf,
    team_script_params=team_script_params,
    script_fallback=script_fallback,
    n_sims=5000,
    random_state=123,
)

print("\nScript probabilities (from classifier):")
print(f"P_GRIND    = {result_team['p_grind']:.3f}")
print(f"P_BALANCED = {result_team['p_balanced']:.3f}")
print(f"P_CHAOS    = {result_team['p_chaos']:.3f}")

print("\nAnalytic expectations (team-aware):")
print(f"Expected home pts:  {result_team['expected_home']:.1f}")
print(f"Expected away pts:  {result_team['expected_away']:.1f}")
print(f"Expected total pts: {result_team['expected_total']:.1f}")

actual_total = float(game_row["TOTAL_POINTS"])
print(f"\nActual final total: {actual_total:.1f}")

# One printed section per summary dict returned by the simulator.
summary_sections = (
    ("\nSimulation summary (home):", "summary_home"),
    ("\nSimulation summary (away):", "summary_away"),
    ("\nSimulation summary (total):", "summary_total"),
)
for header, section_key in summary_sections:
    print(header)
    for k, v in result_team[section_key].items():
        print(f"  {k}: {v:.3f}")


In [None]:
import pandas as pd

# Rebuild each game's home and away score from the team-level log.
# team_games came from fetch_league_games(season);
# games_df came from build_games_table(team_games).

# The points column is named either PTS or TEAM_POINTS; PTS wins if both exist.
pts_col = next(
    (name for name in ("PTS", "TEAM_POINTS") if name in team_games.columns),
    None,
)
if pts_col is None:
    raise ValueError("team_games must contain 'PTS' or 'TEAM_POINTS'.")

team_pts = team_games[["GAME_ID", "TEAM_ID", pts_col]].rename(
    columns={pts_col: "TEAM_POINTS"}
)

gsmall = games_df[["GAME_ID", "HOME_TEAM_ID", "AWAY_TEAM_ID"]].copy()

# Every team row learns which team was home / away in that game.
merged_scores = team_pts.merge(gsmall, on="GAME_ID", how="left")

is_home_row = merged_scores["TEAM_ID"] == merged_scores["HOME_TEAM_ID"]
is_away_row = merged_scores["TEAM_ID"] == merged_scores["AWAY_TEAM_ID"]

home_scores = merged_scores.loc[is_home_row, ["GAME_ID", "TEAM_POINTS"]].rename(
    columns={"TEAM_POINTS": "HOME_PTS"}
)
away_scores = merged_scores.loc[is_away_row, ["GAME_ID", "TEAM_POINTS"]].rename(
    columns={"TEAM_POINTS": "AWAY_PTS"}
)

# Inner join: only games with both a home and an away score survive.
game_scores = home_scores.merge(away_scores, on="GAME_ID", how="inner")

# String ids so lookups by str(GAME_ID) match regardless of the source dtype.
game_scores["GAME_ID"] = game_scores["GAME_ID"].astype(str)

print(f"game_scores shape: {game_scores.shape}")
display(game_scores.head())

# GAME_ID -> {"HOME_PTS": ..., "AWAY_PTS": ...}
score_map = game_scores.set_index("GAME_ID")[["HOME_PTS", "AWAY_PTS"]].to_dict(
    orient="index"
)
print("Sample score_map entry:", next(iter(score_map.items())))


In [None]:
import numpy as np

# Run the team-aware simulation for every game in model_df that has a known
# home/away score and collect one summary record per game.
all_team_results = []

n_sims_per_game = 2000
n_games = len(model_df)
print(f"Simulating {n_games} games with {n_sims_per_game} sims each (team-aware)...")

for i in range(n_games):
    game_row = model_df.iloc[i]
    x_row = X_all[i : i + 1]

    game_id_str = str(game_row["GAME_ID"])
    scores = score_map.get(game_id_str)
    if scores is None:
        # No recorded home/away split for this game: nothing to compare to.
        continue

    actual_home = float(scores["HOME_PTS"])
    actual_away = float(scores["AWAY_PTS"])
    actual_total = actual_home + actual_away

    res = simulate_team_totals_for_row(
        game_row=game_row,
        x_row=x_row,
        clf=clf,
        team_script_params=team_script_params,
        script_fallback=script_fallback,
        n_sims=n_sims_per_game,
        random_state=123 + i,  # different seed per game
    )

    sh, sa, st = res["summary_home"], res["summary_away"], res["summary_total"]

    record = dict(
        GAME_ID=game_id_str,
        SCRIPT_LABEL=game_row.get("SCRIPT_LABEL"),
        ACTUAL_HOME=actual_home,
        ACTUAL_AWAY=actual_away,
        ACTUAL_TOTAL=actual_total,
        EXP_HOME=res["expected_home"],
        EXP_AWAY=res["expected_away"],
        EXP_TOTAL=res["expected_total"],
        MEAN_HOME_SIM=sh["mean_home"],
        MEAN_AWAY_SIM=sa["mean_away"],
        MEAN_TOTAL_SIM=st["mean_total"],
        HOME_P05=sh["p05_home"],
        HOME_P95=sh["p95_home"],
        AWAY_P05=sa["p05_away"],
        AWAY_P95=sa["p95_away"],
        TOTAL_P05=st["p05_total"],
        TOTAL_P95=st["p95_total"],
    )
    all_team_results.append(record)

    # Progress line every 100 games (counts skipped games too).
    if not (i + 1) % 100:
        print(f"  ... {i + 1}/{n_games} games processed")

eval_team_df = pd.DataFrame(all_team_results)

print(f"\nEvaluation team-level dataframe shape: {eval_team_df.shape}")
display(eval_team_df.head())


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# League-average baselines: home/away averages come from game_scores, the
# total average from games_df.
league_home_avg = game_scores["HOME_PTS"].mean()
league_away_avg = game_scores["AWAY_PTS"].mean()
league_total_avg = games_df["TOTAL_POINTS"].mean()

print(f"League average home pts : {league_home_avg:.2f}")
print(f"League average away pts : {league_away_avg:.2f}")
print(f"League average total pts: {league_total_avg:.2f}")

# Actual outcomes for the evaluated games.
act_home = eval_team_df["ACTUAL_HOME"].to_numpy()
act_away = eval_team_df["ACTUAL_AWAY"].to_numpy()
act_total = eval_team_df["ACTUAL_TOTAL"].to_numpy()

# Model forecasts: the analytic (probability-weighted) expectations.
pred_home_qepc = eval_team_df["EXP_HOME"].to_numpy()
pred_away_qepc = eval_team_df["EXP_AWAY"].to_numpy()
pred_total_qepc = eval_team_df["EXP_TOTAL"].to_numpy()

# Baseline forecasts: the same league average repeated for every game.
pred_home_base = np.full(act_home.shape, league_home_avg, dtype=float)
pred_away_base = np.full(act_away.shape, league_away_avg, dtype=float)
pred_total_base = np.full(act_total.shape, league_total_avg, dtype=float)

def mae_rmse(y_true, y_pred):
    """Return ``(MAE, RMSE)`` of ``y_pred`` against ``y_true``."""
    return (
        mean_absolute_error(y_true, y_pred),
        np.sqrt(mean_squared_error(y_true, y_pred)),
    )

# Errors for the model ("_q") and the league-average baseline ("_b"),
# separately for home points, away points and the combined total.
mae_home_q, rmse_home_q = mae_rmse(act_home, pred_home_qepc)
mae_away_q, rmse_away_q = mae_rmse(act_away, pred_away_qepc)
mae_total_q, rmse_total_q = mae_rmse(act_total, pred_total_qepc)

mae_home_b, rmse_home_b = mae_rmse(act_home, pred_home_base)
mae_away_b, rmse_away_b = mae_rmse(act_away, pred_away_base)
mae_total_b, rmse_total_b = mae_rmse(act_total, pred_total_base)

print("\nHome points forecast:")
print(f"  QEPC   MAE: {mae_home_q:.3f}, RMSE: {rmse_home_q:.3f}")
print(f"  Base   MAE: {mae_home_b:.3f}, RMSE: {rmse_home_b:.3f}")

print("\nAway points forecast:")
print(f"  QEPC   MAE: {mae_away_q:.3f}, RMSE: {rmse_away_q:.3f}")
print(f"  Base   MAE: {mae_away_b:.3f}, RMSE: {rmse_away_b:.3f}")

print("\nTotal points forecast (team-aware):")
print(f"  QEPC   MAE: {mae_total_q:.3f}, RMSE: {rmse_total_q:.3f}")
print(f"  Base   MAE: {mae_total_b:.3f}, RMSE: {rmse_total_b:.3f}")

# Positive numbers mean the model has the smaller error.
home_gain = (mae_home_b - mae_home_q, rmse_home_b - rmse_home_q)
away_gain = (mae_away_b - mae_away_q, rmse_away_b - rmse_away_q)
total_gain = (mae_total_b - mae_total_q, rmse_total_b - rmse_total_q)

print("\nImprovements (baseline - QEPC):")
print(f"  Home  MAE: {home_gain[0]:.3f}, RMSE: {home_gain[1]:.3f}")
print(f"  Away  MAE: {away_gain[0]:.3f}, RMSE: {away_gain[1]:.3f}")
print(f"  Total MAE: {total_gain[0]:.3f}, RMSE: {total_gain[1]:.3f}")


In [None]:
# For home, away and total points: is the actual value inside the simulated
# [P05, P95] band (both ends inclusive)?
eval_team_df["HOME_IN_90"] = eval_team_df["ACTUAL_HOME"].between(
    eval_team_df["HOME_P05"], eval_team_df["HOME_P95"]
)
eval_team_df["AWAY_IN_90"] = eval_team_df["ACTUAL_AWAY"].between(
    eval_team_df["AWAY_P05"], eval_team_df["AWAY_P95"]
)
eval_team_df["TOTAL_IN_90"] = eval_team_df["ACTUAL_TOTAL"].between(
    eval_team_df["TOTAL_P05"], eval_team_df["TOTAL_P95"]
)

# Share of games covered by each band.
cov_home = eval_team_df["HOME_IN_90"].mean()
cov_away = eval_team_df["AWAY_IN_90"].mean()
cov_total = eval_team_df["TOTAL_IN_90"].mean()

print(f"Home  coverage [P05,P95]: {cov_home:.3f}")
print(f"Away  coverage [P05,P95]: {cov_away:.3f}")
print(f"Total coverage [P05,P95]: {cov_total:.3f}")
print("Target for a 90% interval: ~0.90")
