In [1]:
# ==========================================
# CELL 1 – QEPC PATH SETUP
# ==========================================
import sys
from pathlib import Path

NOTEBOOK_DIR = Path.cwd()

PROJECT_ROOT = None
for parent in [NOTEBOOK_DIR] + list(NOTEBOOK_DIR.parents):
    if (parent / "qepc").is_dir():
        PROJECT_ROOT = parent
        break

if PROJECT_ROOT is None:
    PROJECT_ROOT = Path(r"C:\Users\wdors\qepc_project").resolve()

if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

print("NOTEBOOK_DIR:", NOTEBOOK_DIR)
print("PROJECT_ROOT:", PROJECT_ROOT)
print("qepc exists?:", (PROJECT_ROOT / "qepc").is_dir())

DATA_DIR = PROJECT_ROOT / "data"
CACHE_DIR = PROJECT_ROOT / "cache"

print("DATA_DIR:", DATA_DIR)
print("CACHE_DIR:", CACHE_DIR)


NOTEBOOK_DIR: C:\Users\wdorsey\qepc_project\notebooks\nba
PROJECT_ROOT: C:\Users\wdorsey\qepc_project
qepc exists?: True
DATA_DIR: C:\Users\wdorsey\qepc_project\data
CACHE_DIR: C:\Users\wdorsey\qepc_project\cache


In [2]:
# ==========================================
# CELL 2 – LOAD GAMES + MATCHUP ENGINE
# ==========================================
import pandas as pd

from qepc.nba.eoin_data_source import load_eoin_games
from qepc.nba.matchups_eoin import build_matchups_for_date

games_qepc = load_eoin_games()

print("games_qepc shape:", games_qepc.shape)
print(games_qepc[["game_id", "game_date", "home_team_id", "away_team_id", "home_score", "away_score"]].head())


games_qepc shape: (72290, 19)
    game_id   game_date  home_team_id  away_team_id  home_score  away_score
0  22500349  2025-12-05    1610612760    1610612742         132         111
1  22500345  2025-12-05    1610612741    1610612754         105         120
2  22500347  2025-12-05    1610612763    1610612746         107          98
3  22500348  2025-12-05    1610612749    1610612755         101         116
4  22500346  2025-12-05    1610612745    1610612756         117          98


In [3]:
# ==========================================
# CELL 2B – LOAD TEAM BOXES + TEAM STRENGTHS
# ==========================================
from qepc.nba.eoin_data_source import load_eoin_team_boxes
from qepc.nba.eoin_team_stats import build_team_stats_from_eoin
from qepc.nba.team_strengths_eoin import calculate_advanced_strengths_from_eoin

team_boxes_qepc = load_eoin_team_boxes()
print("team_boxes_qepc shape:", team_boxes_qepc.shape)

# Build aggregate team stats from Eoin
team_stats = build_team_stats_from_eoin(team_boxes_qepc)
print("team_stats shape:", team_stats.shape)
display(team_stats.head())

# Build advanced strengths (off_ppg, def_ppg, strength_score, etc.)
strengths_df = calculate_advanced_strengths_from_eoin(team_stats)
print("strengths_df shape:", strengths_df.shape)
display(strengths_df.head())

# Index by team_id for quick lookup
strengths_idx = strengths_df.set_index("team_id")


team_boxes_qepc shape: (144580, 49)
team_stats shape: (34, 14)


Unnamed: 0,team_id,games_played,wins,losses,win_pct,pts_for,pts_against,pts_diff,off_ppg,def_ppg,reb_total,reb_pg,ast_total,ast_pg
0,15016,1,0,1,0.0,97,107,-10,97.0,107.0,48.0,48.0,23.0,23.0
1,15018,3,0,3,0.0,257,395,-138,85.666667,131.666667,80.0,26.666667,49.0,16.333333
2,50013,1,0,1,0.0,92,127,-35,92.0,127.0,42.0,42.0,24.0,24.0
3,50014,1,0,1,0.0,88,123,-35,88.0,123.0,38.0,38.0,18.0,18.0
4,1610612737,6462,3163,3299,0.489477,668300,671894,-3594,103.419994,103.976168,174313.0,26.975085,87465.0,13.535283


Built advanced strengths from Eoin team_stats:
      team_id  games_played   win_pct     off_ppg     def_ppg  \
0  1610612738          6883  0.596833  106.253523  103.205434   
1  1610612747          6898  0.585242  106.762540  104.581328   
2  1610612759          4492  0.585485  104.987311  102.372663   
3  1610612756          5074  0.532913  107.760347  106.615491   
4  1610612760          5192  0.539676  105.905817  104.664291   
5  1610612749          5075  0.521773  105.209064  104.075862   
6  1610612743          4360  0.505963  108.001376  108.068349   
7  1610612745          5160  0.518411  105.497093  104.875000   
8  1610612762          4552  0.527900  103.588752  102.658172   
9  1610612757          4865  0.515313  105.058582  104.502980   

   pts_diff_per_game  strength_score  strength_rank  
0           3.048089        0.779025              1  
1           2.181212        0.728369              2  
2           2.614648        0.700651              3  
3           1.144856 

Unnamed: 0,team_id,games_played,wins,losses,win_pct,pts_for,pts_against,pts_diff,off_ppg,def_ppg,...,reb_pg,ast_total,ast_pg,pts_diff_per_game,z_win_pct,z_off_ppg,z_def_ppg,z_pts_diff_pg,strength_score,strength_rank
0,1610612738,6883,4108,2775,0.596833,731343,710363,20980,106.253523,103.205434,...,26.04315,98151.0,14.259916,3.048089,0.971514,0.818578,0.412303,0.618246,0.779025,1
1,1610612747,6898,4037,2861,0.585242,736448,721402,15046,106.76254,104.581328,...,26.703392,100839.0,14.618585,2.181212,0.901392,0.921677,0.213312,0.540485,0.728369,2
2,1610612759,4492,2630,1862,0.585485,471603,459858,11745,104.987311,102.372663,...,38.395815,94752.0,21.0935,2.614648,0.902864,0.56211,0.532743,0.579365,0.700651,3
3,1610612756,5074,2704,2370,0.532913,546776,540967,5809,107.760347,106.615491,...,32.803114,95952.0,18.910524,1.144856,0.584811,1.12378,-0.080881,0.447522,0.584849,4
4,1610612760,5192,2802,2390,0.539676,549863,543417,6446,105.905817,104.664291,...,32.47265,89262.0,17.192219,1.241525,0.625729,0.748151,0.201313,0.456194,0.556911,5


In [4]:
# ==========================================
# CELL 2C – MODERN-ERA TEAM STRENGTHS (2022+)
# ==========================================
import datetime as dt
import pandas as pd

from qepc.nba.eoin_team_stats import build_team_stats_from_eoin
from qepc.nba.team_strengths_eoin import calculate_advanced_strengths_from_eoin

# Use only recent seasons for strengths. You can change this to 2023-10-01 to test.
modern_cutoff = dt.date(2022, 10, 1)

if "game_date" not in team_boxes_qepc.columns:
    raise ValueError("team_boxes_qepc is missing 'game_date' column.")

if not pd.api.types.is_datetime64_any_dtype(team_boxes_qepc["game_date"]):
    team_boxes_qepc["game_date"] = pd.to_datetime(team_boxes_qepc["game_date"]).dt.date

team_boxes_modern = team_boxes_qepc[team_boxes_qepc["game_date"] >= modern_cutoff].copy()

print("Modern-era team boxes:", len(team_boxes_modern))
print("Modern date range:",
      team_boxes_modern["game_date"].min(),
      "→",
      team_boxes_modern["game_date"].max())

# Build team stats and strengths from the modern window only
team_stats_modern = build_team_stats_from_eoin(team_boxes_modern)
print("team_stats_modern shape:", team_stats_modern.shape)

strengths_modern = calculate_advanced_strengths_from_eoin(team_stats_modern)
print("strengths_modern shape:", strengths_modern.shape)

# Index we’ll use for predictions
strengths_idx = strengths_modern.set_index("team_id")
display(strengths_modern.head())


Modern-era team boxes: 9124
Modern date range: 2022-10-01 → 2025-12-05
team_stats_modern shape: (34, 14)
Built advanced strengths from Eoin team_stats:
      team_id  games_played   win_pct     off_ppg     def_ppg  \
0  1610612738           337  0.718101  116.970326  108.451039   
1  1610612760           324  0.685185  118.302469  110.864198   
2  1610612743           334  0.646707  116.131737  112.422156   
3  1610612739           311  0.610932  114.736334  110.118971   
4  1610612752           328  0.603659  114.094512  110.524390   
5  1610612750           326  0.592025  114.230061  110.392638   
6  1610612749           304  0.578947  116.059211  114.473684   
7  1610612744           316  0.556962  115.844937  113.471519   
8  1610612746           302  0.549669  113.102649  111.228477   
9  1610612756           301  0.531561  114.445183  113.737542   

   pts_diff_per_game  strength_score  strength_rank  
0           8.519288        1.187087              1  
1           7.438272    

Unnamed: 0,team_id,games_played,wins,losses,win_pct,pts_for,pts_against,pts_diff,off_ppg,def_ppg,...,reb_pg,ast_total,ast_pg,pts_diff_per_game,z_win_pct,z_off_ppg,z_def_ppg,z_pts_diff_pg,strength_score,strength_rank
0,1610612738,337,242,95,0.718101,39419,36548,2871,116.970326,108.451039,...,45.353116,8753.0,25.973294,8.519288,1.481249,0.736477,1.291717,1.060402,1.187087,1
1,1610612760,324,222,102,0.685185,38330,35920,2410,118.302469,110.864198,...,43.604938,8349.0,25.768519,7.438272,1.307775,0.903596,0.811022,0.967623,1.075218,2
2,1610612743,334,216,118,0.646707,38788,37549,1239,116.131737,112.422156,...,44.383234,9635.0,28.847305,3.709581,1.104983,0.631275,0.50068,0.647603,0.812597,3
3,1610612739,311,190,121,0.610932,35683,34247,1436,114.736334,110.118971,...,43.122186,8269.0,26.588424,4.617363,0.916444,0.45622,0.959469,0.725515,0.771423,4
4,1610612752,328,198,130,0.603659,37423,36252,1171,114.094512,110.52439,...,44.777439,8014.0,24.432927,3.570122,0.878108,0.375702,0.878711,0.635634,0.704945,5


In [5]:
# ==========================================
# CELL 2D – PACE & OFF/DEF RATING FROM EOIN
# ==========================================
import pandas as pd
import numpy as np

# Make sure we have the modern subset
# (re-run Cell 2C first if needed so team_boxes_modern exists)
tb = team_boxes_modern.copy()

# Dean Oliver-style possessions estimate:
# poss ≈ FGA + 0.44 * FTA - ORB + TOV
tb["fga"] = tb["fieldgoalsattempted"].fillna(0)
tb["fta"] = tb["freethrowsattempted"].fillna(0)
tb["orb"] = tb["reboundsoffensive"].fillna(0)
tb["tov"] = tb["turnovers"].fillna(0)

tb["possessions"] = tb["fga"] + 0.44 * tb["fta"] - tb["orb"] + tb["tov"]

# Points for/against
tb["points_for"] = tb["teamscore"].fillna(0)
tb["points_against"] = tb["opponentscore"].fillna(0)

# Aggregate by team_id
grouped = tb.groupby("team_id")

pace_stats = grouped.agg(
    games_played=("game_id", "nunique"),
    total_possessions=("possessions", "sum"),
    total_pts_for=("points_for", "sum"),
    total_pts_against=("points_against", "sum"),
).reset_index()

# Per-game pace
pace_stats["pace_per_game"] = (
    pace_stats["total_possessions"] / pace_stats["games_played"]
)

# Offensive & defensive rating (points per 100 possessions)
pace_stats["off_rating"] = (
    100.0 * pace_stats["total_pts_for"] / pace_stats["total_possessions"]
)
pace_stats["def_rating"] = (
    100.0 * pace_stats["total_pts_against"] / pace_stats["total_possessions"]
)

# Simple sanity print
print("pace_stats shape:", pace_stats.shape)
print("League avg pace:", pace_stats["pace_per_game"].mean())
print("League avg off_rating:", pace_stats["off_rating"].mean())
print("League avg def_rating:", pace_stats["def_rating"].mean())

display(pace_stats.head())

# Index for fast lookup
pace_idx = pace_stats.set_index("team_id")


pace_stats shape: (34, 8)
League avg pace: 101.49685621366764
League avg off_rating: 109.42139567713916
League avg def_rating: 113.29943121600735


Unnamed: 0,team_id,games_played,total_possessions,total_pts_for,total_pts_against,pace_per_game,off_rating,def_rating
0,15016,1,103.8,97,107,103.8,93.44894,103.082852
1,15018,3,296.32,257,395,98.773333,86.730562,133.301836
2,50013,1,101.8,92,127,101.8,90.373281,124.75442
3,50014,1,93.28,88,123,93.28,94.339623,131.861063
4,1610612737,295,30754.2,34741,35077,104.251525,112.963433,114.055966


In [6]:
# ==========================================
# CELL 3 – BUILD BACKTEST POOL (24–25+ GAMES)
# ==========================================
import datetime as dt
import pandas as pd

# Make sure game_date is a date
if "game_date" not in games_qepc.columns:
    raise ValueError("games_qepc is missing 'game_date' column.")

if not pd.api.types.is_datetime64_any_dtype(games_qepc["game_date"]):
    games_qepc["game_date"] = pd.to_datetime(games_qepc["game_date"]).dt.date

# Only completed games with scores, from the current era we care about
min_date = dt.date(2024, 10, 4)  # this is what gave you ~1795 games before

completed_mask = games_qepc["home_score"].notna() & games_qepc["away_score"].notna()
date_mask = games_qepc["game_date"] >= min_date

backtest_pool = games_qepc[completed_mask & date_mask].copy()
backtest_pool.sort_values("game_date", inplace=True)

print("Backtest pool games:", len(backtest_pool))
print("Date range:", backtest_pool["game_date"].min(), "→", backtest_pool["game_date"].max())

display(backtest_pool.head())


Backtest pool games: 1795
Date range: 2024-10-04 → 2025-12-05


Unnamed: 0,game_id,game_datetime,home_team_city,home_team_name,home_team_id,away_team_city,away_team_name,away_team_id,home_score,away_score,winner_team_id,gametype,attendance,arenaid,gamelabel,gamesublabel,seriesgamenumber,game_date,is_final
1795,12400001,2024-10-04 12:00:00+00:00,Denver,Nuggets,1610612743,Boston,Celtics,1610612738,103,107,1610612738,Preseason,12002.0,1000085.0,Preseason,,0.0,2024-10-04,True
1794,12400003,2024-10-04 22:30:00+00:00,Los Angeles,Lakers,1610612747,Minnesota,Timberwolves,1610612750,107,124,1610612750,Preseason,9235.0,1000120.0,Preseason,,0.0,2024-10-04,True
1793,12400004,2024-10-05 19:00:00+00:00,Los Angeles,Clippers,1610612746,Golden State,Warriors,1610612744,90,91,1610612744,Preseason,10300.0,173.0,Preseason,,0.0,2024-10-05,True
1792,12400005,2024-10-06 10:00:00+00:00,Boston,Celtics,1610612738,Denver,Nuggets,1610612743,130,104,1610612738,Preseason,11527.0,1000085.0,Preseason,,0.0,2024-10-06,True
1791,12400006,2024-10-06 17:00:00+00:00,Charlotte,Hornets,1610612766,New York,Knicks,1610612752,109,111,1610612752,Preseason,10486.0,9.0,Preseason,,0.0,2024-10-06,True


In [7]:
# ==========================================
# CELL 3A – BUILD TEAM-GAME REST / B2B INFO
# ==========================================
import pandas as pd

# Ensure games_qepc has proper dates
if not pd.api.types.is_datetime64_any_dtype(games_qepc["game_date"]):
    games_qepc["game_date"] = pd.to_datetime(games_qepc["game_date"]).dt.date

rows = []
for _, row in games_qepc.iterrows():
    gid = row["game_id"]
    gdate = row["game_date"]

    # Home team
    rows.append({
        "game_id": gid,
        "team_id": row["home_team_id"],
        "is_home": True,
        "game_date": gdate,
    })
    # Away team
    rows.append({
        "game_id": gid,
        "team_id": row["away_team_id"],
        "is_home": False,
        "game_date": gdate,
    })

team_games = pd.DataFrame(rows)
team_games.sort_values(["team_id", "game_date"], inplace=True)

team_games["prev_date"] = team_games.groupby("team_id")["game_date"].shift(1)

team_games["days_since_prev"] = (
    pd.to_datetime(team_games["game_date"])
    - pd.to_datetime(team_games["prev_date"])
).dt.days

team_games["days_since_prev"] = team_games["days_since_prev"].fillna(999)
team_games["is_b2b"] = team_games["days_since_prev"] == 1

print("Total team-game rows:", len(team_games))
print("Total B2B appearances:", int(team_games["is_b2b"].sum()))

rest_idx = team_games.set_index(["game_id", "team_id"])
rest_idx.head()


Total team-game rows: 144580
Total B2B appearances: 36332


Unnamed: 0_level_0,Unnamed: 1_level_0,is_home,game_date,prev_date,days_since_prev,is_b2b
game_id,team_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
12500009,15016,False,2025-10-03,,999.0,False
12500032,15018,False,2025-10-06,,999.0,False
12500043,15018,False,2025-10-09,2025-10-06,3.0,False
12500055,15018,False,2025-10-13,2025-10-09,4.0,False
12500011,50013,False,2025-10-04,,999.0,False


In [8]:
# ==========================================
# CELL 4 – BACKTEST WITH STRENGTHS + HOME + B2B
# ==========================================
import numpy as np

# Tunable knobs
HOME_BONUS = 1.5     # home bump
AWAY_PENALTY = 0.5   # away slight downgrade
B2B_PENALTY = 1.5    # penalty if playing on a back-to-back


def predict_team_points_with_schedule(row, strengths_index, rest_index,
                                      home_bonus=HOME_BONUS,
                                      away_penalty=AWAY_PENALTY,
                                      b2b_penalty=B2B_PENALTY):
    """
    Predict raw team points using:
      - team off_ppg / def_ppg from strengths_index
      - symmetric blend (home_off + away_def, etc.)
      - home-court advantage
      - B2B schedule penalty
    """
    home_id = row["home_team_id"]
    away_id = row["away_team_id"]

    if home_id not in strengths_index.index or away_id not in strengths_index.index:
        return np.nan, np.nan

    home = strengths_index.loc[home_id]
    away = strengths_index.loc[away_id]

    home_off = home["off_ppg"]
    home_def = home["def_ppg"]
    away_off = away["off_ppg"]
    away_def = away["def_ppg"]

    # Base symmetric model
    home_raw = (home_off + away_def) / 2.0
    away_raw = (away_off + home_def) / 2.0

    # Home-court tweaks
    home_raw += home_bonus
    away_raw -= away_penalty

    # Back-to-back tweaks
    key_home = (row["game_id"], home_id)
    key_away = (row["game_id"], away_id)

    if key_home in rest_index.index and bool(rest_index.loc[key_home, "is_b2b"]):
        home_raw -= b2b_penalty

    if key_away in rest_index.index and bool(rest_index.loc[key_away, "is_b2b"]):
        away_raw -= b2b_penalty

    return home_raw, away_raw


# Run predictions for the whole backtest pool
sample_games = backtest_pool.copy()
print("Sampled games (all):", len(sample_games))

results = []

for _, g in sample_games.iterrows():
    exp_home, exp_away = predict_team_points_with_schedule(g, strengths_idx, rest_idx)
    if np.isnan(exp_home) or np.isnan(exp_away):
        continue

    results.append({
        "game_id": g["game_id"],
        "game_date": g["game_date"],
        "home_team_id": g["home_team_id"],
        "away_team_id": g["away_team_id"],
        "pred_home_pts": exp_home,
        "pred_away_pts": exp_away,
        "actual_home_pts": float(g["home_score"]),
        "actual_away_pts": float(g["away_score"]),
    })

print("Total matched game predictions:", len(results))


Sampled games (all): 1795
Total matched game predictions: 1795


In [9]:
# ==========================================
# CELL 5 – RAW ERROR METRICS
# ==========================================
import numpy as np
import pandas as pd

results_df = pd.DataFrame(results)
print("Backtest rows:", len(results_df))
display(results_df.head())

# Absolute errors
results_df["home_abs_err"] = (results_df["actual_home_pts"] - results_df["pred_home_pts"]).abs()
results_df["away_abs_err"] = (results_df["actual_away_pts"] - results_df["pred_away_pts"]).abs()

# Squared errors
results_df["home_sq_err"] = (results_df["actual_home_pts"] - results_df["pred_home_pts"])**2
results_df["away_sq_err"] = (results_df["actual_away_pts"] - results_df["pred_away_pts"])**2

home_mae = results_df["home_abs_err"].mean()
away_mae = results_df["away_abs_err"].mean()

home_rmse = np.sqrt(results_df["home_sq_err"].mean())
away_rmse = np.sqrt(results_df["away_sq_err"].mean())

results_df["home_err"] = results_df["actual_home_pts"] - results_df["pred_home_pts"]
results_df["away_err"] = results_df["actual_away_pts"] - results_df["pred_away_pts"]

home_bias = results_df["home_err"].mean()
away_bias = results_df["away_err"].mean()

print(f"Home MAE:  {home_mae:.2f} points")
print(f"Away MAE:  {away_mae:.2f} points")
print(f"Home RMSE: {home_rmse:.2f} points")
print(f"Away RMSE: {away_rmse:.2f} points")
print("\nBias (mean error, actual - predicted):")
print(f"Home bias: {home_bias:+.2f}")
print(f"Away bias: {away_bias:+.2f}")


Backtest rows: 1795


Unnamed: 0,game_id,game_date,home_team_id,away_team_id,pred_home_pts,pred_away_pts,actual_home_pts,actual_away_pts
0,12400001,2024-10-04,1610612743,1610612738,113.791388,114.196241,103.0,107.0
1,12400003,2024-10-04,1610612747,1610612750,114.180645,114.155783,107.0,124.0
2,12400004,2024-10-05,1610612746,1610612744,114.787084,113.036707,90.0,91.0
3,12400005,2024-10-06,1610612738,1610612743,116.196241,111.791388,130.0,104.0
4,12400006,2024-10-06,1610612766,1610612752,110.91779,114.662641,109.0,111.0


Home MAE:  9.75 points
Away MAE:  9.92 points
Home RMSE: 12.69 points
Away RMSE: 12.86 points

Bias (mean error, actual - predicted):
Home bias: -0.28
Away bias: -0.15


In [10]:
# ==========================================
# CELL 6 – LINEAR CALIBRATION OF PREDICTIONS
# ==========================================
import numpy as np

# Fit y = m*x + b for home and away separately
home_m, home_b = np.polyfit(results_df["pred_home_pts"], results_df["actual_home_pts"], 1)
away_m, away_b = np.polyfit(results_df["pred_away_pts"], results_df["actual_away_pts"], 1)

print("Home calibration: actual_home ≈ "
      f"{home_m:.3f} * pred_home + {home_b:.3f}")
print("Away calibration: actual_away ≈ "
      f"{away_m:.3f} * pred_away + {away_b:.3f}")

# Apply calibrated predictions
results_df["cal_pred_home_pts"] = home_m * results_df["pred_home_pts"] + home_b
results_df["cal_pred_away_pts"] = away_m * results_df["pred_away_pts"] + away_b

# Recompute errors using calibrated predictions
results_df["cal_home_abs_err"] = (results_df["actual_home_pts"] - results_df["cal_pred_home_pts"]).abs()
results_df["cal_away_abs_err"] = (results_df["actual_away_pts"] - results_df["cal_pred_away_pts"]).abs()

results_df["cal_home_sq_err"] = (results_df["actual_home_pts"] - results_df["cal_pred_home_pts"])**2
results_df["cal_away_sq_err"] = (results_df["actual_away_pts"] - results_df["cal_pred_away_pts"])**2

cal_home_mae = results_df["cal_home_abs_err"].mean()
cal_away_mae = results_df["cal_away_abs_err"].mean()
cal_home_rmse = np.sqrt(results_df["cal_home_sq_err"].mean())
cal_away_rmse = np.sqrt(results_df["cal_away_sq_err"].mean())

# Bias after calibration
results_df["cal_home_err"] = results_df["actual_home_pts"] - results_df["cal_pred_home_pts"]
results_df["cal_away_err"] = results_df["actual_away_pts"] - results_df["cal_pred_away_pts"]

cal_home_bias = results_df["cal_home_err"].mean()
cal_away_bias = results_df["cal_away_err"].mean()

print("\n--- After linear calibration ---")
print(f"Calibrated Home MAE:  {cal_home_mae:.2f} points")
print(f"Calibrated Away MAE:  {cal_away_mae:.2f} points")
print(f"Calibrated Home RMSE: {cal_home_rmse:.2f} points")
print(f"Calibrated Away RMSE: {cal_away_rmse:.2f} points")
print(f"Calibrated Home bias: {cal_home_bias:+.2f}")
print(f"Calibrated Away bias: {cal_away_bias:+.2f}")


Home calibration: actual_home ≈ 1.916 * pred_home + -105.818
Away calibration: actual_away ≈ 1.997 * pred_away + -112.937

--- After linear calibration ---
Calibrated Home MAE:  9.58 points
Calibrated Away MAE:  9.72 points
Calibrated Home RMSE: 12.54 points
Calibrated Away RMSE: 12.67 points
Calibrated Home bias: -0.00
Calibrated Away bias: -0.00
