In [1]:
# ==========================================================
# CELL 1 – PROJECT ROOT + PYTHON PATH
# ==========================================================
from pathlib import Path
import sys

def find_project_root(marker="qepc_project") -> Path:
    """
    Locate the project root by walking up from the current working directory.

    The resolved working directory and each of its parents are checked in
    order (closest first); the first directory whose name equals ``marker``
    is returned.

    Parameters
    ----------
    marker : str
        Directory name that identifies the project root.

    Returns
    -------
    Path
        The closest directory in the hierarchy named ``marker``.

    Raises
    ------
    FileNotFoundError
        If neither the working directory nor any parent is named ``marker``.
    """
    here = Path.cwd().resolve()
    for candidate in (here, *here.parents):
        if candidate.name == marker:
            return candidate
    # Report the marker actually searched for (not a hard-coded name) and the
    # starting directory, so a failure is diagnosable from the message alone.
    raise FileNotFoundError(
        f"Could not find {marker} folder in path hierarchy of {here}."
    )

# Resolve the project root, then put it at the front of sys.path unless an
# identical entry is already there (keeps re-runs from stacking duplicates).
PROJECT_ROOT = find_project_root("qepc_project")
if not any(entry == str(PROJECT_ROOT) for entry in sys.path):
    sys.path.insert(0, str(PROJECT_ROOT))

print("PROJECT_ROOT:", PROJECT_ROOT)


PROJECT_ROOT: C:\Users\wdorsey\qepc_project


In [2]:
# ==========================================================
# CELL 2 – QEPC IMPORTS
# ==========================================================
import numpy as np
import pandas as pd

from qepc.nba.eoin_data_source import load_eoin_player_boxes
from qepc.nba.player_usage_eoin import (
    PlayerUsageConfig,
    build_player_usage_from_eoin,
    build_player_vs_opponent_splits,
    save_player_usage_to_cache,
    save_player_vs_opp_to_cache,
    load_player_usage_from_cache,
    load_player_vs_opp_from_cache,
)
from qepc.nba.matchups_eoin import build_matchups_for_date


In [3]:
# ==========================================================
# CELL 3 – BUILD (OR LOAD) USAGE + VS-OPP SPLITS
# ==========================================================
# Toggle for this cell:
#   True  -> recompute usage and vs-opp splits, then hand them to the cache savers
#   False -> read both frames back through the cache loaders
rebuild = True

if not rebuild:
    print("Loading usage and vs-opp splits from cache...")
    usage = load_player_usage_from_cache()
    splits = load_player_vs_opp_from_cache()
else:
    print("Rebuilding usage and vs-opp splits from Eoin data...")
    config = PlayerUsageConfig(min_games=10, recent_window=5)
    usage = build_player_usage_from_eoin(config=config)
    splits = build_player_vs_opponent_splits(min_games_vs_opp=3)

    # The savers return the location they wrote to; echo it for the reader.
    usage_path = save_player_usage_to_cache(usage)
    splits_path = save_player_vs_opp_to_cache(splits)
    print("Saved usage to:", usage_path)
    print("Saved vs-opp splits to:", splits_path)

# Quick sanity check of both frames, whichever branch produced them.
print("usage shape:", usage.shape)
print("splits shape:", splits.shape)

display(usage.head(10), splits.head(10))


Rebuilding usage and vs-opp splits from Eoin data...
Saved usage to: C:\Users\wdorsey\qepc_project\cache\imports\eoin_player_usage.parquet
Saved vs-opp splits to: C:\Users\wdorsey\qepc_project\cache\imports\eoin_player_vs_opp.parquet
usage shape: (11644, 13)
splits shape: (160954, 8)


Unnamed: 0,player_id,team_name,games_played,avg_points,avg_rebounds,avg_assists,mean_points_share,mean_rebounds_share,mean_assists_share,pts_avg_lastN,reb_avg_lastN,ast_avg_lastN,player_name
0,76375,76ers,321,27.249221,24.423676,6.753894,0.227431,0.393292,0.276525,21.6,23.4,7.4,Wilt Chamberlain
1,947,76ers,878,25.143508,3.545558,5.497722,0.261029,0.083283,0.268436,10.2,3.2,3.2,Allen Iverson
2,203954,76ers,623,23.331723,9.37037,3.067633,0.207759,0.210974,0.123203,19.8,5.6,2.8,Joel Embiid
3,787,76ers,661,23.189107,11.723147,3.763994,0.215125,0.272842,0.160943,20.8,8.4,3.2,Charles Barkley
4,76681,76ers,977,21.957011,6.735926,3.898669,0.199639,0.148792,0.152498,18.2,6.4,3.4,Julius Erving
5,77449,76ers,388,21.180412,12.085052,1.293814,0.191943,0.268847,0.054506,5.0,2.6,0.6,Moses Malone
6,77532,76ers,266,20.793233,11.296992,3.966165,0.18878,0.232731,0.174042,14.6,8.0,2.8,George McGinnis
7,76487,76ers,693,20.760462,9.97114,3.881674,0.179304,0.185121,0.172598,11.4,6.0,5.6,Billy Cunningham
8,76882,76ers,820,20.330488,4.863415,4.278049,0.173349,0.085913,0.188418,9.4,4.2,2.4,Hal Greer
9,711,76ers,176,19.409091,3.857955,3.397727,0.199118,0.093137,0.16679,17.8,4.2,3.2,Jerry Stackhouse


Unnamed: 0,player_id,team_name,opp_team_name,games_vs_opp,pts_vs_opp,reb_vs_opp,ast_vs_opp,player_name
0,920,Lakers,Trail Blazers,67,10.0,7.328358,1.134328,A.C. Green
1,920,Lakers,Suns,66,9.515152,8.757576,1.19697,A.C. Green
2,920,Lakers,Warriors,57,9.649123,7.666667,0.859649,A.C. Green
3,920,Lakers,SuperSonics,55,11.727273,7.745455,1.018182,A.C. Green
4,920,Lakers,Mavericks,51,9.901961,7.058824,1.058824,A.C. Green
5,920,Lakers,Kings,49,10.285714,8.55102,1.020408,A.C. Green
6,920,Lakers,Clippers,47,11.446809,7.595745,1.170213,A.C. Green
7,920,Lakers,Jazz,46,7.586957,7.413043,0.934783,A.C. Green
8,920,Lakers,Rockets,45,10.511111,7.311111,0.933333,A.C. Green
9,920,Lakers,Spurs,45,10.133333,6.666667,1.044444,A.C. Green


In [4]:
# ==========================================================
# CELL 3B – RESTRICT USAGE TO MODERN / ACTIVE ERA
# ==========================================================
from qepc.nba.eoin_data_source import load_eoin_player_boxes

# Load player boxes so we can see when each player last played for that team
player_boxes_qepc = load_eoin_player_boxes().copy()
player_boxes_qepc["game_date"] = pd.to_datetime(player_boxes_qepc["game_date"])

# Latest game date per (player, team) pair.
last_dates = (
    player_boxes_qepc
    .groupby(["player_id", "team_name"], as_index=False)["game_date"]
    .max()
    .rename(columns={"game_date": "last_game_date"})
)

print("last_dates sample:")
display(last_dates.head())

# Merge last_game_date onto usage.
# Any last_game_date column left over from a previous run of this cell is
# dropped first: merging a second time would otherwise produce
# last_game_date_x / last_game_date_y and break the later cutoff filter that
# reads usage["last_game_date"]. With the drop, the cell can be re-run safely.
usage = (
    usage
    .drop(columns=["last_game_date"], errors="ignore")
    .merge(last_dates, on=["player_id", "team_name"], how="left")
)

print("usage with last_game_date shape:", usage.shape)
display(usage.head())


last_dates sample:


Unnamed: 0,player_id,team_name,last_game_date
0,2,Grizzlies,1996-04-22
1,2,Lakers,1997-05-13
2,2,Pacers,1995-06-05
3,3,Celtics,2003-05-13
4,3,Grizzlies,2002-04-18


usage with last_game_date shape: (11644, 14)


Unnamed: 0,player_id,team_name,games_played,avg_points,avg_rebounds,avg_assists,mean_points_share,mean_rebounds_share,mean_assists_share,pts_avg_lastN,reb_avg_lastN,ast_avg_lastN,player_name,last_game_date
0,76375,76ers,321,27.249221,24.423676,6.753894,0.227431,0.393292,0.276525,21.6,23.4,7.4,Wilt Chamberlain,1968-04-19
1,947,76ers,878,25.143508,3.545558,5.497722,0.261029,0.083283,0.268436,10.2,3.2,3.2,Allen Iverson,2010-02-21
2,203954,76ers,623,23.331723,9.37037,3.067633,0.207759,0.210974,0.123203,19.8,5.6,2.8,Joel Embiid,2025-12-07
3,787,76ers,661,23.189107,11.723147,3.763994,0.215125,0.272842,0.160943,20.8,8.4,3.2,Charles Barkley,1992-04-19
4,76681,76ers,977,21.957011,6.735926,3.898669,0.199639,0.148792,0.152498,18.2,6.4,3.4,Julius Erving,1987-05-04


In [5]:
# ==========================================================
# CELL 3C – FILTER TO MODERN ERA (E.G. 2022+)
# ==========================================================
# Keep only (player, team) rows whose most recent game is on/after this date.
# Tighten to 2023-10-01 or 2024-10-01 for a narrower window.
CUTOFF_DATE = pd.to_datetime("2022-10-01")

usage_modern = usage.loc[usage["last_game_date"] >= CUTOFF_DATE].copy()

print("Original usage rows:", len(usage))
print("Modern usage rows:", len(usage_modern))
print("Example modern rows:")
# Teams alphabetically; within each team, highest scoring average first.
display(
    usage_modern
    .sort_values(by=["team_name", "avg_points"], ascending=[True, False])
    .head(20)
)


Original usage rows: 11644
Modern usage rows: 1381
Example modern rows:


Unnamed: 0,player_id,team_name,games_played,avg_points,avg_rebounds,avg_assists,mean_points_share,mean_rebounds_share,mean_assists_share,pts_avg_lastN,reb_avg_lastN,ast_avg_lastN,player_name,last_game_date
2,203954,76ers,623,23.331723,9.37037,3.067633,0.207759,0.210974,0.123203,19.8,5.6,2.8,Joel Embiid,2025-12-07
11,201935,76ers,113,18.946903,5.743363,9.256637,0.167174,0.139693,0.377776,7.8,4.2,5.2,James Harden,2023-10-16
16,1630178,76ers,438,18.226027,2.856164,4.059361,0.162406,0.067533,0.168305,29.6,5.4,5.6,Tyrese Maxey,2025-12-07
20,1629656,76ers,56,17.821429,4.214286,4.071429,0.155656,0.099439,0.163962,17.8,5.0,5.6,Quentin Grimes,2025-12-07
25,202699,76ers,472,16.580508,6.542373,2.959746,0.148065,0.149101,0.122762,10.8,7.8,1.2,Tobias Harris,2024-05-03
36,1642845,76ers,23,14.652174,5.521739,3.956522,0.126873,0.12235,0.159665,10.0,5.6,3.4,VJ Edgecombe,2025-12-07
38,202331,76ers,59,13.711864,4.525424,3.491525,0.124777,0.112611,0.147798,14.2,4.4,3.2,Paul George,2025-12-07
44,1626162,76ers,180,13.355556,4.694444,1.427778,0.120663,0.113494,0.059988,16.2,4.4,1.6,Kelly Oubre Jr.,2025-11-14
68,1642272,76ers,44,11.5,2.25,2.022727,0.109755,0.05396,0.091281,10.8,3.6,2.8,Jared McCain,2025-12-07
84,1627741,76ers,39,10.74359,2.769231,2.692308,0.099243,0.06319,0.109978,0.4,0.2,0.2,Buddy Hield,2024-05-03


In [6]:
# ==========================================================
# CELL 4 – BUILD MATCHUPS FOR A DATE
# ==========================================================
# Slate date to build matchups for.
target_date = "2025-12-05"

matchups = build_matchups_for_date(target_date)
print("matchups shape:", matchups.shape)

# Preview the first ten games with just the identifying and projection columns.
display(
    matchups.head(10)[
        [
            "game_id",
            "game_date",
            "home_team_name",
            "away_team_name",
            "exp_home_pts",
            "exp_away_pts",
        ]
    ]
)


Built advanced strengths from Eoin team_stats:
      team_id  games_played   win_pct     off_ppg     def_ppg  \
0  1610612738           338  0.718935  116.982249  108.464497   
1  1610612760           326  0.687117  118.401840  110.766871   
2  1610612743           335  0.647761  116.128358  112.402985   
3  1610612739           312  0.608974  114.669872  110.083333   
4  1610612752           330  0.606061  114.078788  110.463636   
5  1610612750           328  0.591463  114.185976  110.371951   
6  1610612744           318  0.559748  115.814465  113.339623   
7  1610612749           305  0.577049  116.045902  114.504918   
8  1610612746           303  0.547855  113.079208  111.221122   
9  1610612747           321  0.545171  114.971963  115.112150   

   pts_diff_per_game  strength_score  strength_rank  
0           8.517751        1.188377              1  
1           7.634969        1.088435              2  
2           3.725373        0.815295              3  
3           4.586538 

Unnamed: 0,game_id,game_date,home_team_name,away_team_name,exp_home_pts,exp_away_pts
0,22500338,2025-12-05,Celtics,Lakers,116.528433,106.170305
1,22500339,2025-12-05,Magic,Heat,107.097078,104.98813
2,22500340,2025-12-05,Hawks,Nuggets,117.585299,120.746031
3,22500341,2025-12-05,Cavaliers,Spurs,120.086332,109.10343
4,22500342,2025-12-05,Pistons,Trail Blazers,115.719304,112.76079
5,22500343,2025-12-05,Knicks,Jazz,121.135996,107.646313
6,22500344,2025-12-05,Raptors,Hornets,113.301969,108.700988
7,22500345,2025-12-05,Bulls,Pacers,119.594198,118.944816
8,22500346,2025-12-05,Rockets,Suns,115.204974,113.197705
9,22500347,2025-12-05,Grizzlies,Clippers,113.122365,113.318871


In [7]:
# Pick one game from the slate, e.g. Celtics (home) vs Lakers (away).
# The team names are set once here so the filter and the later cells that
# read home_team / away_team cannot drift apart.
home_team = "Celtics"
away_team = "Lakers"

mask = (
    (matchups["home_team_name"] == home_team) &
    (matchups["away_team_name"] == away_team)
)

# Fail with an actionable message instead of the bare IndexError that
# .iloc[0] raises when the requested pairing is not in the slate.
if not mask.any():
    available = matchups[["home_team_name", "away_team_name"]].to_string(index=False)
    raise ValueError(
        f"No game with home={home_team!r} and away={away_team!r} in matchups. "
        f"Available games:\n{available}"
    )

# If the pairing somehow appears more than once, the first row is used.
game_row = matchups[mask].iloc[0]

print("Chosen game:")
print(game_row[[
    "game_id",
    "game_date",
    "home_team_name",
    "away_team_name",
    "exp_home_pts",
    "exp_away_pts",
]])


Chosen game:
game_id             22500338
game_date         2025-12-05
home_team_name       Celtics
away_team_name        Lakers
exp_home_pts      116.528433
exp_away_pts      106.170305
Name: 0, dtype: object


In [8]:
# ==========================================================
# CELL 5 – LAMBDA BLENDER (SEASON + RECENT + VS-OPP)
# ==========================================================
# stat name -> column holding that stat's last-N-games average
STAT_RECENCY_COL = {
    "points": "pts_avg_lastN",
    "rebounds": "reb_avg_lastN",
    "assists": "ast_avg_lastN",
}

# stat name -> column holding that stat's average against the opponent
STAT_VSOPP_COL = {
    "points": "pts_vs_opp",
    "rebounds": "reb_vs_opp",
    "assists": "ast_vs_opp",
}

def blend_stat_lambda(row: pd.Series, stat: str,
                      w_season: float = 0.6,
                      w_recent: float = 0.3,
                      w_vs_opp: float = 0.1) -> float:
    """
    Weighted blend of a player's season, last-N and vs-opponent averages.

    ``stat`` is one of 'points', 'rebounds', 'assists'. The season value is
    read from ``avg_<stat>`` (must exist in ``row``); the recency and
    vs-opponent values are looked up through STAT_RECENCY_COL / STAT_VSOPP_COL
    and treated as missing when the column is absent.

    Missing (None/NaN) inputs are left out and the weights of the remaining
    inputs are rescaled to sum to 1. Returns NaN when nothing usable remains
    or when the remaining weights do not add up to a positive number.
    """
    # Resolve column names first so an unknown stat fails on the lookup tables.
    recency_col = STAT_RECENCY_COL[stat]
    vs_opp_col = STAT_VSOPP_COL[stat]

    # (value, weight) pairs in fixed order: season, recent, vs-opponent.
    candidates = (
        (row[f"avg_{stat}"], w_season),
        (row.get(recency_col, np.nan), w_recent),
        (row.get(vs_opp_col, np.nan), w_vs_opp),
    )

    usable = [
        (value, weight)
        for value, weight in candidates
        if not (value is None or pd.isna(value))
    ]
    if not usable:
        return float("nan")

    weight_sum = sum(weight for _, weight in usable)
    if weight_sum <= 0:
        return float("nan")

    # Accumulate with renormalised weights, same order as the inputs above.
    blended = 0.0
    for value, weight in usable:
        blended += (weight / weight_sum) * float(value)
    return blended


In [9]:
# ==========================================================
# CELL – ROSTER-AWARE LAMBDA BUILDER
# ==========================================================
import numpy as np
import pandas as pd

def get_game_rosters(game_row: pd.Series, player_boxes: pd.DataFrame):
    """
    Return the player ids that appear in ``player_boxes`` for one game.

    Parameters
    ----------
    game_row : pd.Series
        A matchup row providing ``game_id``, ``home_team_name`` and
        ``away_team_name``.
    player_boxes : pd.DataFrame
        Player box scores with ``game_id``, ``team_name`` and ``player_id``
        columns.

    Returns
    -------
    (home_ids, away_ids) : tuple[list, list]
        Sorted, de-duplicated player ids for the home and away team. Both
        lists are empty when ``player_boxes`` has no rows for the game.

    Notes
    -----
    Prints each roster's size and ids as a side effect.
    """
    gid = game_row["game_id"]
    home_team = game_row["home_team_name"]
    away_team = game_row["away_team_name"]

    game_players = player_boxes[player_boxes["game_id"] == gid]

    def _team_ids(team_name):
        # .tolist() converts numpy scalars to native Python values, so the
        # printed roster reads [1627759, ...] rather than [np.int64(1627759), ...].
        on_team = game_players["team_name"] == team_name
        return sorted(game_players.loc[on_team, "player_id"].unique().tolist())

    home_ids = _team_ids(home_team)
    away_ids = _team_ids(away_team)

    print(f"Roster for {home_team}: {len(home_ids)} players")
    print("  home_ids:", home_ids)
    print(f"Roster for {away_team}: {len(away_ids)} players")
    print("  away_ids:", away_ids)

    return home_ids, away_ids


def _blend_lambdas_for_team(
    team_usage: pd.DataFrame,
    splits: pd.DataFrame,
    team: str,
    opponent: str,
) -> pd.DataFrame:
    """Left-join one team's usage rows with its splits vs ``opponent`` and add lambda columns."""
    vs_opp = splits.loc[
        (splits["team_name"] == team) & (splits["opp_team_name"] == opponent),
        ["player_id", "games_vs_opp", "pts_vs_opp", "reb_vs_opp", "ast_vs_opp"],
    ]
    # Left join: players without a split row keep NaN vs-opp columns, which
    # the blender treats as a missing component.
    merged = team_usage.merge(vs_opp, on="player_id", how="left")

    for stat in ("points", "rebounds", "assists"):
        # Built row by row into an explicit float Series so a zero-row frame
        # still gets a well-formed (empty) column, independent of how
        # DataFrame.apply(axis=1) treats empty input.
        merged[f"lambda_{stat}"] = pd.Series(
            [blend_stat_lambda(row, stat) for _, row in merged.iterrows()],
            index=merged.index,
            dtype=float,
        )
    return merged


def build_player_lambdas_for_game(
    game_row: pd.Series,
    usage_modern: pd.DataFrame,
    splits: pd.DataFrame,
    player_boxes: pd.DataFrame,
):
    """
    Build per-player lambdas for one game.

    Steps:
      1. Take each team's roster from ``get_game_rosters`` (players present in
         ``player_boxes`` for this ``game_id``).
      2. Keep the ``usage_modern`` rows for those players on that team.
      3. Left-join the vs-opponent rows from ``splits``.
      4. Add ``lambda_points`` / ``lambda_rebounds`` / ``lambda_assists`` via
         ``blend_stat_lambda``.

    Parameters
    ----------
    game_row : pd.Series
        Matchup row with ``game_id``, ``home_team_name``, ``away_team_name``.
    usage_modern : pd.DataFrame
        Usage rows with at least ``player_id``, ``team_name`` and the
        columns ``blend_stat_lambda`` reads.
    splits : pd.DataFrame
        Vs-opponent splits with ``player_id``, ``team_name``,
        ``opp_team_name``, ``games_vs_opp``, ``pts_vs_opp``, ``reb_vs_opp``,
        ``ast_vs_opp``.
    player_boxes : pd.DataFrame
        Player box scores, passed through to ``get_game_rosters``.

    Returns
    -------
    (home, away) : tuple[pd.DataFrame, pd.DataFrame]
        One frame per team; both are empty (but carry the lambda columns) when
        no roster player has a usage row.

    Notes
    -----
    Prints the post-filter row counts, plus a line for every team that has
    roster players with no matching usage row (those players are skipped).
    """
    home_team = game_row["home_team_name"]
    away_team = game_row["away_team_name"]

    # --- 1) restrict to actual game rosters ---
    home_ids, away_ids = get_game_rosters(game_row, player_boxes)

    def _roster_usage(team, roster_ids):
        on_roster = (
            (usage_modern["team_name"] == team) &
            (usage_modern["player_id"].isin(roster_ids))
        )
        return usage_modern[on_roster].copy()

    home_usage = _roster_usage(home_team, home_ids)
    away_usage = _roster_usage(away_team, away_ids)

    print(f"\nHome usage rows after roster filter: {len(home_usage)}")
    print(f"Away usage rows after roster filter: {len(away_usage)}")

    # Surface roster players that fall out here instead of dropping them
    # silently; they will not appear in the returned frames.
    for team, roster_ids, team_usage in (
        (home_team, home_ids, home_usage),
        (away_team, away_ids, away_usage),
    ):
        skipped = (
            pd.Index(roster_ids)
            .difference(pd.Index(team_usage["player_id"]))
            .tolist()
        )
        if skipped:
            print(
                f"  {team}: {len(skipped)} roster player(s) without a usage row, "
                f"skipped: {skipped}"
            )

    # --- 2) + 3) vs-opp splits and blended lambdas, per team ---
    home = _blend_lambdas_for_team(home_usage, splits, home_team, away_team)
    away = _blend_lambdas_for_team(away_usage, splits, away_team, home_team)

    return home, away


In [10]:
# ==========================================================
# CELL – BUILD LAMBDAS FOR THE CHOSEN GAME
# ==========================================================
home_lambdas, away_lambdas = build_player_lambdas_for_game(
    game_row=game_row,
    usage_modern=usage_modern,
    splits=splits,
    player_boxes=player_boxes_qepc,
)

# Columns shown per player: the three inputs to the points blend and its result.
points_view_cols = [
    "player_name",
    "games_played",
    "avg_points", "pts_avg_lastN", "pts_vs_opp",
    "lambda_points",
]

# Same top-12-by-lambda_points table for each side, home first.
for side_label, team_key, side_lambdas in (
    ("Home team:", "home_team_name", home_lambdas),
    ("Away team:", "away_team_name", away_lambdas),
):
    print(side_label, game_row[team_key])
    display(
        side_lambdas[points_view_cols]
        .sort_values("lambda_points", ascending=False)
        .head(12)
    )


Roster for Celtics: 14 players
  home_ids: [np.int64(1627759), np.int64(1628401), np.int64(1628449), np.int64(1629014), np.int64(1629674), np.int64(1630202), np.int64(1630214), np.int64(1630568), np.int64(1630573), np.int64(1631169), np.int64(1631248), np.int64(1641775), np.int64(1642864), np.int64(1642873)]
Roster for Lakers: 13 players
  away_ids: [np.int64(1628467), np.int64(1629020), np.int64(1629028), np.int64(1629060), np.int64(1629216), np.int64(1629637), np.int64(1630559), np.int64(1631166), np.int64(1631222), np.int64(1641733), np.int64(1642261), np.int64(1642355), np.int64(1642876)]

Home usage rows after roster filter: 14
Away usage rows after roster filter: 12
Home team: Celtics


Unnamed: 0,player_name,games_played,avg_points,pts_avg_lastN,pts_vs_opp,lambda_points
0,Jaylen Brown,824,18.491484,33.0,20.411765,23.036067
1,Derrick White,377,13.960212,22.8,11.857143,16.401842
2,Anfernee Simons,28,13.357143,11.8,,12.838095
3,Payton Pritchard,504,8.468254,19.2,6.9,11.530952
4,Josh Minott,28,7.642857,6.6,,7.295238
5,Sam Hauser,386,6.220207,9.0,5.714286,7.003553
7,Neemias Queta,197,3.93401,12.0,4.6,6.420406
11,Jordan Walsh,175,1.691429,12.2,4.25,5.099857
6,Luka Garza,27,5.592593,1.6,,4.261728
8,Hugo Gonzalez,28,3.107143,5.0,,3.738095


Away team: Lakers


Unnamed: 0,player_name,games_played,avg_points,pts_avg_lastN,pts_vs_opp,lambda_points
0,Austin Reaves,373,14.348525,28.0,15.25,18.534115
1,Deandre Ayton,29,12.965517,14.2,,13.377011
2,Rui Hachimura,235,12.038298,11.2,11.2,11.702979
4,Jake LaRavia,30,8.0,7.2,,7.733333
3,Dalton Knecht,119,8.10084,5.0,9.0,7.260504
6,Gabe Vincent,131,5.312977,7.0,10.666667,6.354453
5,Nick Smith Jr.,18,6.0,4.2,,5.4
7,Jaxson Hayes,189,4.740741,4.8,6.0,4.884444
8,Jarred Vanderbilt,166,3.915663,0.0,5.0,2.849398
9,Bronny James,91,1.505495,1.0,,1.336996
