In [None]:
from qepc.utils.paths import get_project_root
import sys

# Resolve the project root once and expose it to the rest of the notebook.
PROJECT_ROOT = get_project_root()

# Append the root to the import path only if it is not already listed,
# so re-running this cell does not add duplicate entries.
_project_root_entry = str(PROJECT_ROOT)
if _project_root_entry not in sys.path:
    sys.path.append(_project_root_entry)

print('PROJECT_ROOT:', PROJECT_ROOT)


In [None]:
# ==========================================================
# CELL 2 – QEPC IMPORTS
# ==========================================================
import numpy as np
import pandas as pd

from qepc.nba.eoin_data_source import load_eoin_player_boxes
from qepc.nba.player_usage_eoin import (
    PlayerUsageConfig,
    build_player_usage_from_eoin,
    build_player_vs_opponent_splits,
    save_player_usage_to_cache,
    save_player_vs_opp_to_cache,
    load_player_usage_from_cache,
    load_player_vs_opp_from_cache,
)
from qepc.nba.matchups_eoin import build_matchups_for_date


In [None]:
# ==========================================================
# CELL 3 – BUILD (OR LOAD) USAGE + VS-OPP SPLITS
# ==========================================================
# Toggle between the two ways of obtaining `usage` and `splits`:
#   True  -> rebuild both tables from the Eoin data and save them to the cache
#   False -> load the previously saved tables from the cache
rebuild = True

if not rebuild:
    print("Loading usage and vs-opp splits from cache...")
    usage = load_player_usage_from_cache()
    splits = load_player_vs_opp_from_cache()
else:
    print("Rebuilding usage and vs-opp splits from Eoin data...")
    config = PlayerUsageConfig(min_games=10, recent_window=5)
    usage = build_player_usage_from_eoin(config=config)
    splits = build_player_vs_opponent_splits(min_games_vs_opp=3)

    # Persist the fresh tables so later runs can take the cache branch.
    usage_path = save_player_usage_to_cache(usage)
    splits_path = save_player_vs_opp_to_cache(splits)
    print("Saved usage to:", usage_path)
    print("Saved vs-opp splits to:", splits_path)

# Quick sanity check: table sizes plus a peek at the first rows of each.
print("usage shape:", usage.shape)
print("splits shape:", splits.shape)

display(usage.head(10), splits.head(10))


In [None]:
# ==========================================================
# CELL 3B – RESTRICT USAGE TO MODERN / ACTIVE ERA
# ==========================================================
from qepc.nba.eoin_data_source import load_eoin_player_boxes

# Load player boxes so we can see when each player last played for each team.
# Work on a copy so the date conversion below does not touch the loader's frame.
player_boxes_qepc = load_eoin_player_boxes().copy()
player_boxes_qepc["game_date"] = pd.to_datetime(player_boxes_qepc["game_date"])

# One row per (player, team) holding the most recent game date for that pairing.
last_dates = (
    player_boxes_qepc
    .groupby(["player_id", "team_name"], as_index=False)["game_date"]
    .max()
    .rename(columns={"game_date": "last_game_date"})
)

print("last_dates sample:")
display(last_dates.head())

# Merge last_game_date onto usage.
# Drop any existing last_game_date first: without this, re-running the cell
# would merge the column a second time and pandas would emit
# last_game_date_x / last_game_date_y, breaking the filter in the next cell.
usage = (
    usage
    .drop(columns=["last_game_date"], errors="ignore")
    .merge(last_dates, on=["player_id", "team_name"], how="left")
)

print("usage with last_game_date shape:", usage.shape)
display(usage.head())


In [None]:
# ==========================================================
# CELL 3C – FILTER TO MODERN ERA (E.G. 2022+)
# ==========================================================
# Earliest "last game" date a player/team row may have to count as modern.
# Move this later (e.g. 2023-10-01 or 2024-10-01) to tighten the filter.
CUTOFF_DATE = pd.to_datetime("2022-10-01")

# Rows with a missing last_game_date compare as False and are dropped here.
usage_modern = usage.loc[usage["last_game_date"].ge(CUTOFF_DATE)].copy()

print("Original usage rows:", len(usage))
print("Modern usage rows:", len(usage_modern))
print("Example modern rows:")

# Teams alphabetically, highest scorers first within each team.
display(
    usage_modern
    .sort_values(by=["team_name", "avg_points"], ascending=[True, False])
    .head(20)
)


In [None]:
# ==========================================================
# CELL 4 – BUILD MATCHUPS FOR A DATE
# ==========================================================
# Date whose slate of games we want to model.
target_date = "2025-12-05"

matchups = build_matchups_for_date(target_date)
print("matchups shape:", matchups.shape)

# Show only the identifying columns and expected scores for the first games.
display(
    matchups.loc[
        :,
        [
            "game_id",
            "game_date",
            "home_team_name",
            "away_team_name",
            "exp_home_pts",
            "exp_away_pts",
        ],
    ].head(10)
)


In [None]:
# Filter to a specific game, e.g. Celtics (home) vs Lakers (away) on that date.
mask = (
    (matchups["home_team_name"] == "Celtics") &
    (matchups["away_team_name"] == "Lakers")
)

# Fail with an actionable message instead of the opaque IndexError that
# `.iloc[0]` raises on an empty selection when the game is not on the slate.
if not mask.any():
    raise ValueError(
        "No Celtics (home) vs Lakers (away) game found in matchups. "
        "Available matchups: "
        f"{matchups[['home_team_name', 'away_team_name']].to_dict('records')}"
    )

# If several rows match, the first one is used (same as before).
game_row = matchups[mask].iloc[0]

home_team = game_row["home_team_name"]
away_team = game_row["away_team_name"]

print("Chosen game:")
print(game_row[[
    "game_id",
    "game_date",
    "home_team_name",
    "away_team_name",
    "exp_home_pts",
    "exp_away_pts",
]])


In [None]:
# ==========================================================
# CELL 5 – LAMBDA BLENDER (SEASON + RECENT + VS-OPP)
# ==========================================================
# Column holding the last-N-games average for each supported stat.
STAT_RECENCY_COL = {
    "points": "pts_avg_lastN",
    "rebounds": "reb_avg_lastN",
    "assists": "ast_avg_lastN",
}

# Column holding the average against the current opponent for each supported stat.
STAT_VSOPP_COL = {
    "points": "pts_vs_opp",
    "rebounds": "reb_vs_opp",
    "assists": "ast_vs_opp",
}


def blend_stat_lambda(row: pd.Series, stat: str,
                      w_season: float = 0.6,
                      w_recent: float = 0.3,
                      w_vs_opp: float = 0.1) -> float:
    """
    Combine a player's season, recent-form and vs-opponent averages into one λ.

    Parameters
    ----------
    row : pd.Series
        One player row. Must contain ``avg_<stat>``; the recency and
        vs-opponent columns (see STAT_RECENCY_COL / STAT_VSOPP_COL) are
        optional and may be absent or NaN.
    stat : str
        One of 'points', 'rebounds', 'assists'. Any other value raises KeyError.
    w_season, w_recent, w_vs_opp : float
        Relative weights of the three components.

    Returns
    -------
    float
        Weighted average over the components that have a value, with the
        weights of those components renormalised to sum to 1. NaN when no
        component has a value or the remaining weights sum to <= 0.
    """
    recency_column = STAT_RECENCY_COL[stat]
    vs_opp_column = STAT_VSOPP_COL[stat]

    # (value, weight) pairs in fixed order: season, recent, vs-opponent.
    candidates = [
        (row[f"avg_{stat}"], w_season),
        (row.get(recency_column, np.nan), w_recent),
        (row.get(vs_opp_column, np.nan), w_vs_opp),
    ]

    # Missing components (None / NaN) are dropped rather than treated as zero.
    usable = [(value, weight) for value, weight in candidates if pd.notna(value)]
    if not usable:
        return float("nan")

    weight_sum = sum(weight for _, weight in usable)
    if weight_sum <= 0:
        return float("nan")

    blended = 0.0
    for value, weight in usable:
        blended += (weight / weight_sum) * float(value)
    return blended


In [None]:
# ==========================================================
# CELL – ROSTER-AWARE LAMBDA BUILDER
# ==========================================================
import numpy as np
import pandas as pd

def get_game_rosters(game_row: pd.Series, player_boxes: pd.DataFrame):
    """
    Look up who appeared for each side in the game described by ``game_row``.

    Parameters
    ----------
    game_row : pd.Series
        A matchups row with 'game_id', 'home_team_name' and 'away_team_name'.
    player_boxes : pd.DataFrame
        Player box rows with 'game_id', 'team_name' and 'player_id' columns.

    Returns
    -------
    (home_player_ids, away_player_ids)
        Two sorted lists of unique player ids, one per team. Either list is
        empty when ``player_boxes`` has no rows for that team in this game.

    Also prints the size and contents of both rosters.
    """
    game_id = game_row["game_id"]
    home_team = game_row["home_team_name"]
    away_team = game_row["away_team_name"]

    appearances = player_boxes[player_boxes["game_id"] == game_id]

    def _ids_for(team_name):
        # Sorted unique player ids that appeared for `team_name` in this game.
        on_team = appearances["team_name"] == team_name
        return sorted(appearances.loc[on_team, "player_id"].unique())

    home_ids = _ids_for(home_team)
    away_ids = _ids_for(away_team)

    print(f"Roster for {home_team}: {len(home_ids)} players")
    print("  home_ids:", home_ids)
    print(f"Roster for {away_team}: {len(away_ids)} players")
    print("  away_ids:", away_ids)

    return home_ids, away_ids


def build_player_lambdas_for_game(
    game_row: pd.Series,
    usage_modern: pd.DataFrame,
    splits: pd.DataFrame,
    player_boxes: pd.DataFrame,
):
    """
    Build per-player lambdas for one game.

    Only players that are both in ``usage_modern`` for the team and on the
    game's roster (as returned by ``get_game_rosters``) are kept. Season,
    recent and vs-opponent averages are blended with ``blend_stat_lambda``.

    Parameters
    ----------
    game_row : pd.Series
        Matchups row with 'game_id', 'home_team_name', 'away_team_name'.
    usage_modern : pd.DataFrame
        Usage table with at least 'team_name' and 'player_id', plus the
        columns ``blend_stat_lambda`` reads.
    splits : pd.DataFrame
        Vs-opponent splits with 'team_name', 'opp_team_name', 'player_id',
        'games_vs_opp', 'pts_vs_opp', 'reb_vs_opp', 'ast_vs_opp'.
    player_boxes : pd.DataFrame
        Player box rows used to determine who appeared in the game.

    Returns
    -------
    (home, away) : tuple of pd.DataFrame
        The filtered usage rows for each side, left-joined with their
        vs-opponent splits and extended with 'lambda_points',
        'lambda_rebounds' and 'lambda_assists'. A side with no matching
        players yields an empty frame that still has the lambda columns.
    """
    home_team = game_row["home_team_name"]
    away_team = game_row["away_team_name"]

    # --- 1) restrict to actual game rosters ---
    home_ids, away_ids = get_game_rosters(game_row, player_boxes)

    # base usage (season + recency), but:
    # - modern era (usage_modern)
    # - AND on the actual roster for this game
    home_usage = usage_modern[
        (usage_modern["team_name"] == home_team) &
        (usage_modern["player_id"].isin(home_ids))
    ].copy()

    away_usage = usage_modern[
        (usage_modern["team_name"] == away_team) &
        (usage_modern["player_id"].isin(away_ids))
    ].copy()

    print(f"\nHome usage rows after roster filter: {len(home_usage)}")
    print(f"Away usage rows after roster filter: {len(away_usage)}")

    # --- 2) vs-opp splits for these players ---
    vs_opp_columns = ["player_id", "games_vs_opp", "pts_vs_opp", "reb_vs_opp", "ast_vs_opp"]

    home_vs = splits[
        (splits["team_name"] == home_team) &
        (splits["opp_team_name"] == away_team)
    ][vs_opp_columns]

    away_vs = splits[
        (splits["team_name"] == away_team) &
        (splits["opp_team_name"] == home_team)
    ][vs_opp_columns]

    # Left join: players without a vs-opp split keep NaN in those columns,
    # which the blender treats as "component missing".
    home = home_usage.merge(home_vs, on="player_id", how="left")
    away = away_usage.merge(away_vs, on="player_id", how="left")

    # --- 3) compute lambdas using the existing blender ---
    # Build each column from an explicit per-row list instead of
    # `df.apply(..., axis=1)`: on an empty frame `apply` returns an empty
    # DataFrame (not a Series), and assigning that to a single column fails.
    # Empty frames happen whenever the roster filter matches nobody, e.g. for
    # a game with no box-score rows.
    for df in (home, away):
        for stat in ("points", "rebounds", "assists"):
            df[f"lambda_{stat}"] = pd.Series(
                [blend_stat_lambda(player_row, stat) for _, player_row in df.iterrows()],
                index=df.index,
                dtype="float64",
            )

    return home, away


In [None]:
# ==========================================================
# CELL – BUILD LAMBDAS FOR THE CHOSEN GAME
# ==========================================================
home_lambdas, away_lambdas = build_player_lambdas_for_game(
    game_row=game_row,
    usage_modern=usage_modern,
    splits=splits,
    player_boxes=player_boxes_qepc,
)

# Same report for both sides: the 12 highest projected scorers, showing the
# three inputs to the blend next to the blended points lambda.
for side_label, team_column, side_lambdas in (
    ("Home team:", "home_team_name", home_lambdas),
    ("Away team:", "away_team_name", away_lambdas),
):
    print(side_label, game_row[team_column])
    display(
        side_lambdas[[
            "player_name",
            "games_played",
            "avg_points", "pts_avg_lastN", "pts_vs_opp",
            "lambda_points",
        ]].sort_values("lambda_points", ascending=False).head(12)
    )
