# QEPC ‚Äì Player Lambdas (Recency + vs Opponent) ‚Äì Upgraded

This notebook builds **roster-aware** player-level Œª (lambda) projections for **points, rebounds, assists** using:

- Only games **before** the target game date (no data leakage).
- A **lookback window** (default 2 seasons) to avoid ancient-history noise.
- **Roster detection** (from game_id if available; otherwise inferred from recent games).
- A **share model** (team-total √ó player share) with recency + vs-opp adjustments (shrunk to prevent tiny-sample madness).
- Optional fallback logic for players with little/no history.

Numbered cells below are designed to be copy/paste safe.


In [2]:
# ==========================================================
# CELL 1 ‚Äì PROJECT ROOT AUTO-DETECT + IMPORTS
# ==========================================================
from __future__ import annotations

import os
import sys
from pathlib import Path

import numpy as np
import pandas as pd

def find_project_root(start: Path | None = None, package_name: str = "qepc") -> Path | None:
    r"""
    Find the QEPC project root by walking upward from CWD and checking common locations.
    Works across machines (different Windows usernames) without hardcoding C:/Users/<name>/...
    """
    start = (start or Path.cwd()).resolve()

    # 1) Walk up from current directory
    for p in [start, *start.parents]:
        if (p / package_name / "__init__.py").exists():
            return p
        if p.name.lower() == "qepc_project" and (p / package_name).exists():
            return p

    # 2) Environment override (optional)
    env = os.getenv("QEPC_PROJECT_ROOT")
    if env:
        cand = Path(env).expanduser().resolve()
        if (cand / package_name / "__init__.py").exists():
            return cand

    # 3) Common spots under home directory
    home = Path.home()
    for cand in [
        home / "qepc_project",
        home / "Documents" / "qepc_project",
        home / "Desktop" / "qepc_project",
    ]:
        if (cand / package_name / "__init__.py").exists():
            return cand

    return None

PROJECT_ROOT = find_project_root()

if PROJECT_ROOT is None:
    raise FileNotFoundError(
        "Could not auto-detect your QEPC project root.\n"
        "Fix options:\n"
        "  1) Open Jupyter with working directory inside your repo\n"
        "  2) Or set env var QEPC_PROJECT_ROOT to the repo path\n"
        "     e.g. PowerShell:  $env:QEPC_PROJECT_ROOT = 'C:/Users/YOU/qepc_project'\n"
    )

# Make sure Python can import `qepc`
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

print("‚úÖ PROJECT_ROOT:", PROJECT_ROOT)

import qepc
print("‚úÖ qepc package:", Path(qepc.__file__).resolve())

CACHE_IMPORTS = PROJECT_ROOT / "cache" / "imports"
print("üì¶ CACHE_IMPORTS:", CACHE_IMPORTS)


‚úÖ PROJECT_ROOT: C:\Users\wdorsey\qepc_project
‚úÖ qepc package: C:\Users\wdorsey\qepc_project\qepc\__init__.py
üì¶ CACHE_IMPORTS: C:\Users\wdorsey\qepc_project\cache\imports


In [3]:
# ==========================================================
# CELL 2 ‚Äì LOAD QEPC-READY EOIN DATA (WITH FALLBACKS)
# ==========================================================
from pathlib import Path

def _coerce_game_date(df: pd.DataFrame, col: str = "game_date") -> pd.DataFrame:
    if col in df.columns:
        df[col] = pd.to_datetime(df[col], errors="coerce").dt.date
    return df

def _fallback_load_parquet(name: str) -> pd.DataFrame:
    path = CACHE_IMPORTS / name
    if not path.exists():
        raise FileNotFoundError(f"Missing cache file: {path}")
    return pd.read_parquet(path)

# Try QEPC loaders first (preferred)
try:
    from qepc.nba.eoin_data_source import (
        load_eoin_games,
        load_eoin_player_boxes,
        load_eoin_team_boxes,
        print_eoin_summary,
    )

    games_qepc = load_eoin_games()
    player_boxes_qepc = load_eoin_player_boxes()
    team_boxes_qepc = load_eoin_team_boxes()

    print_eoin_summary(games_qepc, player_boxes_qepc, team_boxes_qepc)

except Exception as e:
    print("‚ö†Ô∏è QEPC loader import failed; falling back to cached parquet in cache/imports/")
    print("   Error:", repr(e))

    games_qepc = _fallback_load_parquet("games_qepc.parquet")
    player_boxes_qepc = _fallback_load_parquet("player_boxes_qepc.parquet")
    team_boxes_qepc = _fallback_load_parquet("team_boxes_qepc.parquet")

# Normalize date columns
games_qepc = _coerce_game_date(games_qepc, "game_date")
player_boxes_qepc = _coerce_game_date(player_boxes_qepc, "game_date")
team_boxes_qepc = _coerce_game_date(team_boxes_qepc, "game_date")

print("\nShapes:")
print("  games_qepc:", games_qepc.shape)
print("  player_boxes_qepc:", player_boxes_qepc.shape)
print("  team_boxes_qepc:", team_boxes_qepc.shape)

# Quick schema sanity checks (non-fatal)
for col in ["game_id", "team_name", "opp_team_name", "player_id", "points", "reboundstotal", "assists", "numminutes"]:
    if col not in player_boxes_qepc.columns:
        print(f"‚ö†Ô∏è player_boxes_qepc missing expected column: {col}")


=== Eoin / QEPC Data Summary ===
Games:            72311 rows, 18 columns
  game_datetime: 1946-11-26 23:00:00+00:00  ‚Üí  2025-12-10 17:00:00+00:00
Player boxes:   1639424 rows, 36 columns
  game_datetime: 1946-11-26 23:00:00+00:00  ‚Üí  2025-12-10 17:00:00+00:00
Team boxes:      144622 rows, 49 columns
  game_datetime: 1946-11-26 23:00:00+00:00  ‚Üí  2025-12-10 17:00:00+00:00
Max season record seen in team_boxes: 68.0‚Äì65.0 (approx)

Shapes:
  games_qepc: (72311, 18)
  player_boxes_qepc: (1639424, 36)
  team_boxes_qepc: (144622, 49)


In [4]:
# ==========================================================
# CELL 3 ‚Äì PICK A GAME (BY DATE + OPTIONAL TEAMS) + GET TEAM TOTAL TARGETS
# ==========================================================
import datetime as _dt

# ----- USER CONTROLS -----
GAME_DATE = "2025-12-05"        # YYYY-MM-DD
HOME_TEAM = "Celtics"           # or None
AWAY_TEAM = "Lakers"            # or None

# If your matchup engine exists, use it (best).
# Otherwise we fall back to a minimal table built from games_qepc.
try:
    from qepc.nba.matchups_eoin import build_matchups_for_date
    matchups = build_matchups_for_date(GAME_DATE)
    print("‚úÖ Loaded matchups via qepc.nba.matchups_eoin.build_matchups_for_date")
except Exception as e:
    print("‚ö†Ô∏è Could not import matchup engine; using games_qepc as fallback.")
    print("   Error:", repr(e))
    gd = pd.to_datetime(GAME_DATE).date()
    cols = [c for c in ["game_id","game_date","home_team_name","away_team_name","home_team_id","away_team_id"] if c in games_qepc.columns]
    matchups = games_qepc[games_qepc["game_date"] == gd][cols].copy()
    # fallback team totals if matchup engine missing
    matchups["exp_home_pts"] = np.nan
    matchups["exp_away_pts"] = np.nan

print("matchups.shape:", matchups.shape)
display(matchups.head(10))

# Pick the game row
if HOME_TEAM and AWAY_TEAM:
    sel = matchups[
        (matchups["home_team_name"].astype(str) == str(HOME_TEAM))
        & (matchups["away_team_name"].astype(str) == str(AWAY_TEAM))
    ]
    if len(sel) == 0:
        raise ValueError(f"No matchup found for {GAME_DATE} with {HOME_TEAM} vs {AWAY_TEAM}.")
    game_row = sel.iloc[0]
else:
    if len(matchups) == 0:
        raise ValueError(f"No games found for date {GAME_DATE}.")
    game_row = matchups.iloc[0]

home_team = str(game_row["home_team_name"])
away_team = str(game_row["away_team_name"])
asof_date = pd.to_datetime(game_row["game_date"]).date()
game_id = int(game_row["game_id"]) if "game_id" in game_row and pd.notna(game_row["game_id"]) else None

print("\nüéØ Selected game:")
print("  game_id:", game_id)
print("  date:", asof_date)
print("  matchup:", f"{away_team} @ {home_team}")

home_pts_target = float(game_row.get("exp_home_pts", np.nan))
away_pts_target = float(game_row.get("exp_away_pts", np.nan))

print("\nTeam point targets from matchup engine (NaN means fallback/unknown):")
print("  home_pts_target:", home_pts_target)
print("  away_pts_target:", away_pts_target)


Built advanced strengths from Eoin team_stats:
      team_id  games_played   win_pct     off_ppg     def_ppg  \
0  1610612738           338  0.718935  116.982249  108.464497   
1  1610612760           326  0.687117  118.401840  110.766871   
2  1610612743           335  0.647761  116.128358  112.402985   
3  1610612739           312  0.608974  114.669872  110.083333   
4  1610612752           330  0.606061  114.078788  110.463636   
5  1610612750           328  0.591463  114.185976  110.371951   
6  1610612744           318  0.559748  115.814465  113.339623   
7  1610612749           305  0.577049  116.045902  114.504918   
8  1610612746           303  0.547855  113.079208  111.221122   
9  1610612747           321  0.545171  114.971963  115.112150   

   pts_diff_per_game  strength_score  strength_rank  
0           8.517751        1.188377              1  
1           7.634969        1.088435              2  
2           3.725373        0.815295              3  
3           4.586538 

Unnamed: 0,game_id,game_date,game_datetime,home_team_id,away_team_id,home_team_name,away_team_name,home_strength_score,away_strength_score,home_off_ppg,home_def_ppg,away_off_ppg,away_def_ppg,home_is_b2b,away_is_b2b,exp_home_pts_raw,exp_away_pts_raw,exp_home_pts,exp_away_pts
0,22500338,2025-12-05,2025-12-05 14:00:00+00:00,1610612738,1610612747,Celtics,Lakers,1.188377,0.416736,116.982249,108.464497,114.971963,115.11215,True,True,116.047199,109.71823,116.528433,106.170305
1,22500339,2025-12-05,2025-12-05 14:00:00+00:00,1610612753,1610612748,Magic,Heat,0.339996,0.348076,109.140468,108.752508,110.5,110.109091,False,False,111.12478,109.126254,107.097078,104.98813
2,22500340,2025-12-05,2025-12-05 14:30:00+00:00,1610612737,1610612743,Hawks,Nuggets,0.245819,0.815295,117.794613,118.905724,116.128358,112.402985,False,False,116.598799,117.017041,117.585299,120.746031
3,22500341,2025-12-05,2025-12-05 14:30:00+00:00,1610612739,1610612759,Cavaliers,Spurs,0.76528,-0.182687,114.669872,110.083333,113.290657,118.138408,False,False,117.90414,111.186995,120.086332,109.10343
4,22500342,2025-12-05,2025-12-05 14:30:00+00:00,1610612765,1610612757,Pistons,Trail Blazers,-0.179004,-0.225382,112.228669,116.170648,110.866197,116.021127,False,False,115.624898,113.018423,115.719304,112.76079
5,22500343,2025-12-05,2025-12-05 14:30:00+00:00,1610612752,1610612762,Knicks,Jazz,0.711743,-0.244136,114.078788,110.463636,114.451049,119.825175,False,True,118.451981,110.457343,121.135996,107.646313
6,22500344,2025-12-05,2025-12-05 14:30:00+00:00,1610612761,1610612766,Raptors,Hornets,0.057433,-0.549142,112.5,114.667808,108.303136,116.226481,True,False,114.36324,110.985472,113.301969,108.700988
7,22500345,2025-12-05,2025-12-05 15:00:00+00:00,1610612741,1610612754,Bulls,Pacers,0.232353,0.379903,114.686007,115.566553,117.663609,117.608563,False,False,117.647285,116.115081,119.594198,118.944816
8,22500346,2025-12-05,2025-12-05 15:00:00+00:00,1610612745,1610612756,Rockets,Suns,0.365803,0.415879,113.924138,113.134483,114.339934,113.788779,False,False,115.356458,113.237208,115.204974,113.197705
9,22500347,2025-12-05,2025-12-05 15:00:00+00:00,1610612763,1610612746,Grizzlies,Clippers,0.301533,0.503728,114.317881,114.516556,113.079208,111.221122,False,False,114.269501,113.297882,113.122365,113.318871



üéØ Selected game:
  game_id: 22500338
  date: 2025-12-05
  matchup: Lakers @ Celtics

Team point targets from matchup engine (NaN means fallback/unknown):
  home_pts_target: 116.52843333517667
  away_pts_target: 106.17030496875519


In [5]:
# ==========================================================
# CELL 4 ‚Äì CONFIG: LOOKBACK WINDOW + MODEL WEIGHTS (NO DATA LEAKAGE)
# ==========================================================
from datetime import timedelta

# How much history to use before the target game date
LOOKBACK_DAYS = 365 * 2        # ~2 seasons; increase if you want more stability, decrease if you want more recency
LAST_N_GAMES = 10              # player recency window (per player)
MIN_GAMES_PLAYER = 5           # minimum games to trust season averages
MIN_GAMES_VS_OPP = 3           # minimum games vs opponent to even consider it
VSOPP_SHRINK_K = 8             # bigger = more shrinkage toward season avg when vs-opp sample is small

# Share-model adjustment strengths
W_RECENCY = 0.60               # how much recency nudges shares (0..1)
W_VSOPP   = 0.40               # how much matchup history nudges shares (0..1)

# Clip factors to avoid nuclear weirdness from tiny samples / injuries / etc.
FACTOR_CLIP = (0.60, 1.60)

# If True, we build points via TEAM_TOTAL √ó ADJUSTED_SHARE (recommended).
# If False, we build points from blended per-player averages then scale to team total.
USE_SHARE_MODEL = True

print("Config loaded.")
print("  LOOKBACK_DAYS:", LOOKBACK_DAYS)
print("  LAST_N_GAMES:", LAST_N_GAMES)
print("  USE_SHARE_MODEL:", USE_SHARE_MODEL)


Config loaded.
  LOOKBACK_DAYS: 730
  LAST_N_GAMES: 10
  USE_SHARE_MODEL: True


In [6]:
# ==========================================================
# CELL 5 ‚Äì BUILD HISTORY SLICE + GET ROSTERS (GAME_ID OR RECENT INFERENCE)
# ==========================================================
def slice_player_history(player_boxes: pd.DataFrame, asof_date: _dt.date, lookback_days: int) -> pd.DataFrame:
    pb = player_boxes.copy()
    pb["game_date"] = pd.to_datetime(pb["game_date"], errors="coerce").dt.date
    pb = pb[pb["game_date"].notna()]
    pb = pb[pb["game_date"] < asof_date]  # STRICTLY BEFORE (no leakage)
    if lookback_days is not None and lookback_days > 0:
        cutoff = asof_date - timedelta(days=int(lookback_days))
        pb = pb[pb["game_date"] >= cutoff]
    return pb

pb_hist = slice_player_history(player_boxes_qepc, asof_date=asof_date, lookback_days=LOOKBACK_DAYS)
print("pb_hist rows:", len(pb_hist), "| date window:", (pb_hist["game_date"].min(), pb_hist["game_date"].max()))

def roster_from_game_id(player_boxes: pd.DataFrame, game_id: int, team_name: str) -> list[int]:
    gp = player_boxes[player_boxes["game_id"].astype(int) == int(game_id)]
    ids = gp.loc[gp["team_name"].astype(str) == str(team_name), "player_id"].unique()
    return sorted(int(x) for x in ids)

def roster_from_recent_games(player_boxes_hist: pd.DataFrame, team_name: str, lookback_games: int = 6) -> list[int]:
    team_rows = player_boxes_hist[player_boxes_hist["team_name"].astype(str) == str(team_name)].copy()
    if len(team_rows) == 0:
        return []
    # last N unique games for that team (in the history slice)
    gids = (
        team_rows.sort_values("game_date")["game_id"]
        .dropna()
        .astype(int)
        .drop_duplicates()
        .tail(int(lookback_games))
        .tolist()
    )
    ids = team_rows[team_rows["game_id"].astype(int).isin(gids)]["player_id"].unique()
    return sorted(int(x) for x in ids)

def get_roster_ids(game_id: int | None, team_name: str) -> list[int]:
    if game_id is not None:
        ids = roster_from_game_id(player_boxes_qepc, game_id, team_name)
        if len(ids) > 0:
            return ids
    # fallback (works for "future" games where we don't have a game_id roster)
    return roster_from_recent_games(pb_hist, team_name, lookback_games=6)

home_ids = get_roster_ids(game_id, home_team)
away_ids = get_roster_ids(game_id, away_team)

print(f"Roster for {home_team}: {len(home_ids)} players")
print("  home_ids:", home_ids)
print(f"Roster for {away_team}: {len(away_ids)} players")
print("  away_ids:", away_ids)

if len(home_ids) == 0 or len(away_ids) == 0:
    print("‚ö†Ô∏è One roster is empty. If this is a future game, increase LOOKBACK_DAYS or make sure pb_hist has recent games.")


pb_hist rows: 75826 | date window: (datetime.date(2023, 12, 6), datetime.date(2025, 12, 4))
Roster for Celtics: 14 players
  home_ids: [1627759, 1628401, 1628449, 1629014, 1629674, 1630202, 1630214, 1630568, 1630573, 1631169, 1631248, 1641775, 1642864, 1642873]
Roster for Lakers: 13 players
  away_ids: [1628467, 1629020, 1629028, 1629060, 1629216, 1629637, 1630559, 1631166, 1631222, 1641733, 1642261, 1642355, 1642876]


In [7]:
# ==========================================================
# CELL 6 ‚Äì BUILD ROSTER USAGE TABLES + VS-OPP SPLITS (POINTS/REB/AST)
# ==========================================================
def _safe_name_from_rows(rows: pd.DataFrame, pid: int) -> str:
    if "firstname" in rows.columns and "lastname" in rows.columns and len(rows) > 0:
        first = str(rows["firstname"].iloc[-1]) if pd.notna(rows["firstname"].iloc[-1]) else ""
        last  = str(rows["lastname"].iloc[-1]) if pd.notna(rows["lastname"].iloc[-1]) else ""
        nm = (first + " " + last).strip()
        if nm and nm.lower() != "nan":
            return nm
    return f"player_{pid}"

def _add_team_totals(pb_team: pd.DataFrame) -> pd.DataFrame:
    totals = (
        pb_team.groupby(["team_name","game_id"], as_index=False)
        .agg(
            team_pts=("points","sum"),
            team_reb=("reboundstotal","sum"),
            team_ast=("assists","sum"),
        )
    )
    out = pb_team.merge(totals, on=["team_name","game_id"], how="left")
    for num, den, outcol in [
        ("points","team_pts","points_share"),
        ("reboundstotal","team_reb","reb_share"),
        ("assists","team_ast","ast_share"),
    ]:
        out[outcol] = np.where(out[den] > 0, out[num] / out[den], np.nan)
    return out

def build_roster_usage(
    pb_hist: pd.DataFrame,
    team_name: str,
    roster_ids: list[int],
    last_n_games: int,
) -> pd.DataFrame:
    # only history for this team + roster
    pb_team = pb_hist[
        (pb_hist["team_name"].astype(str) == str(team_name))
        & (pb_hist["player_id"].astype(int).isin([int(x) for x in roster_ids]))
    ].copy()

    pb_team = _add_team_totals(pb_team)

    # season-ish aggregates (within history slice)
    agg = pb_team.groupby("player_id").agg(
        games_played=("game_id", "nunique"),
        avg_points=("points", "mean"),
        avg_rebounds=("reboundstotal", "mean"),
        avg_assists=("assists", "mean"),
        avg_minutes=("numminutes", "mean"),
        mean_points_share=("points_share", "mean"),
        mean_rebounds_share=("reb_share", "mean"),
        mean_assists_share=("ast_share", "mean"),
        last_seen=("game_date", "max"),
    ).reset_index()

    # last N games per player
    pb_team_sorted = pb_team.sort_values(["player_id", "game_date"])
    lastN = pb_team_sorted.groupby("player_id", group_keys=False).tail(int(last_n_games))

    last_agg = lastN.groupby("player_id").agg(
        pts_avg_lastN=("points","mean"),
        reb_avg_lastN=("reboundstotal","mean"),
        ast_avg_lastN=("assists","mean"),
        min_avg_lastN=("numminutes","mean"),
    ).reset_index()

    usage = agg.merge(last_agg, on="player_id", how="left")

    # player names (last known within this team slice; fallback to global last seen)
    names = []
    for pid in usage["player_id"].astype(int).tolist():
        rows = pb_team_sorted[pb_team_sorted["player_id"].astype(int) == int(pid)]
        if len(rows) == 0:
            rows = pb_hist[pb_hist["player_id"].astype(int) == int(pid)].sort_values("game_date")
        names.append(_safe_name_from_rows(rows, int(pid)))
    usage["player_name"] = names
    usage["team_name"] = str(team_name)

    # ensure every roster_id exists (new guys / no history)
    have = set(usage["player_id"].astype(int).tolist())
    missing = [int(x) for x in roster_ids if int(x) not in have]
    if missing:
        fb_rows = []
        for pid in missing:
            rows = pb_hist[pb_hist["player_id"].astype(int) == int(pid)].sort_values("game_date")
            fb_rows.append({
                "player_id": int(pid),
                "team_name": str(team_name),
                "player_name": _safe_name_from_rows(rows, int(pid)),
                "games_played": 0,
                "avg_points": 0.0,
                "avg_rebounds": 0.0,
                "avg_assists": 0.0,
                "avg_minutes": 0.0,
                "mean_points_share": np.nan,
                "mean_rebounds_share": np.nan,
                "mean_assists_share": np.nan,
                "pts_avg_lastN": np.nan,
                "reb_avg_lastN": np.nan,
                "ast_avg_lastN": np.nan,
                "min_avg_lastN": np.nan,
                "last_seen": pd.NaT,
            })
        usage = pd.concat([usage, pd.DataFrame(fb_rows)], ignore_index=True)

    return usage

def build_vs_opp_splits(
    pb_hist: pd.DataFrame,
    team_name: str,
    opp_team_name: str,
    roster_ids: list[int],
) -> pd.DataFrame:
    pb_vs = pb_hist[
        (pb_hist["team_name"].astype(str) == str(team_name))
        & (pb_hist["opp_team_name"].astype(str) == str(opp_team_name))
        & (pb_hist["player_id"].astype(int).isin([int(x) for x in roster_ids]))
    ].copy()

    if len(pb_vs) == 0:
        return pd.DataFrame(columns=["player_id","games_vs_opp","pts_vs_opp","reb_vs_opp","ast_vs_opp"])

    splits = pb_vs.groupby("player_id").agg(
        games_vs_opp=("game_id","nunique"),
        pts_vs_opp=("points","mean"),
        reb_vs_opp=("reboundstotal","mean"),
        ast_vs_opp=("assists","mean"),
    ).reset_index()

    return splits

# Build usage + splits for both teams
home_usage = build_roster_usage(pb_hist, home_team, home_ids, LAST_N_GAMES)
away_usage = build_roster_usage(pb_hist, away_team, away_ids, LAST_N_GAMES)

home_vs = build_vs_opp_splits(pb_hist, home_team, away_team, home_ids)
away_vs = build_vs_opp_splits(pb_hist, away_team, home_team, away_ids)

home = home_usage.merge(home_vs, on="player_id", how="left")
away = away_usage.merge(away_vs, on="player_id", how="left")

print("home usage shape:", home.shape)
display(home.sort_values("avg_points", ascending=False).head(12))

print("away usage shape:", away.shape)
display(away.sort_values("avg_points", ascending=False).head(12))


home usage shape: (14, 20)


Unnamed: 0,player_id,games_played,avg_points,avg_rebounds,avg_assists,avg_minutes,mean_points_share,mean_rebounds_share,mean_assists_share,last_seen,pts_avg_lastN,reb_avg_lastN,ast_avg_lastN,min_avg_lastN,player_name,team_name,games_vs_opp,pts_vs_opp,reb_vs_opp,ast_vs_opp
0,1627759,183,21.825137,5.431694,3.797814,33.383081,0.349087,0.274943,0.278124,2025-12-02,30.1,7.3,5.7,34.92,Jaylen Brown,Celtics,4.0,18.75,6.75,3.5
1,1628401,197,15.756345,4.243655,4.604061,33.230158,0.25702,0.217218,0.348402,2025-12-04,19.2,4.7,5.5,33.381,Derrick White,Celtics,4.0,10.0,3.5,6.75
3,1629014,26,13.615385,2.115385,2.461538,24.2792,0.11878,0.046117,0.102632,2025-12-04,11.9,2.2,3.3,21.893,Anfernee Simons,Celtics,,,,
5,1630202,204,12.563725,3.406863,3.485294,25.62703,0.195758,0.168035,0.245872,2025-12-04,20.5,5.0,4.8,32.368,Payton Pritchard,Celtics,4.0,7.0,1.5,1.25
9,1631169,26,7.884615,4.884615,1.153846,20.0488,0.066876,0.109426,0.048122,2025-12-04,7.1,4.1,1.2,17.461,Josh Minott,Celtics,,,,
8,1630573,194,7.551546,2.989691,0.958763,20.248324,0.125109,0.150049,0.069101,2025-12-04,6.5,3.4,1.8,21.26,Sam Hauser,Celtics,4.0,6.25,1.75,0.0
7,1630568,25,6.04,3.56,0.64,13.946364,0.053283,0.07673,0.026796,2025-12-04,4.7,2.3,0.4,13.211429,Luka Garza,Celtics,,,,
4,1629674,181,3.939227,3.088398,0.607735,14.343115,0.053716,0.115957,0.036314,2025-12-04,11.5,8.2,1.4,23.48,Neemias Queta,Celtics,4.0,3.25,2.25,1.25
12,1642864,26,3.0,1.769231,0.5,12.635714,0.026388,0.041012,0.020913,2025-12-04,2.3,1.4,0.3,10.798571,Hugo Gonzalez,Celtics,,,,
2,1628449,26,2.269231,1.615385,0.192308,13.445833,0.021697,0.033746,0.010041,2025-12-04,0.3,0.8,0.1,13.335,Chris Boucher,Celtics,,,,


away usage shape: (13, 20)


Unnamed: 0,player_id,games_played,avg_points,avg_rebounds,avg_assists,avg_minutes,mean_points_share,mean_rebounds_share,mean_assists_share,last_seen,pts_avg_lastN,reb_avg_lastN,ast_avg_lastN,min_avg_lastN,player_name,team_name,games_vs_opp,pts_vs_opp,reb_vs_opp,ast_vs_opp
6,1630559,174,19.017241,4.396552,5.54023,33.757824,0.422597,0.267182,0.585269,2025-12-04,27.9,6.1,5.0,36.697,Austin Reaves,Lakers,4.0,20.5,3.5,3.5
2,1629028,26,13.269231,8.0,1.0,28.120417,0.159143,0.252927,0.064504,2025-12-04,15.0,9.1,0.8,28.621,Deandre Ayton,Lakers,,,,
3,1629060,162,12.734568,4.277778,1.148148,29.643677,0.279226,0.262548,0.136608,2025-12-04,12.0,3.5,0.3,31.469,Rui Hachimura,Lakers,3.0,12.333333,6.0,2.0
8,1631222,27,8.518519,3.740741,2.037037,25.327308,0.10175,0.117003,0.136727,2025-12-04,6.6,3.2,1.0,21.755,Jake LaRavia,Lakers,,,,
10,1642261,116,8.241379,2.525862,0.836207,18.851308,0.164409,0.151521,0.08197,2025-12-04,5.0,1.4,0.5,12.88625,Dalton Knecht,Lakers,2.0,11.0,4.5,0.5
9,1641733,17,5.588235,1.0,1.235294,13.421429,0.06228,0.029507,0.071619,2025-12-04,5.7,0.8,1.4,13.545,Nick Smith Jr.,Lakers,,,,
4,1629216,118,5.254237,1.067797,1.262712,19.830748,0.104266,0.068282,0.133937,2025-12-04,4.0,1.0,1.9,21.679,Gabe Vincent,Lakers,2.0,7.0,0.5,2.5
5,1629637,162,4.907407,3.691358,0.759259,15.569172,0.101105,0.206551,0.077817,2025-12-04,5.9,3.6,0.9,16.768,Jaxson Hayes,Lakers,3.0,7.0,6.333333,1.0
1,1629020,111,3.495495,3.90991,1.009009,17.466437,0.076602,0.221291,0.10946,2025-12-04,0.9,1.2,0.4,12.353333,Jarred Vanderbilt,Lakers,3.0,6.666667,7.0,1.333333
7,1631166,1,2.0,0.0,0.0,1.12,0.033333,0.0,0.0,2025-11-25,2.0,0.0,0.0,1.12,Drew Timme,Lakers,,,,


In [8]:
# ==========================================================
# CELL 7 ‚Äì BUILD PLAYER Œª (POINTS/REB/AST) + SCALE TO TEAM TARGETS
# ==========================================================
def _clip(x: float, lo: float, hi: float) -> float:
    return float(min(max(x, lo), hi))

def _ratio_factor(season: float, recent: float, vsopp: float, games_vs: float) -> float:
    # recency ratio
    f_rec = 1.0
    if pd.notna(season) and season > 0 and pd.notna(recent):
        f_rec = float(recent) / float(season)

    # vs-opp ratio (shrunk toward season to avoid 1-game magic)
    f_vs = 1.0
    if pd.notna(season) and season > 0 and pd.notna(vsopp) and pd.notna(games_vs) and float(games_vs) >= float(MIN_GAMES_VS_OPP):
        g = float(games_vs)
        shrunk = (float(vsopp) * g + float(season) * float(VSOPP_SHRINK_K)) / (g + float(VSOPP_SHRINK_K))
        f_vs = float(shrunk) / float(season)

    # combine (exponent weights)
    f = (f_rec ** float(W_RECENCY)) * (f_vs ** float(W_VSOPP))
    return _clip(f, FACTOR_CLIP[0], FACTOR_CLIP[1])

def _team_points_fallback(team_name: str) -> float:
    tmp = pb_hist[pb_hist["team_name"].astype(str) == str(team_name)]
    if len(tmp) == 0:
        return float("nan")
    by_game = tmp.groupby("game_id")["points"].sum()
    return float(by_game.mean())

def _team_stat_target_from_team_boxes(team_name: str, stat: str) -> float:
    # stat in {"rebounds","assists"} mapped to likely columns
    tb = team_boxes_qepc.copy()
    if "team_name" not in tb.columns:
        return float("nan")
    tb = tb[tb["team_name"].astype(str) == str(team_name)]
    tb = tb[tb["game_date"].notna()]
    tb = tb[tb["game_date"] < asof_date]
    if LOOKBACK_DAYS is not None and LOOKBACK_DAYS > 0:
        cutoff = asof_date - timedelta(days=int(LOOKBACK_DAYS))
        tb = tb[tb["game_date"] >= cutoff]

    candidates = []
    if stat == "rebounds":
        candidates = ["reboundstotal","reboundsTotal","rebounds_total","reb_total"]
    elif stat == "assists":
        candidates = ["assists","ast","assists_total"]
    elif stat == "points":
        candidates = ["points","pts","score"]
    for c in candidates:
        if c in tb.columns:
            val = tb[c].mean()
            return float(val) if pd.notna(val) else float("nan")
    return float("nan")

def _team_stat_target_from_player_boxes(team_name: str, col: str) -> float:
    tmp = pb_hist[pb_hist["team_name"].astype(str) == str(team_name)]
    if len(tmp) == 0:
        return float("nan")
    by_game = tmp.groupby("game_id")[col].sum()
    return float(by_game.mean())

def _get_team_targets(team_name: str) -> dict:
    pts = _team_points_fallback(team_name)
    reb = _team_stat_target_from_team_boxes(team_name, "rebounds")
    ast = _team_stat_target_from_team_boxes(team_name, "assists")

    if not np.isfinite(reb):
        reb = _team_stat_target_from_player_boxes(team_name, "reboundstotal")
    if not np.isfinite(ast):
        ast = _team_stat_target_from_player_boxes(team_name, "assists")

    return {"pts": pts, "reb": reb, "ast": ast}

home_targets = _get_team_targets(home_team)
away_targets = _get_team_targets(away_team)

# If matchup engine gave point targets, use them; otherwise fall back to team averages in history slice.
if not np.isfinite(home_pts_target):
    home_pts_target = home_targets["pts"]
if not np.isfinite(away_pts_target):
    away_pts_target = away_targets["pts"]

print("Team targets (points from matchup engine if available):")
print("  home:", home_team, "| pts:", home_pts_target, "| reb:", home_targets["reb"], "| ast:", home_targets["ast"])
print("  away:", away_team, "| pts:", away_pts_target, "| reb:", away_targets["reb"], "| ast:", away_targets["ast"])

def _base_share_fallback(df: pd.DataFrame, season_col: str, share_col: str) -> pd.Series:
    s = df[share_col]
    if s.notna().sum() > 0 and float(s.fillna(0).sum()) > 0:
        return s
    # fallback to season averages
    denom = df[season_col].fillna(0).sum()
    if denom <= 0:
        return pd.Series(np.ones(len(df)) / max(len(df), 1), index=df.index)
    return df[season_col].fillna(0) / denom

def build_team_lambdas(team_df: pd.DataFrame, team_pts: float, team_reb: float, team_ast: float) -> pd.DataFrame:
    df = team_df.copy()

    # ---- POINTS ----
    if USE_SHARE_MODEL:
        base = _base_share_fallback(df, "avg_points", "mean_points_share")
        factors = df.apply(
            lambda r: _ratio_factor(
                r.get("avg_points", np.nan),
                r.get("pts_avg_lastN", np.nan),
                r.get("pts_vs_opp", np.nan),
                r.get("games_vs_opp", np.nan),
            ),
            axis=1,
        )
        adj = base.fillna(0) * factors.astype(float)
        if float(adj.sum()) <= 0:
            adj = np.ones(len(df))
        share = adj / float(adj.sum())
        df["lambda_points"] = float(team_pts) * share
    else:
        # blended per-player mean then scaled (older approach)
        w_season, w_recent, w_vs = 0.50, 0.35, 0.15
        def _blend(season, recent, vs, games_vs):
            parts, weights = [], []
            if pd.notna(season): parts.append(float(season)); weights.append(w_season)
            if pd.notna(recent): parts.append(float(recent)); weights.append(w_recent)
            if pd.notna(vs) and pd.notna(games_vs) and float(games_vs) >= float(MIN_GAMES_VS_OPP):
                parts.append(float(vs)); weights.append(w_vs)
            if not parts: return 0.0
            w = np.array(weights, dtype=float); w = w / w.sum()
            return float(np.dot(parts, w))
        df["lambda_points_raw"] = df.apply(lambda r: _blend(r.get("avg_points",np.nan), r.get("pts_avg_lastN",np.nan), r.get("pts_vs_opp",np.nan), r.get("games_vs_opp",np.nan)), axis=1)
        s = float(df["lambda_points_raw"].sum())
        df["lambda_points"] = df["lambda_points_raw"] * (float(team_pts)/s) if s > 0 else df["lambda_points_raw"]

    # ---- REBOUNDS ----
    base_reb = _base_share_fallback(df, "avg_rebounds", "mean_rebounds_share")
    reb_factors = df.apply(
        lambda r: _ratio_factor(
            r.get("avg_rebounds", np.nan),
            r.get("reb_avg_lastN", np.nan),
            r.get("reb_vs_opp", np.nan),
            r.get("games_vs_opp", np.nan),
        ),
        axis=1,
    )
    adj_reb = base_reb.fillna(0) * reb_factors.astype(float)
    if float(adj_reb.sum()) <= 0:
        adj_reb = np.ones(len(df))
    reb_share = adj_reb / float(adj_reb.sum())
    df["lambda_rebounds"] = float(team_reb) * reb_share if np.isfinite(team_reb) else df.get("avg_rebounds", 0.0)

    # ---- ASSISTS ----
    base_ast = _base_share_fallback(df, "avg_assists", "mean_assists_share")
    ast_factors = df.apply(
        lambda r: _ratio_factor(
            r.get("avg_assists", np.nan),
            r.get("ast_avg_lastN", np.nan),
            r.get("ast_vs_opp", np.nan),
            r.get("games_vs_opp", np.nan),
        ),
        axis=1,
    )
    adj_ast = base_ast.fillna(0) * ast_factors.astype(float)
    if float(adj_ast.sum()) <= 0:
        adj_ast = np.ones(len(df))
    ast_share = adj_ast / float(adj_ast.sum())
    df["lambda_assists"] = float(team_ast) * ast_share if np.isfinite(team_ast) else df.get("avg_assists", 0.0)

    return df

home_lambdas = build_team_lambdas(home, team_pts=home_pts_target, team_reb=home_targets["reb"], team_ast=home_targets["ast"])
away_lambdas = build_team_lambdas(away, team_pts=away_pts_target, team_reb=away_targets["reb"], team_ast=away_targets["ast"])

print("\nSanity checks (sums):")
print("  sum(home Œª_points):", round(float(home_lambdas["lambda_points"].sum()), 3))
print("  sum(away Œª_points):", round(float(away_lambdas["lambda_points"].sum()), 3))


Team targets (points from matchup engine if available):
  home: Celtics | pts: 116.52843333517667 | reb: 45.21463414634146 | ast: 25.89268292682927
  away: Lakers | pts: 106.17030496875519 | reb: 42.40860215053763 | ast: 26.629032258064516

Sanity checks (sums):
  sum(home Œª_points): 116.528
  sum(away Œª_points): 106.17


In [9]:
# ==========================================================
# CELL 8 ‚Äì CLEAN VIEW OUTPUT + TOP PLAYERS
# ==========================================================
def clean_view(df: pd.DataFrame) -> pd.DataFrame:
    cols = [
        "team_name","player_id","player_name",
        "games_played",
        "avg_points","pts_avg_lastN","pts_vs_opp","games_vs_opp","lambda_points",
        "avg_rebounds","reb_avg_lastN","reb_vs_opp","lambda_rebounds",
        "avg_assists","ast_avg_lastN","ast_vs_opp","lambda_assists",
        "last_seen",
    ]
    keep = [c for c in cols if c in df.columns]
    out = df[keep].copy()

    # nicer rounding
    for c in [x for x in out.columns if x.startswith("avg_") or x.endswith("_lastN") or x.startswith("pts_") or x.startswith("reb_") or x.startswith("ast_") or x.startswith("lambda_")]:
        if c in out.columns and pd.api.types.is_numeric_dtype(out[c]):
            out[c] = out[c].astype(float).round(3)
    return out

home_view = clean_view(home_lambdas).sort_values("lambda_points", ascending=False).reset_index(drop=True)
away_view = clean_view(away_lambdas).sort_values("lambda_points", ascending=False).reset_index(drop=True)

print("Top home players by Œª_points:")
display(home_view.head(12))

print("Top away players by Œª_points:")
display(away_view.head(12))

final_view = pd.concat([home_view, away_view], ignore_index=True)
print("\nFINAL (home + away) preview:")
display(final_view.head(30))


Top home players by Œª_points:


Unnamed: 0,team_name,player_id,player_name,games_played,avg_points,pts_avg_lastN,pts_vs_opp,games_vs_opp,lambda_points,avg_rebounds,reb_avg_lastN,reb_vs_opp,lambda_rebounds,avg_assists,ast_avg_lastN,ast_vs_opp,lambda_assists,last_seen
0,Celtics,1627759,Jaylen Brown,183,21.825,30.1,18.75,4.0,32.539,5.432,7.3,6.75,9.884,3.798,5.7,3.5,6.098,2025-12-02
1,Celtics,1628401,Derrick White,197,15.756,19.2,10.0,4.0,21.526,4.244,4.7,3.5,6.58,4.604,5.5,6.75,7.132,2025-12-04
2,Celtics,1630202,Payton Pritchard,204,12.564,20.5,7.0,4.0,19.302,3.407,5.0,1.5,5.684,3.485,4.8,1.25,4.699,2025-12-04
3,Celtics,1630573,Sam Hauser,194,7.552,6.5,6.25,4.0,8.75,2.99,3.4,1.75,4.457,0.959,1.8,0.0,1.489,2025-12-04
4,Celtics,1629014,Anfernee Simons,26,13.615,11.9,,,8.584,2.115,2.2,,1.378,2.462,3.3,,2.125,2025-12-04
5,Celtics,1629674,Neemias Queta,181,3.939,11.5,3.25,4.0,6.734,3.088,8.2,2.25,5.415,0.608,1.4,1.25,1.009,2025-12-04
6,Celtics,1631169,Josh Minott,26,7.885,7.1,,,4.921,4.885,4.1,,2.875,1.154,1.2,,0.856,2025-12-04
7,Celtics,1630568,Luka Garza,25,6.04,4.7,,,3.592,3.56,2.3,,1.723,0.64,0.4,,0.351,2025-12-04
8,Celtics,1631248,Baylor Scheierman,101,2.208,4.5,0.0,2.0,3.55,1.307,1.9,0.0,1.713,0.594,1.1,0.5,0.826,2025-12-04
9,Celtics,1641775,Jordan Walsh,163,1.613,8.2,0.0,3.0,2.406,1.239,5.6,0.0,1.993,0.344,1.4,0.0,0.519,2025-12-04


Top away players by Œª_points:


Unnamed: 0,team_name,player_id,player_name,games_played,avg_points,pts_avg_lastN,pts_vs_opp,games_vs_opp,lambda_points,avg_rebounds,reb_avg_lastN,reb_vs_opp,lambda_rebounds,avg_assists,ast_avg_lastN,ast_vs_opp,lambda_assists,last_seen
0,Lakers,1630559,Austin Reaves,174,19.017,27.9,20.5,4.0,35.853,4.397,6.1,3.5,8.604,5.54,5.0,3.5,10.535,2025-12-04
1,Lakers,1629060,Rui Hachimura,162,12.735,12.0,12.333,3.0,17.916,4.278,3.5,6.0,6.604,1.148,0.3,2.0,1.654,2025-12-04
2,Lakers,1629028,Deandre Ayton,26,13.269,15.0,,,11.429,8.0,9.1,,7.436,1.0,0.8,,1.138,2025-12-04
3,Lakers,1642261,Dalton Knecht,116,8.241,5.0,11.0,2.0,8.128,2.526,1.4,4.5,2.894,0.836,0.5,0.5,1.215,2025-12-04
4,Lakers,1629637,Jaxson Hayes,162,4.907,5.9,7.0,3.0,7.873,3.691,3.6,6.333,5.946,0.759,0.9,1.0,1.797,2025-12-04
5,Lakers,1629216,Gabe Vincent,118,5.254,4.0,7.0,2.0,5.907,1.068,1.0,0.5,1.786,1.263,1.9,2.5,3.453,2025-12-04
6,Lakers,1631222,Jake LaRavia,27,8.519,6.6,,,5.825,3.741,3.2,,2.899,2.037,1.0,,1.8,2025-12-04
7,Lakers,1641733,Nick Smith Jr.,17,5.588,5.7,,,4.205,1.0,0.8,,0.702,1.235,1.4,,1.557,2025-12-04
8,Lakers,1629020,Jarred Vanderbilt,111,3.495,0.9,6.667,3.0,3.067,3.91,1.2,7.0,3.613,1.009,0.4,1.333,1.325,2025-12-04
9,Lakers,1631166,Drew Timme,1,2.0,2.0,,,2.224,0.0,0.0,,0.0,0.0,0.0,,0.0,2025-11-25



FINAL (home + away) preview:


Unnamed: 0,team_name,player_id,player_name,games_played,avg_points,pts_avg_lastN,pts_vs_opp,games_vs_opp,lambda_points,avg_rebounds,reb_avg_lastN,reb_vs_opp,lambda_rebounds,avg_assists,ast_avg_lastN,ast_vs_opp,lambda_assists,last_seen
0,Celtics,1627759,Jaylen Brown,183,21.825,30.1,18.75,4.0,32.539,5.432,7.3,6.75,9.884,3.798,5.7,3.5,6.098,2025-12-02
1,Celtics,1628401,Derrick White,197,15.756,19.2,10.0,4.0,21.526,4.244,4.7,3.5,6.58,4.604,5.5,6.75,7.132,2025-12-04
2,Celtics,1630202,Payton Pritchard,204,12.564,20.5,7.0,4.0,19.302,3.407,5.0,1.5,5.684,3.485,4.8,1.25,4.699,2025-12-04
3,Celtics,1630573,Sam Hauser,194,7.552,6.5,6.25,4.0,8.75,2.99,3.4,1.75,4.457,0.959,1.8,0.0,1.489,2025-12-04
4,Celtics,1629014,Anfernee Simons,26,13.615,11.9,,,8.584,2.115,2.2,,1.378,2.462,3.3,,2.125,2025-12-04
5,Celtics,1629674,Neemias Queta,181,3.939,11.5,3.25,4.0,6.734,3.088,8.2,2.25,5.415,0.608,1.4,1.25,1.009,2025-12-04
6,Celtics,1631169,Josh Minott,26,7.885,7.1,,,4.921,4.885,4.1,,2.875,1.154,1.2,,0.856,2025-12-04
7,Celtics,1630568,Luka Garza,25,6.04,4.7,,,3.592,3.56,2.3,,1.723,0.64,0.4,,0.351,2025-12-04
8,Celtics,1631248,Baylor Scheierman,101,2.208,4.5,0.0,2.0,3.55,1.307,1.9,0.0,1.713,0.594,1.1,0.5,0.826,2025-12-04
9,Celtics,1641775,Jordan Walsh,163,1.613,8.2,0.0,3.0,2.406,1.239,5.6,0.0,1.993,0.344,1.4,0.0,0.519,2025-12-04
