In [1]:
# =========================
# CELL 1 – QEPC PATH SETUP
# =========================

import sys
from pathlib import Path
import pandas as pd  # <--- add this

# Point to your local QEPC project root
PROJECT_ROOT = Path(r"C:\Users\wdors\qepc_project")

# Make sure the project root is on sys.path so `import qepc...` works
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

print("PROJECT_ROOT:", PROJECT_ROOT)

DATA_DIR = PROJECT_ROOT / "data"
CACHE_DIR = PROJECT_ROOT / "cache"

print("DATA_DIR:", DATA_DIR)
print("CACHE_DIR:", CACHE_DIR)


PROJECT_ROOT: C:\Users\wdors\qepc_project
DATA_DIR: C:\Users\wdors\qepc_project\data
CACHE_DIR: C:\Users\wdors\qepc_project\cache


In [2]:
# ==========================================
# CELL 2 – LOAD EOIN (QEPC-READY) DATA
# ==========================================

from qepc.nba.eoin_data_source import (
    load_eoin_games,
    load_eoin_player_boxes,
    load_eoin_team_boxes,
    print_eoin_summary,
)

games_qepc = load_eoin_games()
player_boxes_qepc = load_eoin_player_boxes()
team_boxes_qepc = load_eoin_team_boxes()

print("games_qepc.shape:", games_qepc.shape)
print("player_boxes_qepc.shape:", player_boxes_qepc.shape)
print("team_boxes_qepc.shape:", team_boxes_qepc.shape)

# Quick peek at the first few rows
print("\nGames sample:")
display(games_qepc.head())

print("\nPlayer boxes sample:")
display(player_boxes_qepc.head())

print("\nTeam boxes sample:")
display(team_boxes_qepc.head())

# Optional summary helper
print_eoin_summary(games_qepc, player_boxes_qepc, team_boxes_qepc)


games_qepc.shape: (72290, 19)
player_boxes_qepc.shape: (1638878, 36)
team_boxes_qepc.shape: (144580, 49)

Games sample:


Unnamed: 0,game_id,game_datetime,home_team_city,home_team_name,home_team_id,away_team_city,away_team_name,away_team_id,home_score,away_score,winner_team_id,gametype,attendance,arenaid,gamelabel,gamesublabel,seriesgamenumber,game_date,is_final
0,22500349,2025-12-05 16:30:00+00:00,Oklahoma City,Thunder,1610612760,Dallas,Mavericks,1610612742,132,111,1610612760,,18203.0,,,,,2025-12-05,True
1,22500345,2025-12-05 15:00:00+00:00,Chicago,Bulls,1610612741,Indiana,Pacers,1610612754,105,120,1610612754,,20471.0,,,,,2025-12-05,True
2,22500347,2025-12-05 15:00:00+00:00,Memphis,Grizzlies,1610612763,LA,Clippers,1610612746,107,98,1610612763,,15052.0,,,,,2025-12-05,True
3,22500348,2025-12-05 15:00:00+00:00,Milwaukee,Bucks,1610612749,Philadelphia,76ers,1610612755,101,116,1610612755,,17341.0,,,,,2025-12-05,True
4,22500346,2025-12-05 15:00:00+00:00,Houston,Rockets,1610612745,Phoenix,Suns,1610612756,117,98,1610612745,,18055.0,,,,,2025-12-05,True



Player boxes sample:


Unnamed: 0,firstname,lastname,player_id,game_id,game_datetime,team_city,team_name,opp_team_city,opp_team_name,gametype,...,freethrowsattempted,freethrowsmade,freethrowspercentage,reboundsdefensive,reboundsoffensive,reboundstotal,foulspersonal,turnovers,plusminuspoints,game_date
0,D'Angelo,Russell,1626156,22500349,2025-12-05 16:30:00+00:00,Dallas,Mavericks,Oklahoma City,Thunder,,...,0.0,0.0,0.0,3.0,0.0,3.0,2.0,1.0,-3.0,2025-12-05
1,Shai,Gilgeous-Alexander,1628983,22500349,2025-12-05 16:30:00+00:00,Oklahoma City,Thunder,Dallas,Mavericks,,...,12.0,11.0,0.917,5.0,0.0,5.0,4.0,2.0,24.0,2025-12-05
2,Caleb,Martin,1628997,22500349,2025-12-05 16:30:00+00:00,Dallas,Mavericks,Oklahoma City,Thunder,,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-2.0,2025-12-05
3,Kenrich,Williams,1629026,22500349,2025-12-05 16:30:00+00:00,Oklahoma City,Thunder,Dallas,Mavericks,,...,2.0,2.0,1.0,0.0,1.0,1.0,0.0,0.0,-3.0,2025-12-05
4,Naji,Marshall,1630230,22500349,2025-12-05 16:30:00+00:00,Dallas,Mavericks,Oklahoma City,Thunder,,...,4.0,2.0,0.5,1.0,0.0,1.0,3.0,0.0,-24.0,2025-12-05



Team boxes sample:


Unnamed: 0,game_id,game_datetime,team_city,team_name,team_id,opp_team_city,opp_team_name,opp_team_id,home,win,...,pointsfastbreak,pointsfromturnovers,pointsinthepaint,pointssecondchance,timestied,timeoutsremaining,seasonwins,seasonlosses,coachid,game_date
0,22500349,2025-12-05 16:30:00+00:00,Dallas,Mavericks,1610612742,Oklahoma City,Thunder,1610612760,0,0,...,12.0,8.0,38.0,8.0,8.0,1.0,8.0,16.0,,2025-12-05
1,22500349,2025-12-05 16:30:00+00:00,Oklahoma City,Thunder,1610612760,Dallas,Mavericks,1610612742,1,1,...,10.0,18.0,62.0,9.0,8.0,1.0,22.0,1.0,,2025-12-05
2,22500345,2025-12-05 15:00:00+00:00,Chicago,Bulls,1610612741,Indiana,Pacers,1610612754,1,0,...,8.0,13.0,36.0,18.0,4.0,0.0,9.0,13.0,,2025-12-05
3,22500346,2025-12-05 15:00:00+00:00,Houston,Rockets,1610612745,Phoenix,Suns,1610612756,1,1,...,16.0,21.0,66.0,8.0,7.0,1.0,15.0,5.0,,2025-12-05
4,22500347,2025-12-05 15:00:00+00:00,LA,Clippers,1610612746,Memphis,Grizzlies,1610612763,0,0,...,13.0,23.0,36.0,9.0,9.0,1.0,6.0,17.0,,2025-12-05


=== Eoin / QEPC Data Summary ===
Games:            72290 rows, 19 columns
  game_datetime: 1946-11-26 19:00:00+00:00  →  2025-12-05 16:30:00+00:00
Player boxes:   1638878 rows, 36 columns
  game_datetime: 1946-11-26 19:00:00+00:00  →  2025-12-05 16:30:00+00:00
Team boxes:      144580 rows, 49 columns
  game_datetime: 1946-11-26 19:00:00+00:00  →  2025-12-05 16:30:00+00:00
Max season record seen in team_boxes: 68.0–65.0 (approx)


In [3]:
# ==========================================
# CELL 3 – TEAM STATS & TEAM STRENGTHS
# ==========================================

from qepc.nba.eoin_team_stats import (
    build_team_stats_from_eoin,
    save_team_stats_to_cache,
)
from qepc.nba.team_strengths_eoin import (
    calculate_advanced_strengths_from_eoin,
    save_advanced_strengths_to_cache,
)

# Build team_stats from Eoin team boxes
team_stats = build_team_stats_from_eoin(team_boxes_qepc)

print("team_stats.shape:", team_stats.shape)
print("\nteam_stats sample:")
display(team_stats.head())

# Save to cache (parquet)
team_stats_path = save_team_stats_to_cache(team_stats)
print("\nSaved team_stats to:", team_stats_path)

# Build strengths from team_stats
team_strengths = calculate_advanced_strengths_from_eoin(team_stats, verbose=True)

print("\nteam_strengths.shape:", team_strengths.shape)
print("\nteam_strengths (top 10 by strength_score):")
display(
    team_strengths[
        [
            "team_id",
            "games_played",
            "win_pct",
            "off_ppg",
            "def_ppg",
            "pts_diff_per_game",
            "strength_score",
            "strength_rank",
        ]
    ].head(10)
)

# Save strengths to cache
team_strengths_path = save_advanced_strengths_to_cache(team_strengths)
print("\nSaved team_strengths to:", team_strengths_path)


team_stats.shape: (34, 14)

team_stats sample:


Unnamed: 0,team_id,games_played,wins,losses,win_pct,pts_for,pts_against,pts_diff,off_ppg,def_ppg,reb_total,reb_pg,ast_total,ast_pg
0,15016,1,0,1,0.0,97,107,-10,97.0,107.0,48.0,48.0,23.0,23.0
1,15018,3,0,3,0.0,257,395,-138,85.666667,131.666667,80.0,26.666667,49.0,16.333333
2,50013,1,0,1,0.0,92,127,-35,92.0,127.0,42.0,42.0,24.0,24.0
3,50014,1,0,1,0.0,88,123,-35,88.0,123.0,38.0,38.0,18.0,18.0
4,1610612737,6462,3163,3299,0.489477,668300,671894,-3594,103.419994,103.976168,174313.0,26.975085,87465.0,13.535283


Saved team_stats to: C:\Users\wdors\qepc_project\cache\imports\eoin_team_stats.parquet

Saved team_stats to: C:\Users\wdors\qepc_project\cache\imports\eoin_team_stats.parquet
Built advanced strengths from Eoin team_stats:
      team_id  games_played   win_pct     off_ppg     def_ppg  \
0  1610612738          6883  0.596833  106.253523  103.205434   
1  1610612747          6898  0.585242  106.762540  104.581328   
2  1610612759          4492  0.585485  104.987311  102.372663   
3  1610612756          5074  0.532913  107.760347  106.615491   
4  1610612760          5192  0.539676  105.905817  104.664291   
5  1610612749          5075  0.521773  105.209064  104.075862   
6  1610612743          4360  0.505963  108.001376  108.068349   
7  1610612745          5160  0.518411  105.497093  104.875000   
8  1610612762          4552  0.527900  103.588752  102.658172   
9  1610612757          4865  0.515313  105.058582  104.502980   

   pts_diff_per_game  strength_score  strength_rank  
0       

Unnamed: 0,team_id,games_played,win_pct,off_ppg,def_ppg,pts_diff_per_game,strength_score,strength_rank
0,1610612738,6883,0.596833,106.253523,103.205434,3.048089,0.779025,1
1,1610612747,6898,0.585242,106.76254,104.581328,2.181212,0.728369,2
2,1610612759,4492,0.585485,104.987311,102.372663,2.614648,0.700651,3
3,1610612756,5074,0.532913,107.760347,106.615491,1.144856,0.584849,4
4,1610612760,5192,0.539676,105.905817,104.664291,1.241525,0.556911,5
5,1610612749,5075,0.521773,105.209064,104.075862,1.133202,0.490957,6
6,1610612743,4360,0.505963,108.001376,108.068349,-0.066972,0.475774,7
7,1610612745,5160,0.518411,105.497093,104.875,0.622093,0.469176,8
8,1610612762,4552,0.5279,103.588752,102.658172,0.93058,0.455196,9
9,1610612757,4865,0.515313,105.058582,104.50298,0.555601,0.447508,10


Saved advanced strengths to: C:\Users\wdors\qepc_project\cache\imports\eoin_team_strengths.parquet

Saved team_strengths to: C:\Users\wdors\qepc_project\cache\imports\eoin_team_strengths.parquet


In [4]:
# ==========================================================
# CELL 4 – MATCHUPS FOR A DATE + TEAM PTS/REB/AST EXPECTED
# ==========================================================

from qepc.nba.matchups_eoin import build_matchups_for_date, MatchupConfig
from qepc.nba.team_scoring_eoin import attach_scoring_predictions, ScoringConfig

# Choose a date you know has games in Games.csv
TARGET_DATE = "2025-12-05"

# Include final games for sanity/backtests
matchup_config = MatchupConfig(include_final_games=True)
matchups = build_matchups_for_date(TARGET_DATE, config=matchup_config)

print(f"Matchups on {TARGET_DATE}: {len(matchups)} games")
display(
    matchups[
        [
            "game_id",
            "game_date",
            "game_datetime",
            "home_team_name",
            "away_team_name",
            "home_team_id",
            "away_team_id",
            "home_strength_score",
            "away_strength_score",
        ]
    ].head(20)
)

# Attach expected points / rebounds / assists for each team
scoring_config = ScoringConfig(
    home_court_advantage=1.5,
    weight_off_edge=0.7,
    weight_def_edge=0.7,
    # REB/AST weights can stay defaults for now
)

matchups_with_scores = attach_scoring_predictions(matchups, config=scoring_config)

print("\nColumns with expected stats (exp_*):")
print([c for c in matchups_with_scores.columns if c.startswith("exp_")])

print("\nMatchups with team expectations sample:")
display(
    matchups_with_scores[
        [
            "game_id",
            "game_date",
            "home_team_name",
            "away_team_name",
            "exp_home_pts",
            "exp_away_pts",
            "exp_home_reb",
            "exp_away_reb",
            "exp_home_ast",
            "exp_away_ast",
        ]
    ].head(20)
)


Matchups on 2025-12-05: 12 games


Unnamed: 0,game_id,game_date,game_datetime,home_team_name,away_team_name,home_team_id,away_team_id,home_strength_score,away_strength_score
0,22500339,2025-12-05,2025-12-05 14:00:00+00:00,Magic,Heat,1610612753,1610612748,0.196416,0.329388
1,22500338,2025-12-05,2025-12-05 14:00:00+00:00,Celtics,Lakers,1610612738,1610612747,0.779025,0.728369
2,22500341,2025-12-05,2025-12-05 14:30:00+00:00,Cavaliers,Spurs,1610612739,1610612759,0.204271,0.700651
3,22500343,2025-12-05,2025-12-05 14:30:00+00:00,Knicks,Jazz,1610612752,1610612762,0.274537,0.455196
4,22500340,2025-12-05,2025-12-05 14:30:00+00:00,Hawks,Nuggets,1610612737,1610612743,0.296308,0.475774
5,22500344,2025-12-05,2025-12-05 14:30:00+00:00,Raptors,Hornets,1610612761,1610612766,0.229671,-0.02824
6,22500342,2025-12-05,2025-12-05 14:30:00+00:00,Pistons,Trail Blazers,1610612765,1610612757,0.220679,0.447508
7,22500345,2025-12-05,2025-12-05 15:00:00+00:00,Bulls,Pacers,1610612741,1610612754,0.366119,0.352127
8,22500347,2025-12-05,2025-12-05 15:00:00+00:00,Grizzlies,Clippers,1610612763,1610612746,0.025832,0.075263
9,22500348,2025-12-05,2025-12-05 15:00:00+00:00,Bucks,76ers,1610612749,1610612755,0.490957,0.428929



Columns with expected stats (exp_*):
['exp_home_pts', 'exp_away_pts', 'exp_home_reb', 'exp_away_reb', 'exp_home_ast', 'exp_away_ast']

Matchups with team expectations sample:


Unnamed: 0,game_id,game_date,home_team_name,away_team_name,exp_home_pts,exp_away_pts,exp_home_reb,exp_away_reb,exp_home_ast,exp_away_ast
0,22500339,2025-12-05,Magic,Heat,109.787019,105.268142,39.096112,38.346505,20.60031,20.210821
1,22500338,2025-12-05,Celtics,Lakers,109.492322,109.31176,30.747403,30.777525,16.762972,16.642307
2,22500341,2025-12-05,Cavaliers,Spurs,107.780371,108.673197,34.671308,36.623736,18.802474,19.879764
3,22500343,2025-12-05,Knicks,Jazz,108.316113,107.209889,30.489426,35.8418,16.26646,19.840483
4,22500340,2025-12-05,Hawks,Nuggets,105.067937,109.639431,31.213371,36.878237,16.400656,20.129115
5,22500344,2025-12-05,Raptors,Hornets,107.137849,105.839621,38.718128,38.385503,20.798303,20.977721
6,22500342,2025-12-05,Pistons,Trail Blazers,106.871364,108.088979,30.788791,34.705993,16.353421,18.383682
7,22500345,2025-12-05,Bulls,Pacers,107.969677,107.579364,33.905362,36.479685,18.446368,19.728952
8,22500347,2025-12-05,Grizzlies,Clippers,103.971856,108.172461,38.833376,34.232865,20.705656,18.339439
9,22500348,2025-12-05,Bucks,76ers,108.772511,107.356289,33.960902,30.429492,18.596561,16.165813


In [5]:
# ===============================================
# CELL 5 – PLAYER USAGE (PTS / REB / AST SHARES)
# ===============================================

from qepc.nba.player_usage_eoin import build_player_usage_from_eoin

# Build per-player usage table from recent games
player_usage = build_player_usage_from_eoin(
    player_boxes=player_boxes_qepc,
    min_games=5,          # you can crank this up later
    cutoff_date="2024-10-01",  # keep it recent
)

print("player_usage.shape:", player_usage.shape)

print("\nplayer_usage sample:")
display(
    player_usage[
        [
            "player_id",
            "player_name",
            "team_name",
            "games_played",
            "avg_points",
            "avg_rebounds",
            "avg_assists",
            "mean_points_share",
            "mean_rebounds_share",
            "mean_assists_share",
        ]
    ].head(20)
)


player_usage.shape: (894, 13)

player_usage sample:


Unnamed: 0,player_id,player_name,team_name,games_played,avg_points,avg_rebounds,avg_assists,mean_points_share,mean_rebounds_share,mean_assists_share
0,2544,LeBron James,Lakers,90,21.455556,6.922222,7.222222,0.189008,0.165061,0.279046
1,101108,Chris Paul,Clippers,24,3.0,1.541667,2.875,0.025567,0.036275,0.113587
2,101108,Chris Paul,Spurs,84,8.630952,3.47619,7.214286,0.075845,0.07753,0.253688
3,200768,Kyle Lowry,76ers,67,2.298507,1.134328,1.626866,0.020919,0.02813,0.071155
4,200782,P.J. Tucker,Knicks,35,0.257143,0.228571,0.0,0.002326,0.006471,0.0
5,201142,Kevin Durant,Rockets,21,23.238095,4.571429,3.52381,0.18856,0.093963,0.141759
6,201142,Kevin Durant,Suns,67,25.537313,5.850746,4.134328,0.224922,0.136017,0.143967
7,201143,Al Horford,Celtics,87,7.218391,5.022989,1.712644,0.062607,0.112724,0.07097
8,201143,Al Horford,Warriors,17,5.529412,3.941176,2.0,0.047413,0.09454,0.069068
9,201144,Mike Conley,Timberwolves,117,6.940171,2.34188,3.880342,0.060359,0.054267,0.148645


In [6]:
# ============================================================
# CELL 6 – PLAYER PTS/REB/AST EXPECTATIONS FOR ONE MATCHUP
# ============================================================

import numpy as np

# Try to find a specific matchup (Celtics vs Lakers) on that date
mask_bos_lal = (
    (matchups_with_scores["home_team_name"] == "Celtics")
    & (matchups_with_scores["away_team_name"] == "Lakers")
)

if matchups_with_scores[mask_bos_lal].empty:
    # Fallback: just take the first game on the slate
    example_game = matchups_with_scores.iloc[0]
    print("Could not find Celtics-Lakers; using first game on slate instead.")
else:
    example_game = matchups_with_scores[mask_bos_lal].iloc[0]
    print("Using Celtics-Lakers game.")

display(
    example_game[
        [
            "game_id",
            "game_date",
            "home_team_name",
            "away_team_name",
            "exp_home_pts",
            "exp_away_pts",
            "exp_home_reb",
            "exp_away_reb",
            "exp_home_ast",
            "exp_away_ast",
        ]
    ].to_frame().T
)

home_team = example_game["home_team_name"]
away_team = example_game["away_team_name"]

home_pts_lambda = float(example_game["exp_home_pts"])
away_pts_lambda = float(example_game["exp_away_pts"])

home_reb_lambda = example_game.get("exp_home_reb", np.nan)
away_reb_lambda = example_game.get("exp_away_reb", np.nan)

home_ast_lambda = example_game.get("exp_home_ast", np.nan)
away_ast_lambda = example_game.get("exp_away_ast", np.nan)

print("\nTeam-level expectations:")
print(f"Home {home_team}: λ_pts={home_pts_lambda:.2f}, λ_reb={home_reb_lambda}, λ_ast={home_ast_lambda}")
print(f"Away {away_team}: λ_pts={away_pts_lambda:.2f}, λ_reb={away_reb_lambda}, λ_ast={away_ast_lambda}")


Using Celtics-Lakers game.


Unnamed: 0,game_id,game_date,home_team_name,away_team_name,exp_home_pts,exp_away_pts,exp_home_reb,exp_away_reb,exp_home_ast,exp_away_ast
1,22500338,2025-12-05,Celtics,Lakers,109.492322,109.31176,30.747403,30.777525,16.762972,16.642307



Team-level expectations:
Home Celtics: λ_pts=109.49, λ_reb=30.74740341274342, λ_ast=16.7629721092934
Away Lakers: λ_pts=109.31, λ_reb=30.777524661885195, λ_ast=16.642306790649137


In [7]:
# =====================================================
# CELL 7 – HOME TEAM PLAYER PTS/REB/AST PROJECTIONS
# =====================================================

home_usage = player_usage[player_usage["team_name"] == home_team].copy()

# Expected points
home_usage["exp_points"] = home_pts_lambda * home_usage["mean_points_share"]

# Expected rebounds (if we have both the team λ and share)
if not pd.isna(home_reb_lambda) and "mean_rebounds_share" in home_usage.columns:
    home_usage["exp_rebounds"] = home_reb_lambda * home_usage["mean_rebounds_share"]

# Expected assists
if not pd.isna(home_ast_lambda) and "mean_assists_share" in home_usage.columns:
    home_usage["exp_assists"] = home_ast_lambda * home_usage["mean_assists_share"]

# Sort by expected points descending for now
home_usage = home_usage.sort_values("exp_points", ascending=False)

cols = [
    "player_name",
    "games_played",
    "avg_points",
    "avg_rebounds",
    "avg_assists",
    "mean_points_share",
    "mean_rebounds_share",
    "mean_assists_share",
    "exp_points",
    "exp_rebounds",
    "exp_assists",
]

available_cols = [c for c in cols if c in home_usage.columns]

print(f"Home team projections – {home_team}")
display(home_usage[available_cols].head(15))


Home team projections – Celtics


Unnamed: 0,player_name,games_played,avg_points,avg_rebounds,avg_assists,mean_points_share,mean_rebounds_share,mean_assists_share,exp_points,exp_rebounds,exp_assists
191,Jayson Tatum,96,23.1875,7.708333,5.15625,0.200395,0.17212,0.201956,21.941727,5.292236,3.385375
170,Jaylen Brown,113,21.079646,5.309735,4.00885,0.182275,0.119534,0.161281,19.957768,3.675345,2.703556
210,Derrick White,120,16.208333,4.3,4.575,0.139558,0.095634,0.182076,15.280535,2.940494,3.052128
396,Payton Pritchard,124,14.459677,3.701613,3.66129,0.124908,0.081659,0.141345,13.676432,2.510795,2.36936
271,Anfernee Simons,27,13.407407,2.185185,2.444444,0.115115,0.047873,0.099706,12.604215,1.471976,1.671361
121,Kristaps Porzingis,69,13.086957,4.855072,1.376812,0.11458,0.106081,0.052068,12.545609,3.261722,0.872811
25,Jrue Holiday,86,9.093023,3.534884,3.174419,0.07859,0.078907,0.127234,8.605042,2.426182,2.132814
588,Josh Minott,27,7.814815,4.814815,1.222222,0.064466,0.106243,0.048111,7.058546,3.266704,0.806486
484,Sam Hauser,117,7.367521,2.888889,0.940171,0.063183,0.064821,0.036924,6.918076,1.993066,0.618959
7,Al Horford,87,7.218391,5.022989,1.712644,0.062607,0.112724,0.07097,6.854967,3.465971,1.189673


In [11]:
# =====================================================
# CELL 8 – AWAY TEAM PLAYER PTS/REB/AST PROJECTIONS
# =====================================================

away_usage = player_usage[player_usage["team_name"] == away_team].copy()

away_usage["exp_points"] = away_pts_lambda * away_usage["mean_points_share"]

if not pd.isna(away_reb_lambda) and "mean_rebounds_share" in away_usage.columns:
    away_usage["exp_rebounds"] = away_reb_lambda * away_usage["mean_rebounds_share"]

if not pd.isna(away_ast_lambda) and "mean_assists_share" in away_usage.columns:
    away_usage["exp_assists"] = away_ast_lambda * away_usage["mean_assists_share"]

away_usage = away_usage.sort_values("exp_points", ascending=False)

home_usage["exp_PRA"] = (
    home_usage["exp_points"].fillna(0)
    + home_usage["exp_rebounds"].fillna(0)
    + home_usage["exp_assists"].fillna(0)
)

cols = [
    "player_name",
    "games_played",
    "avg_points",
    "avg_rebounds",
    "avg_assists",
    "mean_points_share",
    "mean_rebounds_share",
    "mean_assists_share",
    "exp_points",
    "exp_rebounds",
    "exp_assists",
]

available_cols = [c for c in cols if c in away_usage.columns]


print(f"Away team projections – {away_team}")
display(away_usage[available_cols].head(15))


Away team projections – Lakers


Unnamed: 0,player_name,games_played,avg_points,avg_rebounds,avg_assists,mean_points_share,mean_rebounds_share,mean_assists_share,exp_points,exp_rebounds,exp_assists
283,Luka Doncic,53,28.773585,7.45283,7.056604,0.250301,0.179631,0.287453,27.360838,5.528602,4.78388
49,Anthony Davis,50,23.36,10.74,3.02,0.207878,0.254299,0.113617,22.723537,7.826685,1.890849
0,LeBron James,90,21.455556,6.922222,7.222222,0.189008,0.165061,0.279046,20.660836,5.080166,4.643964
473,Austin Reaves,107,20.607477,4.560748,5.551402,0.179289,0.108391,0.215548,19.598345,3.336005,3.587216
289,Rui Hachimura,99,12.545455,4.343434,1.131313,0.110473,0.103056,0.045036,12.076025,3.171809,0.749506
281,Deandre Ayton,27,13.0,8.074074,0.962963,0.109569,0.198454,0.038928,11.977226,6.107924,0.647845
129,D'Angelo Russell,36,11.333333,2.527778,4.472222,0.10356,0.059579,0.172633,11.320344,1.833705,2.87301
874,R.J. Davis,6,7.166667,1.166667,0.166667,0.075758,0.027682,0.006667,8.281227,0.851971,0.110949
757,Dalton Knecht,117,8.213675,2.512821,0.837607,0.074113,0.059304,0.033095,8.101462,1.825225,0.550769
555,Max Christie,53,8.075472,2.660377,1.320755,0.072572,0.062786,0.051116,7.932927,1.9324,0.850681


In [12]:
# ==========================================
# CELL 9 – POISSON PROP SIMULATION HELPERS
# ==========================================

import numpy as np
import math
from typing import Optional, Dict, Any


def simulate_poisson_prop(
    lam: float,
    line: float,
    n_sims: int = 100_000,
    random_state: Optional[int] = None,
) -> Dict[str, Any]:
    """
    Simulate a single stat (points, rebounds, assists, etc.) as Poisson(lam)
    and estimate probabilities of going over/under a betting line.

    - lam:      expected value (your QEPC λ)
    - line:     betting line (e.g. 22.5, 8.5, 6.0, etc.)
    - n_sims:   number of simulated games
    """
    if lam <= 0 or not np.isfinite(lam):
        raise ValueError(f"Invalid lambda for Poisson: {lam}")

    rng = np.random.default_rng(random_state)
    samples = rng.poisson(lam, size=n_sims)

    if float(line).is_integer():
        line_int = int(line)
        over_mask = samples > line_int
        push_mask = samples == line_int
        under_mask = samples < line_int
        prob_push = push_mask.mean()
    else:
        # For half-lines (e.g. 22.5), push prob is zero
        over_mask = samples > line
        under_mask = samples < line
        prob_push = 0.0

    prob_over = over_mask.mean()
    prob_under = under_mask.mean()

    return {
        "lambda": float(lam),
        "line": float(line),
        "prob_over": float(prob_over),
        "prob_under": float(prob_under),
        "prob_push": float(prob_push),
        "mean_sim": float(samples.mean()),
        "std_sim": float(samples.std(ddof=0)),
        "n_sims": int(n_sims),
    }



In [13]:
# =========================================================
# CELL 10 – RUN PROPS FOR ONE PLAYER (PTS / REB / AST)
# =========================================================

# Choose which side to pull the player from:
#   - home_usage (home team)
#   - away_usage (away team)
target_df = home_usage  # or use away_usage

# Pick a player name from that team (adjust as needed)
# You can inspect:
#   target_df["player_name"].unique()
PLAYER_NAME = target_df["player_name"].iloc[0]  # change this to whoever you want

print("Using player:", PLAYER_NAME)

player_row = target_df[target_df["player_name"] == PLAYER_NAME]
if player_row.empty:
    raise ValueError(f"Player {PLAYER_NAME} not found in target_df.")

player_row = player_row.iloc[0]

# QEPC λs from your usage + team expectations
lam_pts = float(player_row["exp_points"])
lam_reb = float(player_row.get("exp_rebounds", np.nan))
lam_ast = float(player_row.get("exp_assists", np.nan))

print(f"\nQEPC λ for {PLAYER_NAME}:")
print(f"  Points λ = {lam_pts:.2f}")
print(f"  Rebounds λ = {lam_reb:.2f}")
print(f"  Assists λ = {lam_ast:.2f}")

# --- Set example betting lines (edit these to match a real book) ---

line_pts = round(lam_pts) + 0.5      # e.g. around his projection
line_reb = round(lam_reb) + 0.5 if np.isfinite(lam_reb) else None
line_ast = round(lam_ast) + 0.5 if np.isfinite(lam_ast) else None

print(f"\nExample lines (edit these):")
print(f"  Points line:   {line_pts}")
print(f"  Rebounds line: {line_reb}")
print(f"  Assists line:  {line_ast}")

# --- Simulate props using Poisson model ---

results = {}

results["points"] = simulate_poisson_prop(lam_pts, line_pts, n_sims=100_000)

if line_reb is not None and np.isfinite(lam_reb):
    results["rebounds"] = simulate_poisson_prop(lam_reb, line_reb, n_sims=100_000)

if line_ast is not None and np.isfinite(lam_ast):
    results["assists"] = simulate_poisson_prop(lam_ast, line_ast, n_sims=100_000)

print(f"\nSimulation results for {PLAYER_NAME}:")

for stat, res in results.items():
    print(f"\n[{stat.upper()}] line {res['line']} vs λ={res['lambda']:.2f}")
    print(f"  P(Over)  ≈ {res['prob_over']*100:5.2f}%")
    print(f"  P(Under) ≈ {res['prob_under']*100:5.2f}%")
    if res["prob_push"] > 0:
        print(f"  P(Push)  ≈ {res['prob_push']*100:5.2f}%")
    print(f"  Sim mean ≈ {res['mean_sim']:.2f} (simulated)")
    print(f"  Sim std  ≈ {res['std_sim']:.2f}")


Using player: Jayson Tatum

QEPC λ for Jayson Tatum:
  Points λ = 21.94
  Rebounds λ = 5.29
  Assists λ = 3.39

Example lines (edit these):
  Points line:   22.5
  Rebounds line: 5.5
  Assists line:  3.5

Simulation results for Jayson Tatum:

[POINTS] line 22.5 vs λ=21.94
  P(Over)  ≈ 44.02%
  P(Under) ≈ 55.98%
  Sim mean ≈ 21.95 (simulated)
  Sim std  ≈ 4.69

[REBOUNDS] line 5.5 vs λ=5.29
  P(Over)  ≈ 43.53%
  P(Under) ≈ 56.47%
  Sim mean ≈ 5.30 (simulated)
  Sim std  ≈ 2.31

[ASSISTS] line 3.5 vs λ=3.39
  P(Over)  ≈ 43.73%
  P(Under) ≈ 56.27%
  Sim mean ≈ 3.38 (simulated)
  Sim std  ≈ 1.83


In [14]:
# =========================================================
# CELL 11 – BATCH PROPS FOR MULTIPLE PLAYERS (POINTS ONLY)
# =========================================================

# You can choose home_usage or away_usage here
batch_df = home_usage  # or away_usage

# Define a dict of {player_name: points_line}
# (Replace with real players / lines as needed)
points_lines = {}

# Example: auto-generate 5 lines near their QEPC projections
for _, row in batch_df.head(5).iterrows():
    name = row["player_name"]
    lam_pts = float(row["exp_points"])
    # Example line = round λ + 0.5
    points_lines[name] = round(lam_pts) + 0.5

print("Batch points lines (edit this dict as needed):")
for name, line in points_lines.items():
    print(f"  {name}: {line}")

batch_results = []

for name, line in points_lines.items():
    row = batch_df[batch_df["player_name"] == name].iloc[0]
    lam_pts = float(row["exp_points"])

    res = simulate_poisson_prop(lam_pts, line, n_sims=100_000)
    batch_results.append(
        {
            "player_name": name,
            "line_pts": res["line"],
            "lambda_pts": res["lambda"],
            "prob_over": res["prob_over"],
            "prob_under": res["prob_under"],
            "prob_push": res["prob_push"],
            "mean_sim": res["mean_sim"],
            "std_sim": res["std_sim"],
        }
    )

batch_results_df = pd.DataFrame(batch_results)
batch_results_df = batch_results_df.sort_values("prob_over", ascending=False)

print("\nBatch results (sorted by P(Over)):")
display(batch_results_df)


Batch points lines (edit this dict as needed):
  Jayson Tatum: 22.5
  Jaylen Brown: 20.5
  Derrick White: 15.5
  Payton Pritchard: 14.5
  Anfernee Simons: 13.5

Batch results (sorted by P(Over)):


Unnamed: 0,player_name,line_pts,lambda_pts,prob_over,prob_under,prob_push,mean_sim,std_sim
2,Derrick White,15.5,15.280535,0.45968,0.54032,0.0,15.25785,3.905353
0,Jayson Tatum,22.5,21.941727,0.43996,0.56004,0.0,21.95469,4.686226
1,Jaylen Brown,20.5,19.957768,0.43585,0.56415,0.0,19.95132,4.47177
3,Payton Pritchard,14.5,13.676432,0.39425,0.60575,0.0,13.68467,3.707818
4,Anfernee Simons,13.5,12.604215,0.38538,0.61462,0.0,12.62004,3.547719


In [15]:
# =======================================================
# CELL 12 – RECENCY-WEIGHTED PLAYER POINTS (DECOHERENCE)
# =======================================================

from qepc.quantum.decoherence import recency_weighted_groupby_mean

# We assume you still have player_boxes_qepc loaded from earlier cells
# and that it has: player_id, team_name, game_id, game_date, points.

needed_cols = ["player_id", "team_name", "game_id", "game_date", "points"]
missing = [c for c in needed_cols if c not in player_boxes_qepc.columns]
if missing:
    raise ValueError(f"player_boxes_qepc is missing {missing}")

# Let's compute a *recency-weighted* average of points
# per (player_id, team_name), with a coherence time tau of 30 days.

tau_points_days = 30.0

rw_points = recency_weighted_groupby_mean(
    df=player_boxes_qepc,
    date_col="game_date",
    group_cols=["player_id", "team_name"],
    value_cols=["points"],
    tau_days=tau_points_days,
    ref_date=None,        # default: use max(game_date) as "today"
    clip_days=120.0,      # beyond 4 months, everything gets tiny weight
    weight_col_name="w_pts",
)

rw_points = rw_points.rename(columns={"points": "rw_avg_points"})

print("Recency-weighted points per player/team (sample):")
display(rw_points.head(20))


Recency-weighted points per player/team (sample):


Unnamed: 0,player_id,team_name,total_weight,rw_avg_points
0,2,Grizzlies,1.465251,10.2375
1,2,Lakers,18.315639,15.003
2,2,Pacers,3.296815,9.588889
3,3,Celtics,0.989044,1.37037
4,3,Grizzlies,3.608181,5.15736
5,3,Hawks,4.267544,11.802575
6,3,Heat,8.773191,11.605428
7,3,Pistons,2.326086,3.874016
8,7,Bucks,4.047756,6.828054
9,7,Heat,0.622732,3.176471


In [16]:
# ==========================================================
# CELL 13 – ENTROPY OF A PLAYER'S POINTS DISTRIBUTION
# ==========================================================

import numpy as np
from qepc.quantum.entropy import sample_entropy

# We'll use the same λ you used in Cell 10 for PLAYER_NAME
lam_for_entropy = lam_pts  # from Cell 10

n_sims_entropy = 100_000
rng = np.random.default_rng(12345)

samples_pts = rng.poisson(lam_for_entropy, size=n_sims_entropy)

H_bits, pmf = sample_entropy(samples_pts, base=2.0, return_pmf=True)

print(f"Entropy for {PLAYER_NAME}'s points distribution:")
print(f"  H ≈ {H_bits:.3f} bits (0 = deterministic, higher = more spread)")

# Optional: show a quick table of outcomes near the peak
import pandas as pd

values = np.arange(len(pmf))
mask = pmf > 0
df_pmf = pd.DataFrame({"points": values[mask], "prob": pmf[mask]})
df_pmf = df_pmf[
    (df_pmf["points"] >= lam_for_entropy - 10)
    & (df_pmf["points"] <= lam_for_entropy + 10)
]

print("\nLocal view of the simulated PMF around λ:")
display(df_pmf.head(25))


Entropy for Jayson Tatum's points distribution:
  H ≈ 3.868 bits (0 = deterministic, higher = more spread)

Local view of the simulated PMF around λ:


Unnamed: 0,points,prob
2,3,0.00129
3,4,0.00353
4,5,0.00908
5,6,0.01912
6,7,0.03347
7,8,0.05345
8,9,0.07753
9,10,0.09186
10,11,0.10742
11,12,0.11086
