In [3]:
# ==========================================
# CELL 1 – AUTO-DETECT PROJECT ROOT & IMPORTS
# ==========================================
import sys
from pathlib import Path

import numpy as np
import pandas as pd

# Locate the project root by searching upward from the current working directory.
here = Path.cwd().resolve()
print("Current working directory:", here)

# The first directory (the cwd itself, then each ancestor in turn) that holds a
# "qepc" folder is taken as the project root; None means nothing matched.
PROJECT_ROOT = next(
    (
        candidate
        for candidate in (here, *here.parents)
        if (candidate / "qepc").is_dir()
    ),
    None,
)

# Fail loudly rather than letting the qepc imports below break with a vaguer error.
if PROJECT_ROOT is None:
    raise FileNotFoundError(
        f"Could not find a 'qepc' package above this notebook.\n"
        f"Started search from: {here}\n"
        "Make sure this notebook is saved somewhere inside your qepc_project folder."
    )

print("Detected PROJECT_ROOT:", PROJECT_ROOT)

# Put the project root at the front of sys.path (only once) so that
# `import qepc...` resolves to this checkout.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
    print("Added PROJECT_ROOT to sys.path")

# Now these imports should work
from qepc.nba.eoin_data_source import (
    load_eoin_games,
    load_eoin_player_boxes,
)
from qepc.nba.player_usage_eoin import build_player_usage_from_eoin
from qepc.nba.matchups_eoin import build_matchups_for_date


Current working directory: C:\Users\wdorsey\qepc_project\notebooks\nba
Detected PROJECT_ROOT: C:\Users\wdorsey\qepc_project


In [4]:
# ==========================================
# CELL 2 – LOAD EOIN DATA & BUILD USAGE
# ==========================================

# Raw Eoin tables, loaded through the qepc data-source helpers.
games_qepc = load_eoin_games()
player_boxes_qepc = load_eoin_player_boxes()

# Quick size check on what was loaded.
print(f"games_qepc shape: {games_qepc.shape}")
print(f"player_boxes_qepc shape: {player_boxes_qepc.shape}")

# Long-run usage built from the Eoin player box scores.
# The result is expected to carry one row per player/team with columns such as
# player_id, player_name, team_name, games_played, avg_points, avg_rebounds,
# avg_assists and mean_points_share (later cells rely on team_name and
# mean_points_share).
usage_df = build_player_usage_from_eoin(player_boxes_qepc)

print(f"usage_df shape: {usage_df.shape}")
display(usage_df.head())


games_qepc shape: (72290, 19)
player_boxes_qepc shape: (1638878, 36)
usage_df shape: (786, 13)


Unnamed: 0,player_id,team_name,games_played,firstname,lastname,avg_points,mean_points_share,avg_rebounds,mean_rebounds_share,avg_assists,mean_assists_share,avg_minutes,player_name
0,2544,Lakers,90,LeBron,James,14.424793,0.12085,4.608175,0.115957,6.94862,0.268296,,LeBron James
1,101108,Clippers,24,Chris,Paul,2.788335,0.024153,1.395722,0.033523,2.559663,0.103174,,Chris Paul
2,101108,Spurs,84,Chris,Paul,8.630952,0.075845,3.47619,0.07753,7.214286,0.253688,,Chris Paul
3,200768,76ers,67,Kyle,Lowry,0.522019,0.004542,0.184684,0.004504,0.37656,0.014868,,Kyle Lowry
4,200782,Knicks,35,P.J.,Tucker,0.257143,0.002326,0.228571,0.006471,0.0,0.0,,P.J. Tucker


In [5]:
# ==========================================
# CELL 3 – CHOOSE DATE & BUILD MATCHUPS
# ==========================================
import datetime as dt

# Slate date to project. It has to be a date that has games in the Eoin data
# (e.g. 2025-12-05).
TARGET_DATE = "2025-12-05"

# One row per game on that date, including raw and calibrated expected points.
matchups = build_matchups_for_date(TARGET_DATE)

print(f"Matchups for {TARGET_DATE}: {len(matchups)} games")

# Columns shown in the summary: identifiers, team names, then the raw and
# calibrated expected points for each side.
cols = [
    "game_id", "game_date",
    "home_team_name", "away_team_name",
    "exp_home_pts_raw", "exp_away_pts_raw",
    "exp_home_pts", "exp_away_pts",
]
display(matchups.loc[:, cols])


Built advanced strengths from Eoin team_stats:
      team_id  games_played   win_pct     off_ppg     def_ppg  \
0  1610612738           337  0.718101  116.970326  108.451039   
1  1610612760           324  0.685185  118.302469  110.864198   
2  1610612743           334  0.646707  116.131737  112.422156   
3  1610612739           311  0.610932  114.736334  110.118971   
4  1610612752           328  0.603659  114.094512  110.524390   
5  1610612750           326  0.592025  114.230061  110.392638   
6  1610612749           304  0.578947  116.059211  114.473684   
7  1610612744           316  0.556962  115.844937  113.471519   
8  1610612746           302  0.549669  113.102649  111.228477   
9  1610612756           301  0.531561  114.445183  113.737542   

   pts_diff_per_game  strength_score  strength_rank  
0           8.519288        1.187087              1  
1           7.438272        1.075218              2  
2           3.709581        0.812597              3  
3           4.617363 

Unnamed: 0,game_id,game_date,home_team_name,away_team_name,exp_home_pts_raw,exp_away_pts_raw,exp_home_pts,exp_away_pts
0,22500338,2025-12-05,Celtics,Lakers,116.025916,109.709845,116.487654,106.153561
1,22500339,2025-12-05,Magic,Heat,111.090683,109.135204,107.031749,105.006002
2,22500340,2025-12-05,Hawks,Nuggets,116.594129,117.018411,117.576351,120.748766
3,22500341,2025-12-05,Cavaliers,Spurs,117.911721,111.134398,120.100858,108.998394
4,22500342,2025-12-05,Pistons,Trail Blazers,115.599478,113.05183,115.670601,112.827504
5,22500343,2025-12-05,Knicks,Jazz,118.440239,110.511318,121.113497,107.754102
6,22500344,2025-12-05,Raptors,Hornets,114.38435,110.97456,113.342415,108.679196
7,22500345,2025-12-05,Bulls,Pacers,117.707181,116.104904,119.708959,118.924493
8,22500346,2025-12-05,Rockets,Suns,115.325368,113.274675,115.145405,113.272525
9,22500347,2025-12-05,Grizzlies,Clippers,114.265401,113.340361,113.114509,113.403701


In [6]:
# ==========================================
# CELL 4 – PER-PLAYER EXPECTED POINTS FOR EACH GAME
# ==========================================

def build_team_player_props_for_game(game_row, usage_df):
    """
    Split each team's expected points for one game across its players.

    Parameters
    ----------
    game_row : mapping-like
        A single row of ``matchups``. Must provide "home_team_name",
        "away_team_name", "exp_home_pts" and "exp_away_pts".
    usage_df : pandas.DataFrame
        Player usage table. Must contain "team_name" (matched against the
        team names in ``game_row``) and "mean_points_share".

    Returns
    -------
    tuple of (home_players, away_players)
        Copies of the usage rows for each team with an added "exp_points"
        column (team expected points * mean_points_share), sorted by
        "exp_points" in descending order. ``usage_df`` itself is not modified.

    Raises
    ------
    ValueError
        If the "mean_points_share" column is missing.
    """
    home_team = game_row["home_team_name"]
    away_team = game_row["away_team_name"]

    # Calibrated team-level expectations (team λ) for this game.
    home_pts = game_row["exp_home_pts"]
    away_pts = game_row["exp_away_pts"]

    # Teams are matched on name; switch the key here if usage_df is keyed by
    # team_id instead.
    team_col = usage_df["team_name"]
    home_rows = usage_df[team_col == home_team].copy()
    away_rows = usage_df[team_col == away_team].copy()

    share_col = "mean_points_share"
    if not (share_col in home_rows.columns and share_col in away_rows.columns):
        raise ValueError("usage_df is missing 'mean_points_share' column. Check player_usage_eoin.py.")

    def _rank_by_expected_points(rows, team_pts):
        # Player λ = team λ * the player's share of team points; highest first.
        rows["exp_points"] = team_pts * rows[share_col]
        return rows.sort_values("exp_points", ascending=False)

    return (
        _rank_by_expected_points(home_rows, home_pts),
        _rank_by_expected_points(away_rows, away_pts),
    )


# Number of players kept per team, for readability of the summary table.
# Defined once up front instead of being re-assigned on every loop iteration.
top_n = 10

all_rows = []

for _, g in matchups.iterrows():
    home_players, away_players = build_team_player_props_for_game(g, usage_df)

    # Tag each team's top-N slice with the game it belongs to and its side.
    # Home rows are appended before away rows for every game.
    for team_side, players in (("home", home_players), ("away", away_players)):
        all_rows.append(
            players.head(top_n).assign(game_id=g["game_id"], team_side=team_side)
        )

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# raise the same exception type with a message that names the actual cause.
if not all_rows:
    raise ValueError(
        "matchups is empty, so there are no player rows to build. "
        "Choose a date that has games before running this cell."
    )

player_props_today = pd.concat(all_rows, ignore_index=True)

print("Total player rows (top N per team):", len(player_props_today))

# Show a compact summary
cols = [
    "game_id",
    "team_side",
    "team_name",
    "player_id",
    "player_name",
    "games_played",
    "avg_points",
    "mean_points_share",
    "exp_points",
]
display(player_props_today[cols].head(30))


Total player rows (top N per team): 240


Unnamed: 0,game_id,team_side,team_name,player_id,player_name,games_played,avg_points,mean_points_share,exp_points
0,22500338,home,Celtics,1627759,Jaylen Brown,113,27.80172,0.237364,27.650028
1,22500338,home,Celtics,1628369,Jayson Tatum,96,17.480676,0.151075,17.598326
2,22500338,home,Celtics,1628401,Derrick White,120,17.890551,0.148732,17.325407
3,22500338,home,Celtics,1630202,Payton Pritchard,124,17.028241,0.14283,16.637947
4,22500338,home,Celtics,204001,Kristaps Porzingis,69,13.086957,0.11458,13.347133
5,22500338,home,Celtics,1629014,Anfernee Simons,27,13.049378,0.110018,12.815705
6,22500338,home,Celtics,1629674,Neemias Queta,123,9.490047,0.079268,9.233797
7,22500338,home,Celtics,201950,Jrue Holiday,86,9.093023,0.07859,9.154808
8,22500338,home,Celtics,201143,Al Horford,87,7.218391,0.062607,7.292923
9,22500338,home,Celtics,1631169,Josh Minott,27,7.410514,0.059894,6.976911


In [7]:
# ==========================================
# CELL 5 – SIMPLE POISSON OVER/UNDER PROBS
# ==========================================
from math import exp, factorial

def poisson_cdf(k, lam):
    """P(X ≤ k) for X ~ Poisson(lam).

    Parameters
    ----------
    k : int
        Upper bound of the sum (inclusive). Any negative k gives 0.0.
    lam : float
        Poisson rate; must be non-negative.

    Returns
    -------
    float
        Cumulative probability, capped at 1.0.

    Raises
    ------
    ValueError
        If ``lam`` is negative.
    """
    if lam < 0:
        raise ValueError("lam must be non-negative")
    if k < 0:
        return 0.0

    # Build each term from the previous one: P(i) = P(i-1) * lam / i.
    # This avoids lam**i and factorial(i), which overflow float range for
    # large i (the naive form raises OverflowError, e.g. for k=200, lam=100).
    term = exp(-lam)  # P(X = 0)
    total = term
    for i in range(1, k + 1):
        term *= lam / i
        total += term

    # Rounding can push the sum a hair above 1; cap it so that callers
    # computing 1 - cdf never get a negative probability.
    return min(total, 1.0)

def prob_over(line, lam):
    """P(X > line) for X ~ Poisson(lam).

    The line is floored to the largest integer count that does NOT beat it:
    a line of 23.5 becomes 23, so the result is 1 - P(X ≤ 23) = P(X ≥ 24).
    A whole-number line such as 24 stays 24, giving the strict P(X > 24).
    """
    highest_losing_count = int(np.floor(line))
    p_at_or_below = poisson_cdf(highest_losing_count, lam)
    return 1.0 - p_at_or_below


# Example: compute over probabilities for a few top players
EXAMPLE_ROWS = 15          # how many rows of player_props_today to demo
EXAMPLE_LINE_OFFSET = 1.5  # pretend the book hangs the line at ~exp_points + 1.5

example = player_props_today.head(EXAMPLE_ROWS).copy()
example["line_points"] = example["exp_points"].round(1) + EXAMPLE_LINE_OFFSET

# Evaluate prob_over pairwise over the two columns. A plain comprehension avoids
# building a Series per row (as DataFrame.apply(axis=1) does) and simply yields
# an empty column for a zero-row frame instead of relying on how apply handles
# that case.
example["prob_over"] = [
    prob_over(line, lam)
    for line, lam in zip(example["line_points"], example["exp_points"])
]

display(
    example[
        ["game_id", "team_name", "player_name",
         "exp_points", "line_points", "prob_over"]
    ]
)


Unnamed: 0,game_id,team_name,player_name,exp_points,line_points,prob_over
0,22500338,Celtics,Jaylen Brown,27.650028,29.2,0.352167
1,22500338,Celtics,Jayson Tatum,17.598326,19.1,0.313917
2,22500338,Celtics,Derrick White,17.325407,18.8,0.374885
3,22500338,Celtics,Payton Pritchard,16.637947,18.1,0.312511
4,22500338,Celtics,Kristaps Porzingis,13.347133,14.8,0.360724
5,22500338,Celtics,Anfernee Simons,12.815705,14.3,0.306196
6,22500338,Celtics,Neemias Queta,9.233797,10.7,0.322067
7,22500338,Celtics,Jrue Holiday,9.154808,10.7,0.312518
8,22500338,Celtics,Al Horford,7.292923,8.8,0.30982
9,22500338,Celtics,Josh Minott,6.976911,8.5,0.267903
