In [None]:
# ==========================================
# CELL 1 – AUTO-DETECT PROJECT ROOT & IMPORTS
# ==========================================
import sys
from pathlib import Path

import numpy as np
import pandas as pd

# Locate the project root: the nearest directory, starting at the current
# working directory and moving upwards, that contains a "qepc" folder.
here = Path.cwd().resolve()
print("Current working directory:", here)

PROJECT_ROOT = next(
    (folder for folder in (here, *here.parents) if (folder / "qepc").is_dir()),
    None,  # sentinel: no ancestor holds a "qepc" directory
)

if PROJECT_ROOT is None:
    raise FileNotFoundError(
        f"Could not find a 'qepc' package above this notebook.\n"
        f"Started search from: {here}\n"
        "Make sure this notebook is saved somewhere inside your qepc_project folder."
    )

print("Detected PROJECT_ROOT:", PROJECT_ROOT)

# Put the root at the front of sys.path (only once, so re-running the cell
# does not stack duplicates) so that `import qepc` resolves.
root_str = str(PROJECT_ROOT)
if root_str not in sys.path:
    sys.path.insert(0, root_str)
    print("Added PROJECT_ROOT to sys.path")

# Now these imports should work
from qepc.nba.eoin_data_source import (
    load_eoin_games,
    load_eoin_player_boxes,
)
from qepc.nba.player_usage_eoin import build_player_usage_from_eoin
from qepc.nba.matchups_eoin import build_matchups_for_date


In [None]:
# ==========================================
# CELL 2 – LOAD EOIN DATA & BUILD USAGE
# ==========================================

# Load the two raw tables through the project's Eoin loaders
# (both loaders are called with their default arguments).
games_qepc = load_eoin_games()
player_boxes_qepc = load_eoin_player_boxes()

# Quick sanity check on what was loaded: (rows, columns) of each table.
print("games_qepc shape:", games_qepc.shape)
print("player_boxes_qepc shape:", player_boxes_qepc.shape)

# Build long-run usage from Eoin player boxes
# (this should give you player_id, player_name, team_name, games_played,
#  avg_points, avg_rebounds, avg_assists, mean_points_share, etc.)
# Later cells in this notebook rely on these columns being present:
# team_name, mean_points_share, player_id, player_name, games_played, avg_points.
usage_df = build_player_usage_from_eoin(player_boxes_qepc)

print("usage_df shape:", usage_df.shape)
display(usage_df.head())


In [None]:
# ==========================================
# CELL 3 – CHOOSE DATE & BUILD MATCHUPS
# ==========================================
import datetime as dt

# Pick a date that you know has games in Eoin (e.g. 2025-12-05)
# The date is passed to build_matchups_for_date as a plain string.
TARGET_DATE = "2025-12-05"

# One row per game on TARGET_DATE; the per-player cell below iterates over
# these rows and reads home_team_name / away_team_name / exp_home_pts /
# exp_away_pts / game_id from each of them.
matchups = build_matchups_for_date(TARGET_DATE)

print(f"Matchups for {TARGET_DATE}: {len(matchups)} games")
# Columns to show in the summary below. Selecting them raises KeyError if
# build_matchups_for_date did not produce one of them.
# NOTE: the name `cols` is reassigned by the player-props cell further down.
cols = [
    "game_id",
    "game_date",
    "home_team_name",
    "away_team_name",
    "exp_home_pts_raw",
    "exp_away_pts_raw",
    "exp_home_pts",
    "exp_away_pts",
]
display(matchups[cols])


In [None]:
# ==========================================
# CELL 4 – PER-PLAYER EXPECTED POINTS FOR EACH GAME
# ==========================================

def build_team_player_props_for_game(game_row, usage_df):
    """
    Split each team's expected points for one game across its players.

    Parameters
    ----------
    game_row : mapping or pandas.Series
        One row of the matchups table. Must provide "home_team_name",
        "away_team_name", "exp_home_pts" and "exp_away_pts".
    usage_df : pandas.DataFrame
        Player usage table with at least "team_name" and
        "mean_points_share" columns.

    Returns
    -------
    (home_players, away_players) : tuple of pandas.DataFrame
        Copies of the usage rows whose team_name equals the home / away team,
        each with an added "exp_points" column (team λpoints multiplied by
        mean_points_share) and sorted by that column, highest first.
        A team whose name matches no usage rows yields an empty frame.

    Raises
    ------
    ValueError
        If "mean_points_share" is missing from the usage table.
    """
    # Read the team names first, then the calibrated team λs.
    team_names = [game_row[key] for key in ("home_team_name", "away_team_name")]
    team_lambdas = [game_row[key] for key in ("exp_home_pts", "exp_away_pts")]

    # Independent copies, so adding a column never touches the caller's usage_df.
    # If your usage_df uses team_id instead, switch the key accordingly.
    rosters = [usage_df[usage_df["team_name"] == name].copy() for name in team_names]

    if any("mean_points_share" not in roster.columns for roster in rosters):
        raise ValueError("usage_df is missing 'mean_points_share' column. Check player_usage_eoin.py.")

    # Expected points = team λ * share, ordered from highest to lowest.
    projected = []
    for roster, team_lambda in zip(rosters, team_lambdas):
        roster["exp_points"] = team_lambda * roster["mean_points_share"]
        projected.append(roster.sort_values("exp_points", ascending=False))

    home_players, away_players = projected
    return home_players, away_players


# Keep top N per team for readability (loop-invariant, so set it once up front)
top_n = 10

all_rows = []

for _, g in matchups.iterrows():
    home_players, away_players = build_team_player_props_for_game(g, usage_df)

    # Tag each side's top-N slice with its game and side. `assign` returns a
    # new frame, so the frames returned by the helper are left untouched.
    for team_side, team_players in (("home", home_players), ("away", away_players)):
        all_rows.append(
            team_players.head(top_n).assign(game_id=g["game_id"], team_side=team_side)
        )

# pd.concat raises an opaque "No objects to concatenate" on an empty list,
# which is exactly what happens when `matchups` has no rows. Fail with an
# actionable message instead (same exception type as before).
if not all_rows:
    raise ValueError(
        "No player rows were built because `matchups` is empty. "
        "Pick a date that has games and re-run the matchups cell."
    )

player_props_today = pd.concat(all_rows, ignore_index=True)

print("Total player rows (top N per team):", len(player_props_today))

# Show a compact summary
cols = [
    "game_id",
    "team_side",
    "team_name",
    "player_id",
    "player_name",
    "games_played",
    "avg_points",
    "mean_points_share",
    "exp_points",
]
display(player_props_today[cols].head(30))


In [None]:
# ==========================================
# CELL 5 – SIMPLE POISSON OVER/UNDER PROBS
# ==========================================
from math import exp, factorial

def poisson_cdf(k, lam):
    """P(X ≤ k) for X ~ Poisson(lam).

    Parameters
    ----------
    k : int
        Upper bound of the sum (inclusive). Any negative k gives 0.0.
    lam : float
        Poisson rate; must be non-negative. NaN propagates to a NaN result.

    Returns
    -------
    float
        Cumulative probability, never above 1.0.

    Raises
    ------
    ValueError
        If lam is negative.
    """
    # Local import: the cell-level import only brings in `exp` and `factorial`.
    from math import exp, fsum, lgamma, log

    if lam < 0:
        raise ValueError(f"Poisson rate must be non-negative, got {lam!r}")
    if k < 0:
        return 0.0
    if lam == 0:
        # All mass sits on X = 0 (and log(0) below would fail).
        return 1.0

    # Evaluate each pmf term in log space: lam**i / factorial(i) * exp(-lam)
    # overflows a float for large i or lam (and exp(-lam) underflows to 0),
    # whereas i*log(lam) - lam - lgamma(i + 1) is always a modest number.
    log_lam = log(lam)
    total = fsum(exp(i * log_lam - lam - lgamma(i + 1)) for i in range(0, k + 1))

    # Round-off can push the sum a hair above 1. Written as a comparison
    # (not min()) so that a NaN total is returned unchanged.
    return 1.0 if total > 1.0 else total

def prob_over(line, lam):
    """P(X > line) for X ~ Poisson(lam).

    Works for half-point lines (23.5) and whole-number lines (24): because X
    is integer-valued, X > line is the same event as X > floor(line), so
        line 23.5 -> 1 - P(X ≤ 23)
        line 24.0 -> 1 - P(X ≤ 24)   (landing exactly on the line is not "over")

    Parameters
    ----------
    line : float
        The points line to beat.
    lam : float
        Poisson rate (expected points).

    Returns
    -------
    float
        Probability in [0, 1], or NaN if either input is NaN.
    """
    # A NaN line or rate (e.g. a player without a usage share) propagates as
    # NaN instead of crashing in int() below.
    if np.isnan(line) or np.isnan(lam):
        return float("nan")

    threshold = int(np.floor(line))
    over = 1.0 - poisson_cdf(threshold, lam)

    # The CDF can round a hair above 1; never report a negative probability.
    return 0.0 if over < 0.0 else over


# Example: compute over probabilities for a few top players
example = player_props_today.head(15).copy()
example["line_points"] = example["exp_points"].round(1) + 1.5  # pretend book hangs ~exp+1.5

# Evaluate row by row with a plain comprehension instead of
# DataFrame.apply(axis=1): on an empty frame `apply` can hand back a DataFrame
# rather than a Series, which cannot be assigned to a single column. The
# comprehension yields one value per row (or an empty list) and also avoids
# building a Series object for every row.
example["prob_over"] = [
    prob_over(line, lam)
    for line, lam in zip(example["line_points"], example["exp_points"])
]

display(
    example[
        ["game_id", "team_name", "player_name",
         "exp_points", "line_points", "prob_over"]
    ]
)
