In [None]:
# =========================
# CELL 1 – QEPC PATH SETUP
# =========================

import sys
from pathlib import Path
import pandas as pd  # still fine to keep this here

# Auto-detect the project root: starting at the current working directory and
# walking up through its parents, take the first directory that contains a
# "qepc" folder.
NOTEBOOK_DIR = Path.cwd()

PROJECT_ROOT = None
for parent in [NOTEBOOK_DIR] + list(NOTEBOOK_DIR.parents):
    if (parent / "qepc").is_dir():
        PROJECT_ROOT = parent
        break

# Fallback: if auto-detect fails, use the old hard-coded path.
# NOTE(review): this path is machine-specific, and its user folder ("wdors")
# differs from the "wdorsey" folder shown in this notebook's recorded
# PROJECT_ROOT output further down -- confirm which one is correct.
if PROJECT_ROOT is None:
    PROJECT_ROOT = Path(r"C:\Users\wdors\qepc_project").resolve()

# Make sure the project root is on sys.path so `import qepc...` works
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

# Checked once so the status line and the warning below always agree.
qepc_package_found = (PROJECT_ROOT / "qepc").is_dir()

print("NOTEBOOK_DIR:", NOTEBOOK_DIR)
print("PROJECT_ROOT:", PROJECT_ROOT)
print("qepc package exists here?:", qepc_package_found)

# Say so explicitly when the root is unusable, instead of leaving it to a
# ModuleNotFoundError in the first cell that imports qepc.
if not qepc_package_found:
    print(
        "WARNING: no 'qepc' folder under PROJECT_ROOT; `import qepc...` in later "
        "cells will fail. Start Jupyter from inside the project or fix the "
        "fallback path in this cell."
    )

# Conventional data/cache locations, derived from the project root.
DATA_DIR = PROJECT_ROOT / "data"
CACHE_DIR = PROJECT_ROOT / "cache"

print("DATA_DIR:", DATA_DIR)
print("CACHE_DIR:", CACHE_DIR)


In [None]:
# ==========================================
# CELL 2 – LOAD EOIN (QEPC-READY) DATA
# ==========================================

from qepc.nba.eoin_data_source import (
    load_eoin_games,
    load_eoin_player_boxes,
    load_eoin_team_boxes,
    print_eoin_summary,
)

# Pull the three Eoin tables through the project loaders.
games_qepc = load_eoin_games()
player_boxes_qepc = load_eoin_player_boxes()
team_boxes_qepc = load_eoin_team_boxes()

# variable name -> (human-readable title, frame); drives both report passes below.
eoin_tables = {
    "games_qepc": ("Games", games_qepc),
    "player_boxes_qepc": ("Player boxes", player_boxes_qepc),
    "team_boxes_qepc": ("Team boxes", team_boxes_qepc),
}

# Pass 1: shapes of all three tables.
for table_var, (table_title, table_df) in eoin_tables.items():
    print(f"{table_var}.shape:", table_df.shape)

# Pass 2: first rows of each table.
for table_var, (table_title, table_df) in eoin_tables.items():
    print(f"\n{table_title} sample:")
    display(table_df.head())

# Project-provided summary helper, called on all three tables.
print_eoin_summary(games_qepc, player_boxes_qepc, team_boxes_qepc)


In [None]:
# ==========================================
# CELL 3 – TEAM STATS & TEAM STRENGTHS
# ==========================================

from qepc.nba.eoin_team_stats import (
    build_team_stats_from_eoin,
    save_team_stats_to_cache,
)
from qepc.nba.team_strengths_eoin import (
    calculate_advanced_strengths_from_eoin,
    save_advanced_strengths_to_cache,
)

# --- team_stats: built from the Eoin team box scores ---
team_stats = build_team_stats_from_eoin(team_boxes_qepc)
print("team_stats.shape:", team_stats.shape)
print("\nteam_stats sample:")
display(team_stats.head())

# The save helper's return value is reported as the saved location.
team_stats_path = save_team_stats_to_cache(team_stats)
print("\nSaved team_stats to:", team_stats_path)

# --- team_strengths: derived from team_stats ---
team_strengths = calculate_advanced_strengths_from_eoin(team_stats, verbose=True)

# Columns shown in the preview table.
strength_preview_cols = [
    "team_id",
    "games_played",
    "win_pct",
    "off_ppg",
    "def_ppg",
    "pts_diff_per_game",
    "strength_score",
    "strength_rank",
]

print("\nteam_strengths.shape:", team_strengths.shape)
# NOTE(review): "top 10" relies on team_strengths already being ordered by
# strength_score when it comes back from the helper -- confirm.
print("\nteam_strengths (top 10 by strength_score):")
display(team_strengths.loc[:, strength_preview_cols].head(10))

team_strengths_path = save_advanced_strengths_to_cache(team_strengths)
print("\nSaved team_strengths to:", team_strengths_path)


In [None]:
# ==========================================================
# CELL 4 – MATCHUPS FOR A DATE + TEAM PTS/REB/AST EXPECTED
# ==========================================================

from qepc.nba.matchups_eoin import build_matchups_for_date, MatchupConfig
from qepc.nba.team_scoring_eoin import attach_scoring_predictions, ScoringConfig

# Slate date to analyse; it must be a date that has games in Games.csv.
TARGET_DATE = "2025-12-05"

# Final games are included so the slate can be used for sanity checks/backtests.
matchup_config = MatchupConfig(include_final_games=True)
matchups = build_matchups_for_date(TARGET_DATE, config=matchup_config)

matchup_preview_cols = [
    "game_id",
    "game_date",
    "game_datetime",
    "home_team_name",
    "away_team_name",
    "home_team_id",
    "away_team_id",
    "home_strength_score",
    "away_strength_score",
]

print(f"Matchups on {TARGET_DATE}: {len(matchups)} games")
display(matchups.loc[:, matchup_preview_cols].head(20))

# Scoring model knobs; rebound/assist weights are left at the config defaults.
scoring_config = ScoringConfig(
    home_court_advantage=1.5,
    weight_off_edge=0.7,
    weight_def_edge=0.7,
)

# Adds the exp_* (expected points / rebounds / assists) columns per team.
matchups_with_scores = attach_scoring_predictions(matchups, config=scoring_config)

print("\nColumns with expected stats (exp_*):")
print(list(filter(lambda col: col.startswith("exp_"), matchups_with_scores.columns)))

expectation_preview_cols = [
    "game_id",
    "game_date",
    "home_team_name",
    "away_team_name",
    "exp_home_pts",
    "exp_away_pts",
    "exp_home_reb",
    "exp_away_reb",
    "exp_home_ast",
    "exp_away_ast",
]

print("\nMatchups with team expectations sample:")
display(matchups_with_scores.loc[:, expectation_preview_cols].head(20))


In [None]:
# ===============================================
# CELL 5 – PLAYER USAGE (PTS / REB / AST SHARES)
# ===============================================

from qepc.nba.player_usage_eoin import build_player_usage_from_eoin

# Options for the usage build, gathered in one place so they are easy to tune.
usage_options = dict(
    player_boxes=player_boxes_qepc,
    min_games=5,                 # tune as you like
    cutoff_date="2024-10-01",    # keep it recent
    use_recency_weights=True,    # recency ("decoherence") weighting on
    tau_points_days=30.0,        # coherence time for scoring
    # tau_rebounds_days / tau_assists_days are not passed; per the original
    # note, None is expected to fall back to the points value -- TODO confirm.
)
player_usage = build_player_usage_from_eoin(**usage_options)

print("player_usage.shape:", player_usage.shape)

usage_preview_cols = [
    "player_id",
    "player_name",
    "team_name",
    "games_played",
    "avg_points",
    "avg_rebounds",
    "avg_assists",
    "mean_points_share",
    "mean_rebounds_share",
    "mean_assists_share",
]
display(player_usage.loc[:, usage_preview_cols].head(20))


In [None]:
# ============================================================
# CELL 6 – PLAYER PTS/REB/AST EXPECTATIONS FOR ONE MATCHUP
# ============================================================

import numpy as np

# Pick the example game: prefer Celtics (home) vs Lakers (away) on the slate,
# otherwise fall back to the first game.

# Guard first: with no games on the slate, `.iloc[0]` below would raise an
# opaque IndexError.
if matchups_with_scores.empty:
    raise ValueError(
        "matchups_with_scores is empty - choose a TARGET_DATE that has games "
        "before running this cell."
    )

mask_bos_lal = (
    (matchups_with_scores["home_team_name"] == "Celtics")
    & (matchups_with_scores["away_team_name"] == "Lakers")
)
bos_lal_games = matchups_with_scores[mask_bos_lal]  # filtered once, reused below

if bos_lal_games.empty:
    # Fallback: just take the first game on the slate
    example_game = matchups_with_scores.iloc[0]
    print("Could not find Celtics-Lakers; using first game on slate instead.")
else:
    example_game = bos_lal_games.iloc[0]
    print("Using Celtics-Lakers game.")

# Show the chosen game as a one-row table.
display(
    example_game[
        [
            "game_id",
            "game_date",
            "home_team_name",
            "away_team_name",
            "exp_home_pts",
            "exp_away_pts",
            "exp_home_reb",
            "exp_away_reb",
            "exp_home_ast",
            "exp_away_ast",
        ]
    ].to_frame().T
)

home_team = example_game["home_team_name"]
away_team = example_game["away_team_name"]

# Team-level expectations (λ). Points are required; rebounds/assists default
# to NaN when the column is absent so later cells can skip them.
home_pts_lambda = float(example_game["exp_home_pts"])
away_pts_lambda = float(example_game["exp_away_pts"])

home_reb_lambda = example_game.get("exp_home_reb", np.nan)
away_reb_lambda = example_game.get("exp_away_reb", np.nan)

home_ast_lambda = example_game.get("exp_home_ast", np.nan)
away_ast_lambda = example_game.get("exp_away_ast", np.nan)

print("\nTeam-level expectations:")
print(f"Home {home_team}: λ_pts={home_pts_lambda:.2f}, λ_reb={home_reb_lambda}, λ_ast={home_ast_lambda}")
print(f"Away {away_team}: λ_pts={away_pts_lambda:.2f}, λ_reb={away_reb_lambda}, λ_ast={away_ast_lambda}")


In [None]:
# ==========================================================
# CELL 6A – COMPARE UNWEIGHTED VS RECENCY-WEIGHTED USAGE
# ==========================================================

from qepc.nba.player_usage_eoin import build_player_usage_from_eoin

# 1) Baseline usage: recency weighting off, full history (no cutoff date).
player_usage_flat = build_player_usage_from_eoin(
    player_boxes=player_boxes_qepc,
    min_games=5,
    cutoff_date=None,
    use_recency_weights=False,
)

# 2) Recency-weighted usage: reuse the table built in Cell 5.
# NOTE(review): that table was built with cutoff_date="2024-10-01" while the
# baseline above uses the full history, so the deltas below mix the effect of
# the cutoff with the effect of the weights.
player_usage_rw = player_usage

# Identity columns, plus the three averages being compared.
cols_key = ["player_id", "team_name"]
stat_cols = ["avg_points", "avg_rebounds", "avg_assists"]
cols_keep = [*cols_key, "player_name", *stat_cols]

# Prefix the averages so both versions can sit side by side after the merge.
flat = player_usage_flat[cols_keep].rename(
    columns={stat_col: f"flat_{stat_col}" for stat_col in stat_cols}
)
rw = player_usage_rw[cols_keep].rename(
    columns={stat_col: f"rw_{stat_col}" for stat_col in stat_cols}
)

merged = flat.merge(rw, on=[*cols_key, "player_name"], how="inner")

# Positive delta = recency weighting raised the player's scoring average.
merged["delta_points"] = merged["rw_avg_points"] - merged["flat_avg_points"]
merged_sorted = merged.sort_values("delta_points", ascending=False)

for direction, ranked in (
    ("UP", merged_sorted.head(15)),
    ("DOWN", merged_sorted.tail(15)),
):
    print(f"Players whose scoring average moved {direction} the most with recency weights:")
    display(ranked)


In [None]:
# =====================================================
# CELL 7 – HOME TEAM PLAYER PTS/REB/AST PROJECTIONS
# =====================================================

# Usage rows for the home team only (copied so new columns don't touch player_usage).
home_usage = player_usage.loc[player_usage["team_name"] == home_team].copy()

# Player expectation = team λ x the player's mean share of that stat.
home_usage["exp_points"] = home_usage["mean_points_share"] * home_pts_lambda

# Rebounds and assists are optional: only filled in when the team λ is present
# and the share column exists.
for share_col, exp_col, team_lambda in (
    ("mean_rebounds_share", "exp_rebounds", home_reb_lambda),
    ("mean_assists_share", "exp_assists", home_ast_lambda),
):
    if share_col in home_usage.columns and not pd.isna(team_lambda):
        home_usage[exp_col] = home_usage[share_col] * team_lambda

# Highest projected scorers first.
home_usage = home_usage.sort_values("exp_points", ascending=False)

cols = [
    "player_name",
    "games_played",
    "avg_points",
    "avg_rebounds",
    "avg_assists",
    "mean_points_share",
    "mean_rebounds_share",
    "mean_assists_share",
    "exp_points",
    "exp_rebounds",
    "exp_assists",
]

# Only show the columns that actually exist (the exp_* ones may have been skipped).
available_cols = list(filter(lambda col: col in home_usage.columns, cols))

print(f"Home team projections – {home_team}")
display(home_usage.loc[:, available_cols].head(15))


In [None]:
# =====================================================
# CELL 8 – AWAY TEAM PLAYER PTS/REB/AST PROJECTIONS
# =====================================================

# Usage rows for the away team only (copied so new columns don't touch player_usage).
away_usage = player_usage[player_usage["team_name"] == away_team].copy()

# Player expectation = team λ x the player's mean share of that stat.
away_usage["exp_points"] = away_pts_lambda * away_usage["mean_points_share"]

# Rebounds/assists are optional: only when the team λ and the share column exist.
if not pd.isna(away_reb_lambda) and "mean_rebounds_share" in away_usage.columns:
    away_usage["exp_rebounds"] = away_reb_lambda * away_usage["mean_rebounds_share"]

if not pd.isna(away_ast_lambda) and "mean_assists_share" in away_usage.columns:
    away_usage["exp_assists"] = away_ast_lambda * away_usage["mean_assists_share"]

away_usage = away_usage.sort_values("exp_points", ascending=False)

# Combined points + rebounds + assists expectation.
# This cell previously computed exp_PRA for home_usage only (a copy-paste slip)
# and never for away_usage; it also raised KeyError when exp_rebounds or
# exp_assists had been skipped above. It is now computed for both teams,
# treating a missing or NaN component as 0. home_usage keeps its exp_PRA column
# so anything that relied on it still works.
for usage_df in (home_usage, away_usage):
    exp_pra = usage_df["exp_points"].fillna(0)
    for part_col in ("exp_rebounds", "exp_assists"):
        if part_col in usage_df.columns:
            exp_pra = exp_pra + usage_df[part_col].fillna(0)
    usage_df["exp_PRA"] = exp_pra

cols = [
    "player_name",
    "games_played",
    "avg_points",
    "avg_rebounds",
    "avg_assists",
    "mean_points_share",
    "mean_rebounds_share",
    "mean_assists_share",
    "exp_points",
    "exp_rebounds",
    "exp_assists",
    "exp_PRA",
]

# Only show the columns that actually exist (some exp_* ones may have been skipped).
available_cols = [c for c in cols if c in away_usage.columns]


print(f"Away team projections – {away_team}")
display(away_usage[available_cols].head(15))


In [None]:
# ==========================================
# CELL 9 – POISSON PROP SIMULATION HELPERS
# ==========================================

import numpy as np
import math
from typing import Optional, Dict, Any


def simulate_poisson_prop(
    lam: float,
    line: float,
    n_sims: int = 100_000,
    random_state: Optional[int] = None,
) -> Dict[str, Any]:
    """
    Simulate a single stat (points, rebounds, assists, etc.) as Poisson(lam)
    and estimate probabilities of going over/under a betting line.

    Parameters
    ----------
    lam : float
        Expected value of the stat (your QEPC λ). Must be finite and > 0.
    line : float
        Betting line (e.g. 22.5, 8.5, 6.0). Must be finite.
    n_sims : int
        Number of simulated games. Must be at least 1.
    random_state : int, optional
        Seed passed to ``np.random.default_rng``; use it for reproducible runs.

    Returns
    -------
    dict
        ``lambda``, ``line``, ``prob_over``, ``prob_under``, ``prob_push``,
        ``mean_sim``, ``std_sim`` (population std, ddof=0) and ``n_sims``.
        ``prob_push`` is 0.0 for non-integer lines.

    Raises
    ------
    ValueError
        If ``lam`` is non-positive or non-finite, ``line`` is non-finite, or
        ``n_sims`` is less than 1.
    """
    if lam <= 0 or not np.isfinite(lam):
        raise ValueError(f"Invalid lambda for Poisson: {lam}")
    # A NaN/inf line would silently yield meaningless probabilities.
    if not np.isfinite(line):
        raise ValueError(f"Invalid betting line: {line}")
    # With zero draws every mean below would be NaN (plus a RuntimeWarning).
    if n_sims < 1:
        raise ValueError(f"n_sims must be a positive integer, got {n_sims}")

    rng = np.random.default_rng(random_state)
    samples = rng.poisson(lam, size=n_sims)

    # Samples are integers, so equality with a fractional line never holds and
    # the push probability comes out as exactly 0.0 for half-lines.
    prob_over = (samples > line).mean()
    prob_under = (samples < line).mean()
    prob_push = (samples == line).mean()

    return {
        "lambda": float(lam),
        "line": float(line),
        "prob_over": float(prob_over),
        "prob_under": float(prob_under),
        "prob_push": float(prob_push),
        "mean_sim": float(samples.mean()),
        "std_sim": float(samples.std(ddof=0)),
        "n_sims": int(n_sims),
    }



In [None]:
# =========================================================
# CELL 10 – RUN PROPS FOR ONE PLAYER (PTS / REB / AST)
# =========================================================

# Which team's table to pull the player from: home_usage or away_usage.
target_df = home_usage  # or use away_usage

# Player to analyse. Defaults to the first row of target_df; to see the
# options, inspect target_df["player_name"].unique().
PLAYER_NAME = target_df["player_name"].iloc[0]  # change this to whoever you want

print("Using player:", PLAYER_NAME)

player_matches = target_df.loc[target_df["player_name"] == PLAYER_NAME]
if player_matches.empty:
    raise ValueError(f"Player {PLAYER_NAME} not found in target_df.")
player_row = player_matches.iloc[0]

# Player-level λs; rebounds/assists fall back to NaN when the column is absent.
lam_pts = float(player_row["exp_points"])
lam_reb = float(player_row.get("exp_rebounds", np.nan))
lam_ast = float(player_row.get("exp_assists", np.nan))

print(f"\nQEPC λ for {PLAYER_NAME}:")
print(f"  Points λ = {lam_pts:.2f}")
print(f"  Rebounds λ = {lam_reb:.2f}")
print(f"  Assists λ = {lam_ast:.2f}")

# --- Placeholder betting lines: nearest integer to λ, plus a half point ---
# (edit these to match a real book)
line_pts = round(lam_pts) + 0.5

if np.isfinite(lam_reb):
    line_reb = round(lam_reb) + 0.5
else:
    line_reb = None

if np.isfinite(lam_ast):
    line_ast = round(lam_ast) + 0.5
else:
    line_ast = None

print("\nExample lines (edit these):")
print(f"  Points line:   {line_pts}")
print(f"  Rebounds line: {line_reb}")
print(f"  Assists line:  {line_ast}")

# --- Poisson simulation per stat; rebounds/assists only when usable ---
results = {"points": simulate_poisson_prop(lam_pts, line_pts, n_sims=100_000)}

for stat_key, stat_lam, stat_line in (
    ("rebounds", lam_reb, line_reb),
    ("assists", lam_ast, line_ast),
):
    if stat_line is not None and np.isfinite(stat_lam):
        results[stat_key] = simulate_poisson_prop(stat_lam, stat_line, n_sims=100_000)

print(f"\nSimulation results for {PLAYER_NAME}:")

for stat_label, sim in results.items():
    print(f"\n[{stat_label.upper()}] line {sim['line']} vs λ={sim['lambda']:.2f}")
    print(f"  P(Over)  ≈ {sim['prob_over']*100:5.2f}%")
    print(f"  P(Under) ≈ {sim['prob_under']*100:5.2f}%")
    if sim["prob_push"] > 0:
        print(f"  P(Push)  ≈ {sim['prob_push']*100:5.2f}%")
    print(f"  Sim mean ≈ {sim['mean_sim']:.2f} (simulated)")
    print(f"  Sim std  ≈ {sim['std_sim']:.2f}")


In [None]:
# =========================================================
# CELL 11 – BATCH PROPS FOR MULTIPLE PLAYERS (POINTS ONLY)
# =========================================================

# Which team's table to batch over: home_usage or away_usage.
batch_df = home_usage  # or away_usage

# {player_name: points_line}. Auto-generated here for the first 5 rows as
# "nearest integer to λ, plus a half point"; replace with real players/lines
# as needed.
# The loops in this cell deliberately avoid the name `lam_pts`: that
# notebook-global holds PLAYER_NAME's λ from Cell 10, and the entropy cell
# reads it later, so reassigning it here silently changes that cell's result.
points_lines = {
    usage_row["player_name"]: round(float(usage_row["exp_points"])) + 0.5
    for _, usage_row in batch_df.head(5).iterrows()
}

print("Batch points lines (edit this dict as needed):")
for batch_name, batch_line in points_lines.items():
    print(f"  {batch_name}: {batch_line}")

batch_results = []

for batch_name, batch_line in points_lines.items():
    batch_matches = batch_df[batch_df["player_name"] == batch_name]
    if batch_matches.empty:
        # A hand-edited name that is not on this team would otherwise raise
        # an IndexError on .iloc[0].
        print(f"  Skipping {batch_name!r}: not found in batch_df.")
        continue
    batch_lam = float(batch_matches.iloc[0]["exp_points"])

    res = simulate_poisson_prop(batch_lam, batch_line, n_sims=100_000)
    batch_results.append(
        {
            "player_name": batch_name,
            "line_pts": res["line"],
            "lambda_pts": res["lambda"],
            "prob_over": res["prob_over"],
            "prob_under": res["prob_under"],
            "prob_push": res["prob_push"],
            "mean_sim": res["mean_sim"],
            "std_sim": res["std_sim"],
        }
    )

# Explicit columns keep the sort below valid even if every player was skipped.
batch_results_df = pd.DataFrame(
    batch_results,
    columns=[
        "player_name",
        "line_pts",
        "lambda_pts",
        "prob_over",
        "prob_under",
        "prob_push",
        "mean_sim",
        "std_sim",
    ],
)
batch_results_df = batch_results_df.sort_values("prob_over", ascending=False)

print("\nBatch results (sorted by P(Over)):")
display(batch_results_df)


In [None]:
# =======================================================
# CELL 12 – RECENCY-WEIGHTED PLAYER POINTS (DECOHERENCE)
# =======================================================

from qepc.quantum.decoherence import recency_weighted_groupby_mean

# Requires player_boxes_qepc from the data-loading cell, with the columns
# listed below; fail early with a clear message if any are missing.
needed_cols = ["player_id", "team_name", "game_id", "game_date", "points"]
missing = list(filter(lambda col: col not in player_boxes_qepc.columns, needed_cols))
if missing:
    raise ValueError(f"player_boxes_qepc is missing {missing}")

# Recency-weighted average of points per (player_id, team_name), using a
# coherence time tau of 30 days.
tau_points_days = 30.0

rw_points = recency_weighted_groupby_mean(
    df=player_boxes_qepc,
    date_col="game_date",
    group_cols=["player_id", "team_name"],
    value_cols=["points"],
    tau_days=tau_points_days,
    # Per the original notes: ref_date=None is meant to use max(game_date) as
    # "today", and clip_days=120 is meant to make games older than ~4 months
    # count for very little -- TODO confirm against the helper.
    ref_date=None,
    clip_days=120.0,
    weight_col_name="w_pts",
).rename(columns={"points": "rw_avg_points"})

print("Recency-weighted points per player/team (sample):")
display(rw_points.head(20))


In [None]:
# ==========================================================
# CELL 13 – ENTROPY OF A PLAYER'S POINTS DISTRIBUTION
# ==========================================================

import numpy as np
from qepc.quantum.entropy import sample_entropy

# λ for PLAYER_NAME, read from player_row (the Cell 10 row for that player).
# The bare `lam_pts` variable is not used here because it is a shared notebook
# global that loops in other cells reassign; by the time this cell runs it can
# hold a different player's λ while the printout still says PLAYER_NAME.
lam_for_entropy = float(player_row["exp_points"])

n_sims_entropy = 100_000
rng = np.random.default_rng(12345)  # fixed seed so the entropy is reproducible

samples_pts = rng.poisson(lam_for_entropy, size=n_sims_entropy)

# With return_pmf=True the helper is unpacked as (entropy, pmf).
H_bits, pmf = sample_entropy(samples_pts, base=2.0, return_pmf=True)

print(f"Entropy for {PLAYER_NAME}'s points distribution:")
print(f"  H ≈ {H_bits:.3f} bits (0 = deterministic, higher = more spread)")

# Optional: show a quick table of outcomes near the peak
import pandas as pd

# NOTE(review): assumes pmf is an array indexed by outcome value (0, 1, 2, ...)
# -- confirm against sample_entropy.
values = np.arange(len(pmf))
mask = pmf > 0
df_pmf = pd.DataFrame({"points": values[mask], "prob": pmf[mask]})
# Keep only outcomes within 10 points of λ.
df_pmf = df_pmf[
    (df_pmf["points"] >= lam_for_entropy - 10)
    & (df_pmf["points"] <= lam_for_entropy + 10)
]

print("\nLocal view of the simulated PMF around λ:")
display(df_pmf.head(25))


In [None]:
# ==========================================================
# CELL 13A – BATCH PROPS WITH EDGE + ENTROPY (POINTS)
# ==========================================================

import numpy as np
import pandas as pd
from qepc.quantum.entropy import sample_entropy

# Choose which team to analyze
batch_df = home_usage  # or away_usage

# Hand-set lines. These take priority over the auto-generated placeholders
# below; names that are not on the chosen team are skipped with a message.
points_lines = {
    "Jayson Tatum": 28.5,
    "Jaylen Brown": 23.5,
    "Derrick White": 15.5,
}

# Auto-fill a placeholder line (nearest integer to λ, plus a half point) for
# the first 8 players. setdefault keeps any hand-set line instead of
# overwriting it.
for _, usage_row in batch_df.head(8).iterrows():
    points_lines.setdefault(
        usage_row["player_name"], round(float(usage_row["exp_points"])) + 0.5
    )

# Drop names that are not in batch_df (e.g. the hard-coded Celtics when the
# example game is not a Celtics home game); looking them up would otherwise
# raise an IndexError on .iloc[0].
known_names = set(batch_df["player_name"])
unknown_names = [name for name in points_lines if name not in known_names]
if unknown_names:
    print("Skipping players not found in batch_df:", unknown_names)
points_lines = {
    name: line for name, line in points_lines.items() if name in known_names
}

print("Points lines used in this batch (edit these as you like):")
for name, line in points_lines.items():
    print(f"  {name}: {line}")

rng = np.random.default_rng(777)
n_sims = 50_000  # fewer is fine for batch

rows = []

for name, line in points_lines.items():
    lam = float(batch_df[batch_df["player_name"] == name].iloc[0]["exp_points"])

    # 1) Poisson prop simulation result (P over/under)
    sim_result = simulate_poisson_prop(lam, line, n_sims=n_sims, random_state=42)

    # 2) Raw samples for entropy.
    # Called with return_pmf=True and unpacked as a pair, which is the call
    # shape the single-player entropy cell uses; whether return_pmf=False also
    # returns a pair is not visible from this notebook.
    samples = rng.poisson(lam, size=n_sims)
    H_bits, _pmf_unused = sample_entropy(samples, base=2.0, return_pmf=True)

    rows.append(
        {
            "player_name": name,
            "lambda_pts": lam,
            "line_pts": sim_result["line"],
            "prob_over": sim_result["prob_over"],
            "prob_under": sim_result["prob_under"],
            "prob_push": sim_result["prob_push"],
            "mean_sim": sim_result["mean_sim"],
            "std_sim": sim_result["std_sim"],
            "entropy_bits": H_bits,
        }
    )

# Explicit columns keep the operations below valid even when `rows` is empty.
batch_results_df = pd.DataFrame(
    rows,
    columns=[
        "player_name",
        "lambda_pts",
        "line_pts",
        "prob_over",
        "prob_under",
        "prob_push",
        "mean_sim",
        "std_sim",
        "entropy_bits",
    ],
)

# Edge = model's over prob minus a neutral 50% (for half lines)
batch_results_df["edge_over_pct"] = (batch_results_df["prob_over"] - 0.5) * 100.0

# Sort by biggest edge first; ties broken by lower entropy
batch_results_df = batch_results_df.sort_values(
    ["edge_over_pct", "entropy_bits"], ascending=[False, True]
)

print("\nBatch props with edge + entropy (points):")
display(
    batch_results_df[
        [
            "player_name",
            "lambda_pts",
            "line_pts",
            "prob_over",
            "prob_under",
            "edge_over_pct",
            "entropy_bits",
            "mean_sim",
            "std_sim",
        ]
    ]
)


In [None]:
# ==========================================================
# CELL 14 – Entanglements
# ==========================================================

# build_team_entanglement was never imported anywhere in this notebook, so this
# cell raised NameError on a fresh kernel.
# NOTE(review): the module path is assumed to be qepc.quantum.entanglement,
# because the star-view cell imports get_player_entanglement_view from there
# -- confirm.
from qepc.quantum.entanglement import build_team_entanglement

# Pairwise points "entanglement" table for the home team's players.
ent_points_home = build_team_entanglement(
    player_boxes=player_boxes_qepc,
    team_name=home_team,
    stat_col="points",
    min_shared_games=5,        # <= super loose, just for debug
    date_col="game_date",
    cutoff_date="2024-10-01",
)

print(f"Entanglement table for {home_team} (points) – sample:")
print("Rows in ent_points_home:", len(ent_points_home))
display(ent_points_home.head(20))


In [None]:
# ==========================================================
# CELL 14A – DEBUG: JAYLEN VS TATUM DIRECTLY
# ==========================================================

import numpy as np
import pandas as pd

# Celtics rows on or after the cutoff date.
# NOTE(review): the comparison assumes game_date holds datetime.date objects
# -- confirm the column's dtype.
celtics_recent = player_boxes_qepc[
    (player_boxes_qepc["team_name"] == "Celtics")
    & (player_boxes_qepc["game_date"] >= pd.to_datetime("2024-10-01").date())
].copy()

print("Celtics recent rows:", len(celtics_recent))
print("Unique games:", celtics_recent["game_id"].nunique())

# Look both players up in home_usage. They are only there when the example
# game's home team is the Celtics, so check before indexing instead of letting
# .iloc[0] raise an IndexError.
jaylen_matches = home_usage[home_usage["player_name"] == "Jaylen Brown"]
tatum_matches = home_usage[home_usage["player_name"] == "Jayson Tatum"]

if jaylen_matches.empty or tatum_matches.empty:
    print(
        "Jaylen Brown and/or Jayson Tatum not found in home_usage "
        "(is the home team the Celtics?); skipping this debug check."
    )
else:
    jaylen_id = int(jaylen_matches.iloc[0]["player_id"])
    tatum_id = int(tatum_matches.iloc[0]["player_id"])

    print("Jaylen id:", jaylen_id)
    print("Tatum id:", tatum_id)

    # Pivot game x player for points
    pivot = celtics_recent.pivot_table(
        index="game_id",
        columns="player_id",
        values="points",
        aggfunc="sum",
    )

    print("pivot shape:", pivot.shape)

    # .get returns None (it does not raise) when the player_id column is absent.
    jaylen_pts = pivot.get(jaylen_id)
    tatum_pts = pivot.get(tatum_id)

    if jaylen_pts is None or tatum_pts is None:
        print("One of the players is missing from pivot entirely.")
    else:
        mask = jaylen_pts.notna() & tatum_pts.notna()
        shared = mask.sum()
        print("Shared games with both having points:", shared)

        if shared >= 2:
            corr = np.corrcoef(jaylen_pts[mask], tatum_pts[mask])[0, 1]
            print("Jaylen/Tatum corr over that window:", corr)
        elif shared == 1:
            # A correlation needs at least two paired observations; with one,
            # np.corrcoef returns NaN.
            print("Only one shared game in this window; correlation is undefined.")
        else:
            print("No games where both logged non-NA points in this window.")


In [None]:
# ==========================================================
# CELL 14B – LOOK AT SHARED GAME COUNTS FOR CELTICS
# ==========================================================
import pandas as pd

# Celtics rows on or after the cutoff date (same filter as the entanglement cell).
celtics_recent = player_boxes_qepc[
    (player_boxes_qepc["team_name"] == "Celtics")
    & (player_boxes_qepc["game_date"] >= pd.to_datetime("2024-10-01").date())
].copy()

print("Celtics recent rows:", len(celtics_recent))
print("Unique games:", celtics_recent["game_id"].nunique())

# game x player table of points; NaN where a player has no row for that game.
pivot = celtics_recent.pivot_table(
    index="game_id",
    columns="player_id",
    values="points",
    aggfunc="sum",
)

# 1 where the player has a value for the game, 0 otherwise.
# The cast to int matters: multiplying two boolean frames produces booleans
# ("any shared game"), which made every n_shared_games come out as 0 or 1
# instead of a count.
valid_mask = pivot.notna().astype(int)
shared_counts = valid_mask.T @ valid_mask  # each [i,j] = number of shared games

# Turn the upper triangle into a DataFrame of pairs with their n_shared
pairs = []
player_ids = list(pivot.columns)
for i in range(len(player_ids)):
    for j in range(i + 1, len(player_ids)):
        pid_a = int(player_ids[i])
        pid_b = int(player_ids[j])
        n_shared = int(shared_counts.iloc[i, j])
        pairs.append({"player_id_a": pid_a, "player_id_b": pid_b, "n_shared_games": n_shared})

# Explicit columns keep the sort valid when there are fewer than two players.
shared_df = pd.DataFrame(pairs, columns=["player_id_a", "player_id_b", "n_shared_games"])
shared_df = shared_df.sort_values("n_shared_games", ascending=False)

print("Top 20 pairs by shared games:")
display(shared_df.head(20))


In [None]:
# =================================================================
# CELL 15 – STAR-FOCUSED ENTANGLEMENT VIEW (POINTS, HOME TEAM)
# =================================================================

from qepc.quantum.entanglement import get_player_entanglement_view

# The star is whoever the single-player props cell selected. For the ids to
# line up with ent_points_home, that cell's target_df must be home_usage.
star_name = PLAYER_NAME
print("Current star (from props cell):", star_name)

# Resolve the star's player_id through home_usage.
star_row = home_usage.loc[home_usage["player_name"] == star_name]
if star_row.empty:
    raise ValueError(f"PLAYER_NAME={star_name!r} not found in home_usage. "
                     "Make sure target_df in Cell 10 is home_usage.")

star_id = int(star_row["player_id"].iat[0])
print(f"Star player_id: {star_id}")

# Same shared-games threshold as the entanglement table was built with.
star_ent = get_player_entanglement_view(
    ent_df=ent_points_home,
    player_id=star_id,
    min_shared_games=5,
)

if not star_ent.empty:
    # One table per sign of the correlation.
    for sign_label, sign_mask in (
        ("positively", star_ent["corr"] > 0),
        ("negatively", star_ent["corr"] < 0),
    ):
        print(f"\nTop {sign_label} entangled teammates with {star_name} (points):")
        display(star_ent[sign_mask].head(10))
else:
    print("No entanglement pairs found for this player with sufficient shared games.")


In [None]:
# ==========================================================
# CELL 16 – DEFINE SGP PICKS (POINTS, SAME GAME)
# ==========================================================

from qepc.quantum.correlated_sim import PlayerPropConfig

# Side of the game to build the same-game parlay (SGP) for.
sgp_home = True   # set False if you want away team instead

sgp_df, team_label = (home_usage, home_team) if sgp_home else (away_usage, away_team)

print(f"Building SGP for team: {team_label}")

# Quick look at who is available on this team (optional).
print("\nSample of available players on this team:")
display(sgp_df.loc[:, ["player_id", "player_name", "avg_points", "exp_points"]].head(15))

# ---- DEFINE YOUR SGP PLAYERS + LINES HERE ----
# Either hardcode a dict of {player_name: line_pts}, or start from exp_points
# (as done below) and tweak.

# Default: top 3 players by exp_points, each with a placeholder line of
# "nearest integer to λ, plus a half point".
top_players = sgp_df.sort_values("exp_points", ascending=False).head(3)

sgp_lines = {
    top_row["player_name"]: round(float(top_row["exp_points"])) + 0.5
    for _, top_row in top_players.iterrows()
}

print("\nInitial SGP lines (edit this dict if you want specific lines):")
for name, line in sgp_lines.items():
    print(f"  {name}: {line}")

# To override with real lines, do something like:
# sgp_lines = {
#     "Jayson Tatum": 28.5,
#     "Jaylen Brown": 22.5,
#     "Derrick White": 14.5,
# }

# One PlayerPropConfig per selected player.
sgp_players: list[PlayerPropConfig] = []

for name, line in sgp_lines.items():
    name_matches = sgp_df.loc[sgp_df["player_name"] == name]
    if name_matches.empty:
        raise ValueError(f"SGP player {name!r} not found in usage table for {team_label}.")
    row = name_matches.iloc[0]
    lam_pts = float(row["exp_points"])
    prop_config = PlayerPropConfig(
        player_id=int(row["player_id"]),
        player_name=name,
        lambda_pts=lam_pts,
        line_pts=float(line),
    )
    sgp_players.append(prop_config)

print(f"\nConfigured {len(sgp_players)} players for entangled SGP sim.")
for prop_config in sgp_players:
    print(f"  - {prop_config.player_name}: λ={prop_config.lambda_pts:.2f}, line={prop_config.line_pts}")


In [None]:
# ==========================================================
# CELL 17 – RUN ENTANGLED SGP SIM (POINTS, SAME TEAM)
# ==========================================================

from qepc.quantum.correlated_sim import simulate_entangled_points

# Simulation size and volatility knobs (tweak if you want).
N_SIMS = 100_000
TEAM_SIGMA = 0.25   # team-level volatility (all players share)
PLAYER_SIGMA = 0.15 # individual volatility

sgp_result = simulate_entangled_points(
    players=sgp_players,
    n_sims=N_SIMS,
    random_state=2025,
    team_sigma=TEAM_SIGMA,
    player_sigma=PLAYER_SIGMA,
)

# The result is read as a mapping with these three entries.
marginals_df, joint_info, pairwise_corr = (
    sgp_result[result_key] for result_key in ("marginals", "joint", "pairwise_corr")
)

print(f"Entangled SGP sim with {joint_info['n_sims']} universes for team {team_label}.\n")

marginal_cols = [
    "player_name",
    "lambda_pts",
    "line_pts",
    "prob_over",
    "prob_under",
    "mean_sim",
    "std_sim",
]

print("Per-player marginal props (from correlated sim):")
display(marginals_df.loc[:, marginal_cols])

# Joint outcomes across all configured players, printed as percentages.
print("\nJoint SGP probabilities (all players' points props):")
for joint_label, joint_key in (
    ("  Naive product P(all overs)   ≈ ", "naive_product_all_over"),
    ("  Entangled P(all overs)       ≈ ", "prob_all_over"),
    ("  P(at least one over)         ≈ ", "prob_any_over"),
    ("  P(none go over)              ≈ ", "prob_none_over"),
):
    print(f"{joint_label}{joint_info[joint_key]*100:5.2f}%")


In [None]:
# ==========================================================
# CELL 18 – EMPIRICAL CORRELATIONS BETWEEN SGP PLAYERS
# ==========================================================

# Show the "pairwise_corr" entry of the SGP simulation result
# (pairwise_corr is assigned in the SGP simulation cell).
print("Empirical correlations between simulated points for SGP players:")
display(pairwise_corr)


In [None]:
# Sanity check: how many games and players the Celtics window contains.
# NOTE(review): the date comparison assumes game_date holds datetime.date
# objects -- confirm the column's dtype.
window_start = pd.to_datetime("2024-10-01").date()
is_celtics = player_boxes_qepc["team_name"] == "Celtics"
is_recent = player_boxes_qepc["game_date"] >= window_start
celtics_recent = player_boxes_qepc[is_celtics & is_recent]

for count_label, id_col in (
    ("Unique games since cutoff:", "game_id"),
    ("Unique players in that window:", "player_id"),
):
    print(count_label, celtics_recent[id_col].nunique())


In [1]:
# CELL 1 – project root / sys.path (you already have this pattern now)
from pathlib import Path
import sys

def find_project_root(marker="qepc_project") -> Path:
    """
    Return the nearest directory named ``marker`` at or above the current
    working directory.

    Parameters
    ----------
    marker : str
        Directory name to look for (compared against each path's final
        component).

    Returns
    -------
    Path
        The resolved current directory or the closest parent whose name
        equals ``marker``.

    Raises
    ------
    FileNotFoundError
        If no directory in the hierarchy has that name. The message names the
        marker that was searched for and the starting directory.
    """
    here = Path.cwd().resolve()
    for candidate in (here, *here.parents):
        if candidate.name == marker:
            return candidate
    # Report the marker actually requested, not a hard-coded folder name.
    raise FileNotFoundError(
        f"Could not find a directory named {marker!r} at or above {here}."
    )

# Locate the project folder and put it at the front of sys.path (once) so
# `import qepc...` resolves.
PROJECT_ROOT = find_project_root("qepc_project")
project_root_str = str(PROJECT_ROOT)
if project_root_str not in sys.path:
    sys.path.insert(0, project_root_str)

print("PROJECT_ROOT:", PROJECT_ROOT)


PROJECT_ROOT: C:\Users\wdorsey\qepc_project


In [2]:
# CELL 2 – import and build usage
from qepc.nba.player_usage_eoin import (
    PlayerUsageConfig,
    build_player_usage_from_eoin,
    build_player_vs_opponent_splits,
)

# Build the usage table from a PlayerUsageConfig (min_games=10, recent_window=5).
# NOTE(review): here the builder is called with `config=` only, whereas the
# earlier cells call it with player_boxes=/min_games=/cutoff_date=... keyword
# arguments -- confirm the installed version accepts the form used here.
config = PlayerUsageConfig(min_games=10, recent_window=5)
usage = build_player_usage_from_eoin(config=config)

print("usage shape:", usage.shape)
# Bare last expression: rendered by the notebook as the cell's output table.
usage.head(10)


usage shape: (11644, 13)


Unnamed: 0,player_id,team_name,games_played,avg_points,avg_rebounds,avg_assists,mean_points_share,mean_rebounds_share,mean_assists_share,pts_avg_lastN,reb_avg_lastN,ast_avg_lastN,player_name
0,76375,76ers,321,27.249221,24.423676,6.753894,0.227431,0.393292,0.276525,21.6,23.4,7.4,Wilt Chamberlain
1,947,76ers,878,25.143508,3.545558,5.497722,0.261029,0.083283,0.268436,10.2,3.2,3.2,Allen Iverson
2,203954,76ers,623,23.331723,9.37037,3.067633,0.207759,0.210974,0.123203,19.8,5.6,2.8,Joel Embiid
3,787,76ers,661,23.189107,11.723147,3.763994,0.215125,0.272842,0.160943,20.8,8.4,3.2,Charles Barkley
4,76681,76ers,977,21.957011,6.735926,3.898669,0.199639,0.148792,0.152498,18.2,6.4,3.4,Julius Erving
5,77449,76ers,388,21.180412,12.085052,1.293814,0.191943,0.268847,0.054506,5.0,2.6,0.6,Moses Malone
6,77532,76ers,266,20.793233,11.296992,3.966165,0.18878,0.232731,0.174042,14.6,8.0,2.8,George McGinnis
7,76487,76ers,693,20.760462,9.97114,3.881674,0.179304,0.185121,0.172598,11.4,6.0,5.6,Billy Cunningham
8,76882,76ers,820,20.330488,4.863415,4.278049,0.173349,0.085913,0.188418,9.4,4.2,2.4,Hal Greer
9,711,76ers,176,19.409091,3.857955,3.397727,0.199118,0.093137,0.16679,17.8,4.2,3.2,Jerry Stackhouse


In [3]:
# CELL 3 – vs-opponent splits
# Per-player splits against each opponent, with min_games_vs_opp=3.
splits = build_player_vs_opponent_splits(min_games_vs_opp=3)
print("splits shape:", splits.shape)
# Bare last expression: rendered by the notebook as the cell's output table.
splits.head(10)


splits shape: (160954, 8)


Unnamed: 0,player_id,team_name,opp_team_name,games_vs_opp,pts_vs_opp,reb_vs_opp,ast_vs_opp,player_name
0,920,Lakers,Trail Blazers,67,10.0,7.328358,1.134328,A.C. Green
1,920,Lakers,Suns,66,9.515152,8.757576,1.19697,A.C. Green
2,920,Lakers,Warriors,57,9.649123,7.666667,0.859649,A.C. Green
3,920,Lakers,SuperSonics,55,11.727273,7.745455,1.018182,A.C. Green
4,920,Lakers,Mavericks,51,9.901961,7.058824,1.058824,A.C. Green
5,920,Lakers,Kings,49,10.285714,8.55102,1.020408,A.C. Green
6,920,Lakers,Clippers,47,11.446809,7.595745,1.170213,A.C. Green
7,920,Lakers,Jazz,46,7.586957,7.413043,0.934783,A.C. Green
8,920,Lakers,Rockets,45,10.511111,7.311111,0.933333,A.C. Green
9,920,Lakers,Spurs,45,10.133333,6.666667,1.044444,A.C. Green


In [None]:
pip install jupyterlab-filelock
jupyter labextension install @jupyterlab/filelock


In [None]:
pip install jupyterlab-filelock
jupyter labextension install jupyterlab/filelock


In [None]:
pip install jupyterlab-filelock

