In [None]:
from pathlib import Path
import sys
import numpy as np
import pandas as pd

print("=== QEPC Rest Table Bootstrap ===")

# Look for the nearest directory literally named "qepc_core": the working
# directory itself plus at most seven of its ancestors (eight candidates total).
cwd = Path.cwd()
search_dirs = [cwd, *cwd.parents][:8]
core_root = next((folder for folder in search_dirs if folder.name == "qepc_core"), None)

if core_root is None:
    raise RuntimeError(f"Could not find qepc_core above {cwd}")

# The repo root is taken to be three levels above qepc_core.
# NOTE(review): this depth is hard-coded — confirm it matches the repo layout.
core_str = str(core_root)
repo_root = core_root.parent.parent.parent
repo_str = str(repo_root)

# qepc_core goes to the front of sys.path so it wins import resolution;
# the repo root is only appended as a fallback location.
if core_str not in sys.path:
    sys.path.insert(0, core_str)
if repo_str not in sys.path:
    sys.path.append(repo_str)

print("qepc_core root:", core_root)
print("repo root:     ", repo_root)

# These imports must come after the sys.path changes above.
import qepc
from qepc.config import detect_project_root, QEPCConfig

project_root = detect_project_root()
cfg = QEPCConfig.from_project_root(project_root)

print("project_root:", project_root)
print("raw_root:    ", cfg.raw_root)
print("=== Bootstrap OK ===")


In [None]:
# Resolve the schedule CSV under the raw-data directory exposed by the config
# object created in the bootstrap cell.
raw_root = cfg.raw_root
schedule_path = raw_root / "NBA_Schedule_All_Seasons.csv"
print("Schedule path:", schedule_path)

# gameDate is parsed at read time so the min/max below are chronological.
sched = pd.read_csv(schedule_path, parse_dates=["gameDate"])

# Quick overview: size, covered date span, and the distinct seasons present.
print("Schedule shape:", sched.shape)
first_game, last_game = sched["gameDate"].min(), sched["gameDate"].max()
print("Date range:", first_game, "→", last_game)
seasons_present = sched["Season"].dropna().unique()
print("Seasons:", sorted(seasons_present))

display(sched.head())


In [None]:
import numpy as np
import pandas as pd

print("=== Build team-level schedule (home+away rows) ===")

# Columns that are carried over unchanged for both perspectives.
shared_cols = ["gameId", "gameDate", "Season"]

# Home perspective: the home club is "team", the visitor is "opponent".
# The key order of each mapping doubles as the column selection order.
home_view = {
    "homeTeamId": "teamId",
    "homeTeamAbbrev": "teamAbbrev",
    "homeTeamName": "teamName",
    "awayTeamId": "opponentTeamId",
    "awayTeamAbbrev": "opponentTeamAbbrev",
    "awayTeamName": "opponentTeamName",
    "homeScore": "teamScore",
    "awayScore": "opponentScore",
}
home_cols = shared_cols + list(home_view)
home_df = sched[home_cols].rename(columns=home_view).assign(is_home=1.0)

# Away perspective: the visitor is "team", the home club is "opponent".
away_view = {
    "awayTeamId": "teamId",
    "awayTeamAbbrev": "teamAbbrev",
    "awayTeamName": "teamName",
    "homeTeamId": "opponentTeamId",
    "homeTeamAbbrev": "opponentTeamAbbrev",
    "homeTeamName": "opponentTeamName",
    "homeScore": "opponentScore",
    "awayScore": "teamScore",
}
away_cols = shared_cols + list(away_view)
away_df = sched[away_cols].rename(columns=away_view).assign(is_home=0.0)

# One row per (game, team): home rows first, then away rows.
# concat aligns columns by name, so the home frame's column order is used.
team_sched = pd.concat([home_df, away_df], ignore_index=True)

# Tidy types: string game ids, nullable-integer team ids, datetime game dates.
# Unparseable ids/dates are coerced to missing values rather than raising.
team_sched = team_sched.assign(
    gameId=team_sched["gameId"].astype(str),
    teamId=pd.to_numeric(team_sched["teamId"], errors="coerce").astype("Int64"),
    opponentTeamId=pd.to_numeric(team_sched["opponentTeamId"], errors="coerce").astype("Int64"),
    gameDate=pd.to_datetime(team_sched["gameDate"], errors="coerce"),
)

print("team_sched shape:", team_sched.shape)
earliest, latest = team_sched["gameDate"].min(), team_sched["gameDate"].max()
print("team_sched date range:", earliest, "→", latest)
print("Seasons:", sorted(team_sched["Season"].dropna().unique()))

display(team_sched.head())


In [None]:
print("=== Recomputing rest & B2B per team *per season* ===")

# Gaps longer than this many days are treated as "offseason" and are ignored
# for rest logic instead of being counted as rest.
OFFSEASON_GAP_DAYS = 60

# Start fresh from team_sched (sort_values returns a new frame), ordered so
# each team-season reads chronologically. gameId breaks same-date ties and
# mergesort keeps the ordering stable.
team_rest = team_sched.sort_values(
    ["teamId", "Season", "gameDate", "gameId"],
    kind="mergesort",
)

# Group by team and season (critical to avoid 600+ day off-season gaps).
# dropna=False keeps rows with a missing teamId/Season in a group of their own.
grp = team_rest.groupby(["teamId", "Season"], dropna=False, sort=False)

# Previous game date per team-season (NaT for the first game of each group).
prev_date = grp["gameDate"].shift(1)
team_rest["prev_gameDate"] = prev_date

# Day gaps are measured in *calendar days*. Subtracting raw timestamps would
# floor the elapsed time, so a late game followed by an earlier tip-off the next
# day (< 24h apart) would yield 0 and be missed as a back-to-back.
# Normalizing to midnight is a no-op when gameDate carries no time of day.
# NOTE(review): assumes gameDate is tz-naive or fixed-offset — confirm.
game_day = team_rest["gameDate"].dt.normalize()
prev_day = prev_date.dt.normalize()
team_rest["days_since_last_game"] = (game_day - prev_day).dt.days

# Blank out giant gaps so they are not counted as "rest".
offseason_mask = team_rest["days_since_last_game"] > OFFSEASON_GAP_DAYS
team_rest.loc[offseason_mask, "days_since_last_game"] = np.nan

# A row has a usable previous game exactly when its day gap is known: the gap
# is NaN whenever the previous (or current) date is missing or it was blanked
# out as offseason above.
has_prev = team_rest["days_since_last_game"].notna()

day_gap = team_rest["days_since_last_game"]

# Back-to-back = played the previous day. 1.0 / 0.0 flag, NaN when there is
# no usable previous game in that season.
team_rest["is_back_to_back"] = (day_gap == 1).astype(float).where(has_prev)

# Rested 3+ days, same 1.0 / 0.0 / NaN encoding.
team_rest["is_rested_3plus"] = (day_gap >= 3).astype(float).where(has_prev)

print("Done recomputing rest features.")
print("team_rest shape:", team_rest.shape)


In [None]:
# === QEPC team_rest deep debug ===

import traceback
import numpy as np
import pandas as pd

print("=== QEPC team_rest DEEP DEBUG ===")

# Step 1: bail out early (with a message) when the rest table was never built.
if "team_rest" not in globals():
    print("❌ 'team_rest' is not defined in this notebook.")
else:
    print("✅ 'team_rest' exists")

    # Step 2: overall shape and column listing.
    try:
        print("\n-- BASIC INFO --")
        print("Shape:", team_rest.shape)
        print("Columns:", list(team_rest.columns))
    except Exception:
        print("⚠️ Error while reading shape/columns:")
        traceback.print_exc()

    # Step 3: which of the columns the rest logic relies on are present?
    key_cols = [
        "gameDate",
        "Season",
        "teamId",
        "teamName",
        "days_since_last_game",
        "is_back_to_back",
        "is_rested_3plus",
    ]

    print("\n-- KEY COLUMN PRESENCE --")
    for name in key_cols:
        status = "✅" if name in team_rest.columns else "❌ MISSING"
        print(f"{name:22s}: {status}")

    # Step 4: dtypes, only for the key columns that actually exist.
    print("\n-- DTYPES (existing key columns) --")
    for name in key_cols:
        if name not in team_rest.columns:
            continue
        print(f"{name:22s}: {team_rest[name].dtype}")

    # Step 5: peek at the first rows of whichever rest-related columns exist.
    rest_candidates = [
        "Season", "gameDate", "teamName",
        "days_since_last_game", "is_back_to_back", "is_rested_3plus",
    ]
    rest_cols = [c for c in rest_candidates if c in team_rest.columns]
    if rest_cols:
        print("\n-- SAMPLE ROWS (rest-related cols) --")
        display(team_rest[rest_cols].head(15))

    # Step 6: sanity metrics. Each one is isolated in its own try block so a
    # failure in one (e.g. a missing column) does not hide the others.
    print("\n-- SANITY METRICS (step-by-step) --")

    rest_metric_cols = ["days_since_last_game", "is_back_to_back", "is_rested_3plus"]

    # 6a) How many missing values does each rest column have?
    try:
        print("\n[6a] Null counts:")
        print(team_rest[rest_metric_cols].isna().sum())
    except Exception:
        print("❌ Error computing null counts:")
        traceback.print_exc()

    # 6b) Distribution of day gaps, smallest gaps first.
    try:
        print("\n[6b] days_since_last_game value counts (first 20):")
        gap_counts = team_rest["days_since_last_game"].value_counts().sort_index()
        print(gap_counts.head(20))
    except Exception:
        print("❌ Error computing value counts for days_since_last_game:")
        traceback.print_exc()

    # 6c) Mean of the back-to-back flag per season (NaN rows are skipped by mean).
    try:
        print("\n[6c] Back-to-back rate by season:")
        b2b_by_season = team_rest.groupby("Season")["is_back_to_back"].mean().sort_index()
        print(b2b_by_season)
    except Exception:
        print("❌ Error computing back-to-back rate by season:")
        traceback.print_exc()

    # 6d) First 20 games (by date) of the first team id found in the table.
    try:
        print("\n[6d] Sample team schedule:")
        non_null_team_ids = team_rest["teamId"].dropna().unique()
        if len(non_null_team_ids) > 0:
            sample_team_id = int(non_null_team_ids[0])
            print(f"Sample teamId: {sample_team_id}")

            wanted_cols = [
                "Season", "gameDate", "teamName", "opponentTeamName",
                "is_home", "days_since_last_game",
                "is_back_to_back", "is_rested_3plus",
            ]
            cols_to_show = [c for c in wanted_cols if c in team_rest.columns]

            sample_games = team_rest[team_rest["teamId"] == sample_team_id].sort_values("gameDate")
            display(sample_games.head(20)[cols_to_show])
        else:
            print("No non-null teamId values to sample.")
    except Exception:
        print("❌ Error showing sample team schedule:")
        traceback.print_exc()

print("\n=== END QEPC team_rest DEEP DEBUG ===")


In [None]:
# Compact sanity summary of the rest features computed in team_rest.
rest_feature_cols = ["days_since_last_game", "is_back_to_back", "is_rested_3plus"]

print("Null counts (key rest columns):")
print(team_rest[rest_feature_cols].isna().sum())

# Distribution of day gaps, smallest gaps first.
print("\nDays since last game (value counts, first 15):")
print(
    team_rest["days_since_last_game"]
    .value_counts()
    .sort_index()
    .head(15)
)

# Mean of the back-to-back flag per season (NaN rows are skipped by mean).
print("\nBack-to-back rate by season:")
b2b_by_season = (
    team_rest.groupby("Season")["is_back_to_back"]
             .mean()
             .sort_index()
)
print(b2b_by_season)

# Show the first 20 games (by date) of the first team id present in the table.
# Guard against an empty / all-null teamId column, which would otherwise raise
# IndexError on the [0] lookup.
non_null_team_ids = team_rest["teamId"].dropna().unique()
if len(non_null_team_ids) == 0:
    print("\nNo non-null teamId values to sample.")
else:
    sample_team_id = int(non_null_team_ids[0])
    print(f"\nSample schedule for teamId={sample_team_id}:")
    display(
        team_rest[team_rest["teamId"] == sample_team_id]
        .sort_values("gameDate")
        .head(20)[
            [
                "Season", "gameDate", "teamName", "opponentTeamName",
                "is_home", "days_since_last_game", "is_back_to_back", "is_rested_3plus",
            ]
        ]
    )
