In [1]:
# === CELL 1: QEPC Experimental Notebook Bootstrap ===
# Makes the experimental `qepc` package importable from this notebook and
# verifies that the copy Python actually imported is that experimental one.
# Raises RuntimeError if the package directory is missing or if a different
# `qepc` ends up being the imported module.
import sys
from importlib.util import find_spec
from pathlib import Path

cwd = Path.cwd()
print("Notebooks CWD:", cwd)

# We expect this notebook to live at:
#   .../qepc_project/experimental/GTP_REWRITE/qepc_core/notebooks
# so qepc_core is its parent directory:
qepc_core = cwd.parent
qepc_pkg = qepc_core / "qepc"

if not qepc_pkg.is_dir():
    raise RuntimeError(
        f"Could not find experimental qepc package at {qepc_pkg}\n"
        "Check that this notebook is inside qepc_core/notebooks."
    )

# 1) Put experimental qepc_core at the FRONT of sys.path.
#    Merely being somewhere in sys.path is not enough: an entry sitting behind
#    another directory that also provides `qepc` would lose the import, so an
#    existing entry is moved to index 0 instead of being left where it is.
qepc_core_entry = str(qepc_core)
if sys.path[:1] == [qepc_core_entry]:
    print("âœ” qepc_core already in sys.path:", qepc_core)
else:
    # Slice-assign so the very list object used by the import system is updated.
    sys.path[:] = [entry for entry in sys.path if entry != qepc_core_entry]
    sys.path.insert(0, qepc_core_entry)
    print("âž• Added to sys.path (front):", qepc_core)

# 2) Optional: also add repo root at the tail (for old top-level qepc if needed)
repo_root = qepc_core.parent.parent.parent  # .../qepc_project
if repo_root.exists() and str(repo_root) not in sys.path:
    sys.path.append(str(repo_root))
    print("âž• Added repo root to sys.path (tail):", repo_root)

# Sanity check: import experimental qepc.
# `import` returns whatever is cached in sys.modules, so a `qepc` imported
# earlier in this kernel from another location would be reused regardless of
# the sys.path order set up above. Verify the location instead of only
# printing it.
import qepc

qepc_file = getattr(qepc, "__file__", None)
imported_from = Path(qepc_file).resolve() if qepc_file else None
if imported_from is None or qepc_pkg.resolve() not in imported_from.parents:
    raise RuntimeError(
        f"Imported qepc from {imported_from}, expected a module under {qepc_pkg}\n"
        "A different qepc was probably imported earlier in this kernel; "
        "restart the kernel and run this cell first."
    )

print("âœ… Imported qepc from:", imported_from)
print("qepc spec origin:", find_spec("qepc").origin)
print("=== End QEPC Experimental Bootstrap ===")


Notebooks CWD: C:\Users\wdors\qepc_project\experimental\GTP_REWRITE\qepc_core\notebooks
âž• Added to sys.path (front): C:\Users\wdors\qepc_project\experimental\GTP_REWRITE\qepc_core
âž• Added repo root to sys.path (tail): C:\Users\wdors\qepc_project
âœ… Imported qepc from: C:\Users\wdors\qepc_project\experimental\GTP_REWRITE\qepc_core\qepc\__init__.py
qepc spec origin: C:\Users\wdors\qepc_project\experimental\GTP_REWRITE\qepc_core\qepc\__init__.py
=== End QEPC Experimental Bootstrap ===


In [2]:
# === CELL 2: Inspect existing 5-year player logs ===
# Baseline view of the stored logs: size, schema, date span, seasons and a
# short preview.
from qepc.data_player_logs import load_player_logs_5yr

df_before = load_player_logs_5yr()

print("ðŸ“„ Existing 5-year lean player logs")
print("  Rows:", len(df_before))
print("  Columns:", list(df_before.columns))

# Pull the date column once and reuse it for both ends of the range.
game_dates = df_before["gameDate"]
print("  Date range:", game_dates.min(), "â†’", game_dates.max())

seasons_seen = sorted(df_before["Season"].unique())
print("  Seasons:", seasons_seen)

print("\nSample rows:")
preview_rows = df_before.head(10)
display(preview_rows)


ðŸ“„ Existing 5-year lean player logs
  Rows: 123781
  Columns: ['gameId', 'gameDate', 'Season', 'playerId', 'playerName', 'teamId', 'teamAbbrev', 'teamName', 'opponentTeamAbbrev', 'home', 'win', 'minutes', 'pts', 'reboundsTotal', 'assists', 'steals', 'blocks', 'turnovers', 'foulsPersonal', 'fieldGoalsMade', 'fieldGoalsAttempted', 'fieldGoalsPercentage', 'threePointersMade', 'threePointersAttempted', 'threePointersPercentage', 'freeThrowsMade', 'freeThrowsAttempted', 'freeThrowsPercentage', 'plusMinusPoints']
  Date range: 2019-10-22 00:00:00 â†’ 2024-04-14 00:00:00
  Seasons: ['2019-20', '2020-21', '2021-22', '2022-23', '2023-24']

Sample rows:


Unnamed: 0,gameId,gameDate,Season,playerId,playerName,teamId,teamAbbrev,teamName,opponentTeamAbbrev,home,...,fieldGoalsMade,fieldGoalsAttempted,fieldGoalsPercentage,threePointersMade,threePointersAttempted,threePointersPercentage,freeThrowsMade,freeThrowsAttempted,freeThrowsPercentage,plusMinusPoints
0,21900001,2019-10-22,2019-20,200755,JJ Redick,1610612740,NOP,New Orleans Pelicans,TOR,0.0,...,6,9,0.667,4,6,0.667,0,0,0.0,-14
1,21900001,2019-10-22,2019-20,200768,Kyle Lowry,1610612761,TOR,Toronto Raptors,NOP,1.0,...,4,15,0.267,3,11,0.273,11,13,0.846,-1
2,21900001,2019-10-22,2019-20,201188,Marc Gasol,1610612761,TOR,Toronto Raptors,NOP,1.0,...,2,9,0.222,1,4,0.25,1,1,1.0,-2
3,21900001,2019-10-22,2019-20,201586,Serge Ibaka,1610612761,TOR,Toronto Raptors,NOP,1.0,...,4,10,0.4,0,1,0.0,5,6,0.833,6
4,21900001,2019-10-22,2019-20,201950,Jrue Holiday,1610612740,NOP,New Orleans Pelicans,TOR,0.0,...,6,15,0.4,1,6,0.167,0,2,0.0,-14
5,21900001,2019-10-22,2019-20,202324,Derrick Favors,1610612740,NOP,New Orleans Pelicans,TOR,0.0,...,3,6,0.5,0,0,0.0,0,0,0.0,-12
6,21900001,2019-10-22,2019-20,202734,E'Twaun Moore,1610612740,NOP,New Orleans Pelicans,TOR,0.0,...,2,7,0.286,1,3,0.333,0,0,0.0,-1
7,21900001,2019-10-22,2019-20,1626143,Jahlil Okafor,1610612740,NOP,New Orleans Pelicans,TOR,0.0,...,3,3,1.0,0,0,0.0,2,3,0.667,-7
8,21900001,2019-10-22,2019-20,1626181,Norman Powell,1610612761,TOR,Toronto Raptors,NOP,1.0,...,2,7,0.286,1,5,0.2,0,0,0.0,2
9,21900001,2019-10-22,2019-20,1627742,Brandon Ingram,1610612740,NOP,New Orleans Pelicans,TOR,0.0,...,8,19,0.421,2,5,0.4,4,4,1.0,-19


In [3]:
# === CELL 3: Update logs from nba_api ===
# Runs the updater and then summarises the returned frame.
#
# NOTE(review): the recorded run of this cell stopped inside the updater with
#   TypeError: LeagueGameLog.__init__() got an unexpected keyword argument 'player_or_team'
# so none of the summary below was printed. This cell only passes the three
# keywords listed in `update_options`, so the offending keyword is presumably
# supplied inside qepc.data_player_logs. nba_api's LeagueGameLog appears to
# name that parameter `player_or_team_abbreviation` — verify against the
# installed nba_api version and fix it in the module, not here.
from qepc.data_player_logs import update_player_logs_from_nba_api

# Switch to True for a rehearsal that shows what the update *would* do
# without saving anything.
DRY_RUN = False

update_options = {
    "seasons": None,     # None: the function works out which seasons need updating
    "dry_run": DRY_RUN,
    "verbose": True,
}
df_after = update_player_logs_from_nba_api(**update_options)

print("\nðŸ“Š Player logs AFTER update")
print("  Rows:", len(df_after))

updated_dates = df_after["gameDate"]
print("  Date range:", updated_dates.min(), "â†’", updated_dates.max())
print("  Seasons:", sorted(df_after["Season"].unique()))

print("\nSample of latest rows:")
# Most recent games first; show the top 20 only.
newest_first = df_after.sort_values("gameDate", ascending=False)
display(newest_first.head(20))


=== update_player_logs_from_nba_api ===
Existing rows: 123781
Existing date range: 2019-10-22 00:00:00 â†’ 2024-04-14 00:00:00
Last season in logs: 2023-24
Current NBA season: 2025-26
Seasons to fetch: ['2023-24', '2024-25', '2025-26']

Fetching leaguegamelog for Season=2023-24 (players)...


TypeError: LeagueGameLog.__init__() got an unexpected keyword argument 'player_or_team'

In [None]:
# === CELL 4 (optional): Quick sanity metrics ===
# Reloads the stored logs and prints row count, date span, seasons and the
# null count of each core column. Core columns that are absent from the frame
# are reported by name rather than being dropped silently, so schema drift is
# visible in the output.
from qepc.data_player_logs import load_player_logs_5yr

df = load_player_logs_5yr()

print("Rows:", len(df))
print("Date range:", df["gameDate"].min(), "â†’", df["gameDate"].max())
print("Seasons:", sorted(df["Season"].unique()))

core_cols = [
    "gameId", "gameDate", "Season",
    "playerId", "playerName",
    "teamId", "teamAbbrev", "teamName",
    "opponentTeamAbbrev", "home", "win",
    "minutes", "pts", "reboundsTotal", "assists"
]
# Selecting with a list that contains an absent label would raise, so the
# null-count table is restricted to the columns that are actually present.
existing_core = [c for c in core_cols if c in df.columns]
missing_core = [c for c in core_cols if c not in df.columns]

if missing_core:
    # Printed only when something is missing; output is unchanged otherwise.
    print("\nMissing core columns:", missing_core)

print("\nNull counts on core columns:")
print(df[existing_core].isna().sum())
