# QEPC – Fetch & Build Eoin (Kaggle) NBA Universe

This notebook downloads the **Eoin Moore** historical NBA dataset via **kagglehub**, copies the raw CSVs into your repo, and builds the QEPC-ready Parquet caches used by QEPC modules (player points backtests, totals, etc.).

**Design goals**
- No hardcoded machine paths (works across computers).
- Safe to rerun (idempotent): it will overwrite cache files.
- Friendly errors if Kaggle credentials / dependencies are missing.

---


In [1]:
# ==========================================================
# CELL 1 — PROJECT ROOT + PATHS (cross-computer safe)
# ==========================================================
from __future__ import annotations

import os
import sys
from pathlib import Path

def find_project_root(start: Path | None = None) -> Path:
    # Find repo root by walking upward until we hit a marker like:
    # - pyproject.toml (preferred)
    # - .git
    # - qepc/ package folder
    # You can also override with env var QEPC_PROJECT_ROOT.
    env = os.environ.get("QEPC_PROJECT_ROOT")
    if env:
        p = Path(env).expanduser().resolve()
        if p.exists():
            return p

    p = (start or Path.cwd()).resolve()
    for parent in [p] + list(p.parents):
        if (parent / "pyproject.toml").exists() or (parent / ".git").exists() or (parent / "qepc").is_dir():
            return parent
    return p

PROJECT_ROOT = find_project_root()

# Make local imports work even if you didn't `pip install -e .` yet
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

RAW_EOIN = PROJECT_ROOT / "data" / "raw" / "nba" / "eoin"
CACHE_IMPORTS = PROJECT_ROOT / "cache" / "imports"

RAW_EOIN.mkdir(parents=True, exist_ok=True)
CACHE_IMPORTS.mkdir(parents=True, exist_ok=True)

print("PROJECT_ROOT :", PROJECT_ROOT)
print("RAW_EOIN     :", RAW_EOIN)
print("CACHE_IMPORTS:", CACHE_IMPORTS)


PROJECT_ROOT : C:\Users\wdorsey\qepc_project
RAW_EOIN     : C:\Users\wdorsey\qepc_project\data\raw\nba\eoin
CACHE_IMPORTS: C:\Users\wdorsey\qepc_project\cache\imports


In [2]:
# ==========================================================
# CELL 2 — DEPENDENCIES
# ==========================================================
# Needs:
# - pandas
# - pyarrow (parquet writing)
# - kagglehub (dataset download + caching)

def ensure_import(pkg: str, pip_name: str | None = None):
    try:
        __import__(pkg)
        print(f"✅ {pkg} already available")
        return
    except ImportError:
        pass

    pip_name = pip_name or pkg
    print(f"Installing {pip_name} ...")
    %pip install -q {pip_name}
    __import__(pkg)
    print(f"✅ {pkg} installed")

ensure_import("pandas")
ensure_import("pyarrow")
ensure_import("kagglehub")

import pandas as pd
import kagglehub

print("pandas:", pd.__version__)


✅ pandas already available
✅ pyarrow already available
✅ kagglehub already available
pandas: 2.2.3


In [3]:
# ==========================================================
# CELL 3 — DOWNLOAD DATASET VIA kagglehub
# ==========================================================
from pathlib import Path

DATASET_ID = "eoinamoore/historical-nba-data-and-player-box-scores"

# Credentials help:
# - Put kaggle.json in: ~/.kaggle/kaggle.json
#   Windows: C:\Users\<you>\.kaggle\kaggle.json
# - OR set env vars: KAGGLE_USERNAME and KAGGLE_KEY

eoin_kaggle_path = Path(kagglehub.dataset_download(DATASET_ID)).resolve()
print("Kaggle download folder:", eoin_kaggle_path)

csvs = sorted(eoin_kaggle_path.rglob("*.csv"))
print(f"Found {len(csvs)} CSV files under kagglehub folder.")
for p in csvs[:25]:
    print(" -", p.relative_to(eoin_kaggle_path))


Kaggle download folder: C:\Users\wdorsey\.cache\kagglehub\datasets\eoinamoore\historical-nba-data-and-player-box-scores\versions\313
Found 7 CSV files under kagglehub folder.
 - Games.csv
 - LeagueSchedule24_25.csv
 - LeagueSchedule25_26.csv
 - Players.csv
 - PlayerStatistics.csv
 - TeamHistories.csv
 - TeamStatistics.csv


In [4]:
# ==========================================================
# CELL 4 — COPY RAW EOIN CSVs INTO PROJECT (data/raw/nba/eoin)
# ==========================================================
import shutil

wanted_files = [
    "Games.csv",
    "Players.csv",
    "PlayerStatistics.csv",
    "TeamHistories.csv",
    "TeamStatistics.csv",
    "LeagueSchedule24_25.csv",
    "LeagueSchedule25_26.csv",
]

for name in wanted_files:
    matches = list(eoin_kaggle_path.rglob(name))
    if not matches:
        print(f"[WARN] {name} not found in Kaggle folder.")
        continue

    src = matches[0]
    dst = RAW_EOIN / name
    shutil.copy2(src, dst)
    print(f"Copied {src.name} → {dst}")

print("\nCSV files now in RAW_EOIN:")
for p in sorted(RAW_EOIN.glob("*.csv")):
    print(" -", p.name)


Copied Games.csv → C:\Users\wdorsey\qepc_project\data\raw\nba\eoin\Games.csv
Copied Players.csv → C:\Users\wdorsey\qepc_project\data\raw\nba\eoin\Players.csv
Copied PlayerStatistics.csv → C:\Users\wdorsey\qepc_project\data\raw\nba\eoin\PlayerStatistics.csv
Copied TeamHistories.csv → C:\Users\wdorsey\qepc_project\data\raw\nba\eoin\TeamHistories.csv
Copied TeamStatistics.csv → C:\Users\wdorsey\qepc_project\data\raw\nba\eoin\TeamStatistics.csv
Copied LeagueSchedule24_25.csv → C:\Users\wdorsey\qepc_project\data\raw\nba\eoin\LeagueSchedule24_25.csv
Copied LeagueSchedule25_26.csv → C:\Users\wdorsey\qepc_project\data\raw\nba\eoin\LeagueSchedule25_26.csv

CSV files now in RAW_EOIN:
 - Games.csv
 - LeagueSchedule24_25.csv
 - LeagueSchedule25_26.csv
 - Players.csv
 - PlayerStatistics.csv
 - TeamHistories.csv
 - TeamStatistics.csv


In [5]:
# ==========================================================
# CELL 5 — LOAD RAW CSVs
# ==========================================================
from pathlib import Path
import pandas as pd

def load_eoin_csv(filename: str, low_memory: bool = False) -> pd.DataFrame:
    path = RAW_EOIN / filename
    if not path.exists():
        raise FileNotFoundError(f"Missing {filename} at {path}. Run Cell 4 first.")
    df = pd.read_csv(path, low_memory=low_memory)
    print(f"Loaded {filename} → shape={df.shape}")
    return df

games_raw = load_eoin_csv("Games.csv", low_memory=False)
player_boxes_raw = load_eoin_csv("PlayerStatistics.csv", low_memory=True)
team_boxes_raw = load_eoin_csv("TeamStatistics.csv", low_memory=False)

display(games_raw.head())
display(player_boxes_raw.head())
display(team_boxes_raw.head())


Loaded Games.csv → shape=(72394, 17)


  df = pd.read_csv(path, low_memory=low_memory)


Loaded PlayerStatistics.csv → shape=(1641567, 35)
Loaded TeamStatistics.csv → shape=(144788, 48)


Unnamed: 0,gameId,gameDateTimeEst,hometeamCity,hometeamName,hometeamId,awayteamCity,awayteamName,awayteamId,homeScore,awayScore,winner,gameType,attendance,arenaId,gameLabel,gameSubLabel,seriesGameNumber
0,22500422,2025-12-23 17:30:00,LA,Clippers,1610612746,Houston,Rockets,1610612745,128,108,1610612746,,17927.0,,,,
1,22500421,2025-12-23 17:00:00,Sacramento,Kings,1610612758,Detroit,Pistons,1610612765,127,136,1610612765,,17832.0,,,,
2,22500420,2025-12-23 17:00:00,Portland,Trail Blazers,1610612757,Orlando,Magic,1610612753,106,110,1610612753,,17073.0,,,,
3,22500418,2025-12-23 16:00:00,Phoenix,Suns,1610612756,Los Angeles,Lakers,1610612747,132,108,1610612756,,17071.0,,,,
4,22500419,2025-12-23 16:00:00,Utah,Jazz,1610612762,Memphis,Grizzlies,1610612763,128,137,1610612763,,18186.0,,,,


Unnamed: 0,firstName,lastName,personId,gameId,gameDateTimeEst,playerteamCity,playerteamName,opponentteamCity,opponentteamName,gameType,...,threePointersPercentage,freeThrowsAttempted,freeThrowsMade,freeThrowsPercentage,reboundsDefensive,reboundsOffensive,reboundsTotal,foulsPersonal,turnovers,plusMinusPoints
0,Kris,Dunn,1627739,22500422,2025-12-23 17:30:00,LA,Clippers,Houston,Rockets,,...,0.429,1.0,0.0,0.0,4.0,0.0,4.0,1.0,0.0,17.0
1,John,Collins,1628381,22500422,2025-12-23 17:30:00,LA,Clippers,Houston,Rockets,,...,1.0,0.0,0.0,0.0,3.0,2.0,5.0,3.0,0.0,17.0
2,Aaron,Holiday,1628988,22500422,2025-12-23 17:30:00,Houston,Rockets,LA,Clippers,,...,0.333,0.0,0.0,0.0,0.0,0.0,0.0,3.0,2.0,-9.0
3,Josh,Okogie,1629006,22500422,2025-12-23 17:30:00,Houston,Rockets,LA,Clippers,,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,2.0,0.0,3.0
4,Jae'Sean,Tate,1630256,22500422,2025-12-23 17:30:00,Houston,Rockets,LA,Clippers,,...,0.0,1.0,1.0,1.0,1.0,2.0,3.0,3.0,0.0,0.0


Unnamed: 0,gameId,gameDateTimeEst,teamCity,teamName,teamId,opponentTeamCity,opponentTeamName,opponentTeamId,home,win,...,leadChanges,pointsFastBreak,pointsFromTurnovers,pointsInThePaint,pointsSecondChance,timesTied,timeoutsRemaining,seasonWins,seasonLosses,coachId
0,22500422,2025-12-23 17:30:00,Houston,Rockets,1610612745,LA,Clippers,1610612746,0,0,...,5.0,6.0,14.0,60.0,19.0,6.0,1.0,17.0,10.0,
1,22500422,2025-12-23 17:30:00,LA,Clippers,1610612746,Houston,Rockets,1610612745,1,1,...,5.0,11.0,16.0,44.0,14.0,6.0,1.0,8.0,21.0,
2,22500420,2025-12-23 17:00:00,Orlando,Magic,1610612753,Portland,Trail Blazers,1610612757,0,1,...,4.0,13.0,22.0,36.0,10.0,1.0,0.0,17.0,13.0,
3,22500420,2025-12-23 17:00:00,Portland,Trail Blazers,1610612757,Orlando,Magic,1610612753,1,0,...,4.0,28.0,14.0,58.0,16.0,1.0,0.0,12.0,18.0,
4,22500421,2025-12-23 17:00:00,Sacramento,Kings,1610612758,Detroit,Pistons,1610612765,1,0,...,9.0,8.0,16.0,48.0,31.0,1.0,1.0,7.0,23.0,


In [6]:
# ==========================================================
# CELL 6 — NORMALIZE COLUMN NAMES + DATETIME PARSER
# ==========================================================
import pandas as pd

def normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
    out = df.copy()
    out.columns = [
        str(c).strip()
        .lower()
        .replace(" ", "")
        .replace("(", "")
        .replace(")", "")
        for c in out.columns
    ]
    return out

def parse_game_datetime(series: pd.Series) -> pd.Series:
    # Parse ISO8601 strings into UTC datetimes (handles offsets like -04:00)
    dt = pd.to_datetime(series, format="ISO8601", utc=True, errors="coerce")
    n_bad = series.shape[0] - dt.notna().sum()
    if n_bad:
        print(f"[WARN] {n_bad} rows could not be parsed as datetimes.")
    return dt

games_norm = normalize_columns(games_raw)
player_boxes_norm = normalize_columns(player_boxes_raw)
team_boxes_norm = normalize_columns(team_boxes_raw)

print("games_norm columns (first 30):", list(games_norm.columns)[:30])
print("player_boxes_norm columns (first 30):", list(player_boxes_norm.columns)[:30])
print("team_boxes_norm columns (first 30):", list(team_boxes_norm.columns)[:30])


games_norm columns (first 30): ['gameid', 'gamedatetimeest', 'hometeamcity', 'hometeamname', 'hometeamid', 'awayteamcity', 'awayteamname', 'awayteamid', 'homescore', 'awayscore', 'winner', 'gametype', 'attendance', 'arenaid', 'gamelabel', 'gamesublabel', 'seriesgamenumber']
player_boxes_norm columns (first 30): ['firstname', 'lastname', 'personid', 'gameid', 'gamedatetimeest', 'playerteamcity', 'playerteamname', 'opponentteamcity', 'opponentteamname', 'gametype', 'gamelabel', 'gamesublabel', 'seriesgamenumber', 'win', 'home', 'numminutes', 'points', 'assists', 'blocks', 'steals', 'fieldgoalsattempted', 'fieldgoalsmade', 'fieldgoalspercentage', 'threepointersattempted', 'threepointersmade', 'threepointerspercentage', 'freethrowsattempted', 'freethrowsmade', 'freethrowspercentage', 'reboundsdefensive']
team_boxes_norm columns (first 30): ['gameid', 'gamedatetimeest', 'teamcity', 'teamname', 'teamid', 'opponentteamcity', 'opponentteamname', 'opponentteamid', 'home', 'win', 'teamscore', 'o

In [7]:
# ==========================================================
# CELL 7 — BUILD QEPC-READY TABLES (robust to column variations)
# ==========================================================
import numpy as np
import pandas as pd

def rename_if_present(df: pd.DataFrame, mapping: dict) -> pd.DataFrame:
    existing = {k: v for k, v in mapping.items() if k in df.columns}
    return df.rename(columns=existing)

def first_present(df: pd.DataFrame, candidates: list[str]) -> str:
    for c in candidates:
        if c in df.columns:
            return c
    raise KeyError(f"None of these columns exist: {candidates}")

# ---------- Games (game-level) ----------
games_qepc = games_norm.copy()
games_qepc = rename_if_present(games_qepc, {
    "gameid": "game_id",
    "gamedatetimeest": "game_datetime",
    "gamedatetimeutc": "game_datetime",
    "hometeamid": "home_team_id",
    "awayteamid": "away_team_id",
    "hometeamcity": "home_team_city",
    "hometeamname": "home_team_name",
    "awayteamcity": "away_team_city",
    "awayteamname": "away_team_name",
    "homescore": "home_score",
    "awayscore": "away_score",
})
dt_col = first_present(games_qepc, ["game_datetime", "gamedatetimeest", "gamedatetimeutc"])
games_qepc["game_datetime"] = parse_game_datetime(games_qepc[dt_col])
games_qepc["game_date"] = games_qepc["game_datetime"].dt.date

if "home_score" in games_qepc.columns and "away_score" in games_qepc.columns:
    games_qepc["winner_team_id"] = np.where(
        games_qepc["home_score"] > games_qepc["away_score"],
        games_qepc["home_team_id"],
        np.where(games_qepc["away_score"] > games_qepc["home_score"], games_qepc["away_team_id"], np.nan),
    )
games_qepc["is_final"] = True

core_games_cols = [
    "game_id","game_datetime","game_date",
    "home_team_id","away_team_id",
    "home_team_city","home_team_name",
    "away_team_city","away_team_name",
    "home_score","away_score",
    "winner_team_id","is_final"
]
games_qepc = games_qepc[[c for c in core_games_cols if c in games_qepc.columns] + [c for c in games_qepc.columns if c not in core_games_cols]]

# ---------- Player boxes (player-game) ----------
player_boxes_qepc = player_boxes_norm.copy()
player_boxes_qepc = rename_if_present(player_boxes_qepc, {
    "personid": "player_id",
    "gameid": "game_id",
    "gamedatetimeest": "game_datetime",
    "gamedatetimeutc": "game_datetime",
    "playerteamid": "team_id",
    "playerteamcity": "team_city",
    "playerteamname": "team_name",
    "opponentteamid": "opp_team_id",
    "opponentteamcity": "opp_team_city",
    "opponentteamname": "opp_team_name",
})
dt_col = first_present(player_boxes_qepc, ["game_datetime","gamedatetimeest","gamedatetimeutc"])
player_boxes_qepc["game_datetime"] = parse_game_datetime(player_boxes_qepc[dt_col])
player_boxes_qepc["game_date"] = player_boxes_qepc["game_datetime"].dt.date

if "numminutes" not in player_boxes_qepc.columns:
    if "minsplayed" in player_boxes_qepc.columns:
        player_boxes_qepc["numminutes"] = player_boxes_qepc["minsplayed"]
    elif "minutes" in player_boxes_qepc.columns:
        player_boxes_qepc["numminutes"] = player_boxes_qepc["minutes"]

# ---------- Team boxes (team-game) ----------
team_boxes_qepc = team_boxes_norm.copy()
team_boxes_qepc = rename_if_present(team_boxes_qepc, {
    "gameid": "game_id",
    "gamedatetimeest": "game_datetime",
    "gamedatetimeutc": "game_datetime",
    "teamid": "team_id",
    "teamcity": "team_city",
    "teamname": "team_name",
    "opponentteamid": "opp_team_id",
    "opponentteamcity": "opp_team_city",
    "opponentteamname": "opp_team_name",
    "teamscore": "teamscore",
    "opponentscore": "opponentscore",
})
dt_col = first_present(team_boxes_qepc, ["game_datetime","gamedatetimeest","gamedatetimeutc"])
team_boxes_qepc["game_datetime"] = parse_game_datetime(team_boxes_qepc[dt_col])
team_boxes_qepc["game_date"] = team_boxes_qepc["game_datetime"].dt.date

if "win" not in team_boxes_qepc.columns and "teamscore" in team_boxes_qepc.columns and "opponentscore" in team_boxes_qepc.columns:
    team_boxes_qepc["win"] = (team_boxes_qepc["teamscore"] > team_boxes_qepc["opponentscore"]).astype(int)

print("games_qepc shape:", games_qepc.shape)
print("player_boxes_qepc shape:", player_boxes_qepc.shape)
print("team_boxes_qepc shape:", team_boxes_qepc.shape)

display(games_qepc.head())
display(player_boxes_qepc.head())
display(team_boxes_qepc.head())


games_qepc shape: (72394, 20)
player_boxes_qepc shape: (1641567, 36)
team_boxes_qepc shape: (144788, 49)


Unnamed: 0,game_id,game_datetime,game_date,home_team_id,away_team_id,home_team_city,home_team_name,away_team_city,away_team_name,home_score,away_score,winner_team_id,is_final,winner,gametype,attendance,arenaid,gamelabel,gamesublabel,seriesgamenumber
0,22500422,2025-12-23 17:30:00+00:00,2025-12-23,1610612746,1610612745,LA,Clippers,Houston,Rockets,128,108,1610613000.0,True,1610612746,,17927.0,,,,
1,22500421,2025-12-23 17:00:00+00:00,2025-12-23,1610612758,1610612765,Sacramento,Kings,Detroit,Pistons,127,136,1610613000.0,True,1610612765,,17832.0,,,,
2,22500420,2025-12-23 17:00:00+00:00,2025-12-23,1610612757,1610612753,Portland,Trail Blazers,Orlando,Magic,106,110,1610613000.0,True,1610612753,,17073.0,,,,
3,22500418,2025-12-23 16:00:00+00:00,2025-12-23,1610612756,1610612747,Phoenix,Suns,Los Angeles,Lakers,132,108,1610613000.0,True,1610612756,,17071.0,,,,
4,22500419,2025-12-23 16:00:00+00:00,2025-12-23,1610612762,1610612763,Utah,Jazz,Memphis,Grizzlies,128,137,1610613000.0,True,1610612763,,18186.0,,,,


Unnamed: 0,firstname,lastname,player_id,game_id,game_datetime,team_city,team_name,opp_team_city,opp_team_name,gametype,...,freethrowsattempted,freethrowsmade,freethrowspercentage,reboundsdefensive,reboundsoffensive,reboundstotal,foulspersonal,turnovers,plusminuspoints,game_date
0,Kris,Dunn,1627739,22500422,2025-12-23 17:30:00+00:00,LA,Clippers,Houston,Rockets,,...,1.0,0.0,0.0,4.0,0.0,4.0,1.0,0.0,17.0,2025-12-23
1,John,Collins,1628381,22500422,2025-12-23 17:30:00+00:00,LA,Clippers,Houston,Rockets,,...,0.0,0.0,0.0,3.0,2.0,5.0,3.0,0.0,17.0,2025-12-23
2,Aaron,Holiday,1628988,22500422,2025-12-23 17:30:00+00:00,Houston,Rockets,LA,Clippers,,...,0.0,0.0,0.0,0.0,0.0,0.0,3.0,2.0,-9.0,2025-12-23
3,Josh,Okogie,1629006,22500422,2025-12-23 17:30:00+00:00,Houston,Rockets,LA,Clippers,,...,0.0,0.0,0.0,1.0,0.0,1.0,2.0,0.0,3.0,2025-12-23
4,Jae'Sean,Tate,1630256,22500422,2025-12-23 17:30:00+00:00,Houston,Rockets,LA,Clippers,,...,1.0,1.0,1.0,1.0,2.0,3.0,3.0,0.0,0.0,2025-12-23


Unnamed: 0,game_id,game_datetime,team_city,team_name,team_id,opp_team_city,opp_team_name,opp_team_id,home,win,...,pointsfastbreak,pointsfromturnovers,pointsinthepaint,pointssecondchance,timestied,timeoutsremaining,seasonwins,seasonlosses,coachid,game_date
0,22500422,2025-12-23 17:30:00+00:00,Houston,Rockets,1610612745,LA,Clippers,1610612746,0,0,...,6.0,14.0,60.0,19.0,6.0,1.0,17.0,10.0,,2025-12-23
1,22500422,2025-12-23 17:30:00+00:00,LA,Clippers,1610612746,Houston,Rockets,1610612745,1,1,...,11.0,16.0,44.0,14.0,6.0,1.0,8.0,21.0,,2025-12-23
2,22500420,2025-12-23 17:00:00+00:00,Orlando,Magic,1610612753,Portland,Trail Blazers,1610612757,0,1,...,13.0,22.0,36.0,10.0,1.0,0.0,17.0,13.0,,2025-12-23
3,22500420,2025-12-23 17:00:00+00:00,Portland,Trail Blazers,1610612757,Orlando,Magic,1610612753,1,0,...,28.0,14.0,58.0,16.0,1.0,0.0,12.0,18.0,,2025-12-23
4,22500421,2025-12-23 17:00:00+00:00,Sacramento,Kings,1610612758,Detroit,Pistons,1610612765,1,0,...,8.0,16.0,48.0,31.0,1.0,1.0,7.0,23.0,,2025-12-23


In [8]:
# ==========================================================
# CELL 8 — SAVE QEPC TABLES TO cache/imports
# ==========================================================
games_path = CACHE_IMPORTS / "eoin_games_qepc.parquet"
player_boxes_path = CACHE_IMPORTS / "eoin_player_boxes_qepc.parquet"
team_boxes_path = CACHE_IMPORTS / "eoin_team_boxes_qepc.parquet"

games_qepc.to_parquet(games_path, index=False)
player_boxes_qepc.to_parquet(player_boxes_path, index=False)
team_boxes_qepc.to_parquet(team_boxes_path, index=False)

print("Saved QEPC-ready Eoin data to:", CACHE_IMPORTS)
print(" -", games_path.name)
print(" -", player_boxes_path.name)
print(" -", team_boxes_path.name)


Saved QEPC-ready Eoin data to: C:\Users\wdorsey\qepc_project\cache\imports
 - eoin_games_qepc.parquet
 - eoin_player_boxes_qepc.parquet
 - eoin_team_boxes_qepc.parquet


In [9]:
# ==========================================================
# CELL 9 — VERIFY QEPC CAN LOAD THE NEW PARQUETS
# ==========================================================
from qepc.nba.eoin_data_source import (
    load_eoin_games,
    load_eoin_player_boxes,
    load_eoin_team_boxes,
    print_eoin_summary,
)

_ = load_eoin_games()
_ = load_eoin_player_boxes()
_ = load_eoin_team_boxes()

print_eoin_summary()
print("\n✅ Loaded back from qepc.nba.eoin_data_source successfully.")


=== Eoin / QEPC Data Summary ===
Games:            72394 rows, 20 columns
  game_datetime: 1946-11-26 23:00:00+00:00  →  2025-12-23 17:30:00+00:00
Player boxes:   1641567 rows, 36 columns
  game_datetime: 1946-11-26 23:00:00+00:00  →  2025-12-23 17:30:00+00:00
Team boxes:      144788 rows, 49 columns
  game_datetime: 1946-11-26 23:00:00+00:00  →  2025-12-23 17:30:00+00:00
Max season record seen in team_boxes: 68.0–65.0 (approx)

✅ Loaded back from qepc.nba.eoin_data_source successfully.


In [10]:
# ==========================================================
# CELL 10 — TIP
# ==========================================================
print("Tip: Use Notebook → 'Run All Cells' to execute top-to-bottom.")


Tip: Use Notebook → 'Run All Cells' to execute top-to-bottom.
