In [None]:
from qepc.brain.games_loader import fetch_league_games, build_games_table
from qepc.brain.boxscores_loader import fetch_boxscores_for_games

# Pull the 2023-24 league game log and build the games table from it.
team_games = fetch_league_games("2023-24")
games = build_games_table(team_games)

# Smoke test: only the first 3 distinct game IDs, so the fetch stays short.
# Slice before converting so only those 3 values are turned into a list.
game_ids = games["GAME_ID"].unique()[:3].tolist()

# The loader returns two frames, unpacked here as traditional and advanced.
trad_df, adv_df = fetch_boxscores_for_games(game_ids, sleep_seconds=0.8, verbose=True)

# Use display() so each preview renders as its own rich table; a bare
# `a, b` tuple as the last expression falls back to one plain-text repr.
display(trad_df.head(), adv_df.head())


In [None]:
import pandas as pd

def normalize_boxscore_cols(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` whose key ID columns use upper-snake-case names.

    The mapping applied is ``gameId -> GAME_ID``, ``teamId -> TEAM_ID`` and
    ``personId -> PLAYER_ID``. The input frame is never modified.

    Conflict handling:
      * If both ``gameId`` and ``GAME_ID`` are present, ``gameId`` is dropped
        so no duplicate GAME_ID column can be produced.
      * ``teamId`` / ``personId`` are renamed only when their upper-case
        target is absent; otherwise they are left as they are.
    """
    id_aliases = (
        ("gameId", "GAME_ID"),
        ("teamId", "TEAM_ID"),
        ("personId", "PLAYER_ID"),
    )
    result = df.copy()

    # GAME_ID may already have been added upstream; the camelCase twin is
    # then redundant and is removed rather than renamed.
    if all(label in result.columns for label in ("GAME_ID", "gameId")):
        result = result.drop(columns=["gameId"])

    # Keep only the renames whose source exists and whose target is free.
    current = result.columns
    pending = {
        source: target
        for source, target in id_aliases
        if source in current and target not in current
    }

    if not pending:
        return result
    return result.rename(columns=pending)


In [None]:
# Apply the same ID-column normalization to both box score frames.
trad_norm, adv_norm = (
    normalize_boxscore_cols(raw_frame) for raw_frame in (trad_df, adv_df)
)

# Preview the first rows of each normalized frame, traditional first.
display(trad_norm.head(), adv_norm.head())


In [None]:
from pathlib import Path

# The project root is taken to be two directory levels above the current
# working directory (parents[1] of the resolved cwd).
PROJECT_ROOT = Path.cwd().resolve().parents[1]

# Target folder for the raw box score samples; created on demand.
out_dir = PROJECT_ROOT.joinpath("data", "raw", "nba", "boxscores")
out_dir.mkdir(parents=True, exist_ok=True)

# Write each normalized frame to its own parquet file, without the index.
traditional_path = out_dir / "boxscores_traditional_2023-24_sample.parquet"
advanced_path = out_dir / "boxscores_advanced_2023-24_sample.parquet"
trad_norm.to_parquet(traditional_path, index=False)
adv_norm.to_parquet(advanced_path, index=False)

print("Saved to:", out_dir)
