In [7]:
import sys
from pathlib import Path
import pandas as pd

# NBA API imports
from nba_api.stats.endpoints import ScoreboardV2, LeagueDashTeamStats

# --------------------------------------------------------------------
# 0) Robustly find QEPC project root and attach it
# --------------------------------------------------------------------
project_root = Path.cwd()

# Walk up until we find qepc_autoload.py or qepc/ package
for _ in range(10):
    if (project_root / "qepc_autoload.py").exists() or (project_root / "qepc").is_dir():
        break
    if project_root.parent == project_root:
        break  # reached filesystem root
    project_root = project_root.parent

sys.path.insert(0, str(project_root))

print(f"[QEPC Live] Project root: {project_root}")

try:
    import qepc_autoload  # side-effect: sets paths + prints banner
    from qepc.autoload.paths import get_data_dir
except ImportError as e:
    print("‚ùå Could not import qepc_autoload or qepc.autoload.paths.")
    print("   project_root used:", project_root)
    print("   sys.path[0]:", sys.path[0])
    raise e

data_dir = get_data_dir()
live_dir = data_dir / "live"
live_dir.mkdir(parents=True, exist_ok=True)

print(f"[QEPC Live] data_dir: {data_dir}")
print(f"[QEPC Live] live_dir: {live_dir}")


# Helper: current NBA season string in NBA API format
def current_season_str() -> str:
    """
    Return current NBA season string like '2024-25'.
    """
    from datetime import datetime, timezone

    today = datetime.now(timezone.utc)
    year = today.year
    # NBA season usually starts in Oct; if month < 7, we are in the tail of previous season
    if today.month < 7:
        start = year - 1
        end = year
    else:
        start = year
        end = year + 1
    return f"{start}-{str(end)[-2:]}"


# --------------------------------------------------------------------
# 1) Fetch today's games from NBA API (ScoreboardV2)
# --------------------------------------------------------------------
print("\nüì° Fetching today's games from NBA API (ScoreboardV2)...")

sb = ScoreboardV2()
games_df = sb.game_header.get_data_frame()
lines_df = sb.line_score.get_data_frame()

print(f"[Scoreboard] Games returned: {len(games_df)}")
print(f"[Scoreboard] line_score columns: {list(lines_df.columns)}")

if len(games_df) == 0:
    print("‚ö†Ô∏è No games found for today from ScoreboardV2.")
else:
    # TEAM_NAME is always present; TEAM_NICKNAME may not be
    team_name_col = "TEAM_NICKNAME" if "TEAM_NICKNAME" in lines_df.columns else "TEAM_NAME"

    # Join line_score with game_header to get HOME_TEAM_ID / VISITOR_TEAM_ID
    merged_ls = lines_df.merge(
        games_df[["GAME_ID", "HOME_TEAM_ID", "VISITOR_TEAM_ID"]],
        on="GAME_ID",
        how="left",
    )

    # Debug: show how IDs look
    print("\n[Debug] Sample merged line_score + header IDs:")
    print(
        merged_ls[
            ["GAME_ID", "TEAM_ID", "HOME_TEAM_ID", "VISITOR_TEAM_ID"]
        ].head()
    )

    # Select the columns we care about
    teams_df = merged_ls[
        [
            "GAME_ID",
            "TEAM_ID",
            "TEAM_ABBREVIATION" if "TEAM_ABBREVIATION" in merged_ls.columns else "TEAM_ID",
            "TEAM_CITY_NAME",
            team_name_col,
            "PTS",
            "HOME_TEAM_ID",
            "VISITOR_TEAM_ID",
        ]
    ].copy()

    # Normalize column names
    if "TEAM_ABBREVIATION" in teams_df.columns:
        teams_df = teams_df.rename(columns={"TEAM_ABBREVIATION": "TEAM_ABBREV"})
    else:
        teams_df = teams_df.rename(columns={"TEAM_ID": "TEAM_ABBREV"})

    teams_df = teams_df.rename(columns={team_name_col: "TEAM_NAME"})

    # Make sure IDs are comparable (string-safe)
    teams_df["TEAM_ID_str"] = teams_df["TEAM_ID"].astype(str)
    teams_df["HOME_ID_str"] = teams_df["HOME_TEAM_ID"].astype(str)
    teams_df["VIS_ID_str"] = teams_df["VISITOR_TEAM_ID"].astype(str)

    teams_df["is_home"] = teams_df["TEAM_ID_str"] == teams_df["HOME_ID_str"]

    home_side = teams_df[teams_df["is_home"]].copy()
    away_side = teams_df[~teams_df["is_home"]].copy()

    print(f"\n[Scoreboard] home_side rows: {len(home_side)}, away_side rows: {len(away_side)}")

    # Fallback: if something went weird and we didn't detect home/away correctly,
    # we can approximate by taking first team as home, second as away per game.
    if home_side.empty or away_side.empty:
        print("‚ö†Ô∏è Home/away detection via IDs failed; using fallback ordering.")
        # Sort by GAME_ID and TEAM_ID; first row per game = home, second = away
        tmp = teams_df.sort_values(["GAME_ID", "TEAM_ID_str"]).copy()
        tmp["rank"] = tmp.groupby("GAME_ID").cumcount()
        home_side = tmp[tmp["rank"] == 0].copy()
        away_side = tmp[tmp["rank"] == 1].copy()

    # Merge into one row per game
    merged = (
        games_df[["GAME_ID", "GAME_DATE_EST"]]
        .merge(
            home_side[
                [
                    "GAME_ID",
                    "TEAM_ID",
                    "TEAM_ABBREV",
                    "TEAM_CITY_NAME",
                    "TEAM_NAME",
                    "PTS",
                ]
            ],
            on="GAME_ID",
            how="left",
            suffixes=("", "_home"),
        )
        .rename(
            columns={
                "TEAM_ID": "HOME_TEAM_ID",
                "TEAM_ABBREV": "HOME_TEAM_ABBREV",
                "TEAM_CITY_NAME": "HOME_TEAM_CITY",
                "TEAM_NAME": "HOME_TEAM_NAME",
                "PTS": "HOME_PTS",
            }
        )
        .merge(
            away_side[
                [
                    "GAME_ID",
                    "TEAM_ID",
                    "TEAM_ABBREV",
                    "TEAM_CITY_NAME",
                    "TEAM_NAME",
                    "PTS",
                ]
            ],
            on="GAME_ID",
            how="left",
            suffixes=("", "_away"),
        )
        .rename(
            columns={
                "TEAM_ID": "AWAY_TEAM_ID",
                "TEAM_ABBREV": "AWAY_TEAM_ABBREV",
                "TEAM_CITY_NAME": "AWAY_TEAM_CITY",
                "TEAM_NAME": "AWAY_TEAM_NAME",
                "PTS": "AWAY_PTS",
            }
        )
    )

    # QEPC-friendly names
    merged["HOME_TEAM_QEPC"] = merged["HOME_TEAM_CITY"] + " " + merged["HOME_TEAM_NAME"]
    merged["AWAY_TEAM_QEPC"] = merged["AWAY_TEAM_CITY"] + " " + merged["AWAY_TEAM_NAME"]

    games_out = live_dir / "games_today_nba_api.csv"
    merged.to_csv(games_out, index=False)
    print(f"\n‚úÖ Saved today's games to {games_out}")

    display(
        merged[
            [
                "GAME_DATE_EST",
                "HOME_TEAM_QEPC",
                "AWAY_TEAM_QEPC",
                "HOME_PTS",
                "AWAY_PTS",
            ]
        ]
    )


# --------------------------------------------------------------------
# 2) Fetch live team advanced stats for current season
# --------------------------------------------------------------------
season_str = current_season_str()
print(f"\nüì° Fetching LeagueDashTeamStats for season: {season_str}")

team_stats = LeagueDashTeamStats(
    season=season_str,
    measure_type_detailed_defense="Advanced",  # OFF_RATING, DEF_RATING, NET_RATING, PACE, etc.
)
team_df = team_stats.get_data_frames()[0]
print(f"[LeagueDashTeamStats] Teams returned: {len(team_df)}")
print("[LeagueDashTeamStats] Columns:", list(team_df.columns))

# Required columns we expect from Advanced:
required_cols = [
    "TEAM_ID",
    "TEAM_NAME",
    "OFF_RATING",
    "DEF_RATING",
    "NET_RATING",
    "PACE",
]
missing_required = [c for c in required_cols if c not in team_df.columns]
if missing_required:
    raise RuntimeError(f"LeagueDashTeamStats missing required columns: {missing_required}")

# Optional columns we keep if present
optional_cols = [c for c in ["TEAM_ABBREVIATION", "W", "L", "GP"] if c in team_df.columns]

cols_keep = required_cols + optional_cols

live_team_stats = team_df[cols_keep].copy()

live_team_stats = live_team_stats.rename(
    columns={
        "TEAM_NAME": "Team",
        "OFF_RATING": "ORtg_live",
        "DEF_RATING": "DRtg_live",
        "NET_RATING": "NetRtg_live",
        "PACE": "Pace_live",
        "W": "Wins" if "W" in live_team_stats.columns else "Wins",
        "L": "Losses" if "L" in live_team_stats.columns else "Losses",
        "GP": "GamesPlayed" if "GP" in live_team_stats.columns else "GamesPlayed",
    }
)

live_team_stats["Season"] = season_str

team_stats_out = live_dir / "team_stats_live_nba_api.csv"
live_team_stats.to_csv(team_stats_out, index=False)
print(f"‚úÖ Saved live team stats to {team_stats_out}")

display(live_team_stats.head(10))


[QEPC Live] Project root: C:\Users\wdors\qepc_project
[QEPC Live] data_dir: C:\Users\wdors\qepc_project\data
[QEPC Live] live_dir: C:\Users\wdors\qepc_project\data\live

üì° Fetching today's games from NBA API (ScoreboardV2)...
[Scoreboard] Games returned: 8
[Scoreboard] line_score columns: ['GAME_DATE_EST', 'GAME_SEQUENCE', 'GAME_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_CITY_NAME', 'TEAM_NAME', 'TEAM_WINS_LOSSES', 'PTS_QTR1', 'PTS_QTR2', 'PTS_QTR3', 'PTS_QTR4', 'PTS_OT1', 'PTS_OT2', 'PTS_OT3', 'PTS_OT4', 'PTS_OT5', 'PTS_OT6', 'PTS_OT7', 'PTS_OT8', 'PTS_OT9', 'PTS_OT10', 'PTS', 'FG_PCT', 'FT_PCT', 'FG3_PCT', 'AST', 'REB', 'TOV']

[Debug] Sample merged line_score + header IDs:
Empty DataFrame
Columns: [GAME_ID, TEAM_ID, HOME_TEAM_ID, VISITOR_TEAM_ID]
Index: []

[Scoreboard] home_side rows: 0, away_side rows: 0
‚ö†Ô∏è Home/away detection via IDs failed; using fallback ordering.

‚úÖ Saved today's games to C:\Users\wdors\qepc_project\data\live\games_today_nba_api.csv


Unnamed: 0,GAME_DATE_EST,HOME_TEAM_QEPC,AWAY_TEAM_QEPC,HOME_PTS,AWAY_PTS
0,2025-11-30T00:00:00,,,,
1,2025-11-30T00:00:00,,,,
2,2025-11-30T00:00:00,,,,
3,2025-11-30T00:00:00,,,,
4,2025-11-30T00:00:00,,,,
5,2025-11-30T00:00:00,,,,
6,2025-11-30T00:00:00,,,,
7,2025-11-30T00:00:00,,,,



üì° Fetching LeagueDashTeamStats for season: 2025-26
[LeagueDashTeamStats] Teams returned: 30
[LeagueDashTeamStats] Columns: ['TEAM_ID', 'TEAM_NAME', 'GP', 'W', 'L', 'W_PCT', 'MIN', 'E_OFF_RATING', 'OFF_RATING', 'E_DEF_RATING', 'DEF_RATING', 'E_NET_RATING', 'NET_RATING', 'AST_PCT', 'AST_TO', 'AST_RATIO', 'OREB_PCT', 'DREB_PCT', 'REB_PCT', 'TM_TOV_PCT', 'EFG_PCT', 'TS_PCT', 'E_PACE', 'PACE', 'PACE_PER40', 'POSS', 'PIE', 'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'OFF_RATING_RANK', 'DEF_RATING_RANK', 'NET_RATING_RANK', 'AST_PCT_RANK', 'AST_TO_RANK', 'AST_RATIO_RANK', 'OREB_PCT_RANK', 'DREB_PCT_RANK', 'REB_PCT_RANK', 'TM_TOV_PCT_RANK', 'EFG_PCT_RANK', 'TS_PCT_RANK', 'PACE_RANK', 'PIE_RANK']
‚úÖ Saved live team stats to C:\Users\wdors\qepc_project\data\live\team_stats_live_nba_api.csv


Unnamed: 0,TEAM_ID,Team,ORtg_live,DRtg_live,NetRtg_live,Pace_live,Wins,Losses,GamesPlayed,Season
0,1610612737,Atlanta Hawks,115.5,113.6,1.9,101.9,12,8,20,2025-26
1,1610612738,Boston Celtics,119.0,114.7,4.3,96.47,10,9,19,2025-26
2,1610612751,Brooklyn Nets,111.6,122.4,-10.8,97.29,3,16,19,2025-26
3,1610612766,Charlotte Hornets,114.6,118.8,-4.2,100.08,6,14,20,2025-26
4,1610612741,Chicago Bulls,113.5,116.8,-3.3,104.38,9,10,19,2025-26
5,1610612739,Cleveland Cavaliers,115.5,112.2,3.3,102.47,12,8,20,2025-26
6,1610612742,Dallas Mavericks,106.0,111.1,-5.1,102.55,6,15,21,2025-26
7,1610612743,Denver Nuggets,123.9,114.2,9.7,100.21,14,5,19,2025-26
8,1610612765,Detroit Pistons,117.1,111.3,5.8,101.82,16,4,20,2025-26
9,1610612744,Golden State Warriors,113.0,111.8,1.3,100.98,11,10,21,2025-26


In [5]:
import requests
import pandas as pd
from datetime import datetime, date
from pathlib import Path

from qepc.autoload.paths import get_data_dir

def fetch_espn_nba_scoreboard(target_date: date | None = None) -> pd.DataFrame:
    """
    Pull ESPN NBA scoreboard for a given date and normalize to:
      gameDate, Home Team, Away Team, gameId
    """
    if target_date is None:
        target_date = date.today()

    # ESPN NBA scoreboard endpoint (unofficial but widely used)
    url = "https://site.api.espn.com/apis/site/v2/sports/basketball/nba/scoreboard"

    # ESPN uses 'dates=YYYYMMDD' or omits it for "today"
    params = {"dates": target_date.strftime("%Y%m%d")}

    resp = requests.get(url, params=params, timeout=10)
    resp.raise_for_status()
    data = resp.json()

    events = data.get("events", [])
    rows = []

    for ev in events:
        game_id = ev.get("id")
        # event date
        ev_date_raw = ev.get("date")
        game_dt = pd.to_datetime(ev_date_raw, errors="coerce")

        comps = ev.get("competitions", [])
        if not comps:
            continue

        comp = comps[0]
        competitors = comp.get("competitors", [])
        if len(competitors) != 2:
            continue

        home_team_name = None
        away_team_name = None

        for c in competitors:
            team_info = c.get("team", {}) or {}
            display_name = team_info.get("displayName")  # "Boston Celtics"
            if c.get("homeAway") == "home":
                home_team_name = display_name
            elif c.get("homeAway") == "away":
                away_team_name = display_name

        if not home_team_name or not away_team_name:
            continue

        rows.append(
            {
                "gameDate": game_dt,
                "Home Team": home_team_name,
                "Away Team": away_team_name,
                "gameId": game_id,
            }
        )

    df = pd.DataFrame(rows)
    return df

# --- TEST + SAVE TO data/live/espn_scoreboard_today.csv ---
data_dir = get_data_dir()
live_dir = Path(data_dir) / "live"
live_dir.mkdir(parents=True, exist_ok=True)

today_games_espn = fetch_espn_nba_scoreboard()
print(today_games_espn.head())

out_path = live_dir / "espn_scoreboard_today.csv"
today_games_espn.to_csv(out_path, index=False)
print(f"Saved ESPN scoreboard to {out_path}")


                   gameDate               Home Team              Away Team  \
0 2025-11-30 20:00:00+00:00               Utah Jazz        Houston Rockets   
1 2025-11-30 23:00:00+00:00     Cleveland Cavaliers         Boston Celtics   
2 2025-11-30 23:00:00+00:00         New York Knicks        Toronto Raptors   
3 2025-11-30 23:00:00+00:00      Philadelphia 76ers          Atlanta Hawks   
4 2025-11-30 23:00:00+00:00  Portland Trail Blazers  Oklahoma City Thunder   

      gameId  
0  401810156  
1  401810157  
2  401810158  
3  401810159  
4  401810160  
Saved ESPN scoreboard to C:\Users\wdors\qepc_project\data\live\espn_scoreboard_today.csv


In [6]:
import requests
import pandas as pd
from pathlib import Path
from datetime import datetime

from qepc.autoload.paths import get_data_dir

BALLDONTLIE_BASE = "https://api.balldontlie.io/v1"
BALLDONTLIE_API_KEY = "c5ae7df3-682e-450c-b47e-f7e91396379e"  # or read from env

def fetch_balldontlie_games_for_season(season: int, per_page: int = 100) -> pd.DataFrame:
    """
    Fetch all regular-season games for a given season from balldontlie.
    Returns columns similar to your Games.csv:
      Date, Time, Away Team, Home Team, Venue, Notes
    """
    headers = {}
    if BALLDONTLIE_API_KEY:
        headers["Authorization"] = f"Bearer {BALLDONTLIE_API_KEY}"

    page = 1
    all_rows = []

    while True:
        params = {
            "seasons[]": season,
            "per_page": per_page,
            "page": page,
            # you can filter type if needed; depends on their schema
        }
        resp = requests.get(f"{BALLDONTLIE_BASE}/games", params=params, headers=headers, timeout=10)
        resp.raise_for_status()
        data = resp.json()

        games = data.get("data", [])
        if not games:
            break

        for g in games:
            # balldontlie dates are usually ISO strings
            dt = pd.to_datetime(g.get("date"), errors="coerce")

            home_team = g.get("home_team", {}) or {}
            away_team = g.get("visitor_team", {}) or {}

            home_name = home_team.get("full_name") or home_team.get("name")
            away_name = away_team.get("full_name") or away_team.get("name")

            # You can refine Venue / Notes later if needed
            all_rows.append(
                {
                    "Date": dt.date().isoformat() if pd.notna(dt) else None,
                    "Time": dt.time().strftime("%I:%M %p") if pd.notna(dt) else None,
                    "Away Team": away_name,
                    "Home Team": home_name,
                    "Venue": "",
                    "Notes": g.get("season"),  # placeholder, often regular season
                }
            )

        page += 1
        if page > data.get("meta", {}).get("total_pages", page):
            break

    return pd.DataFrame(all_rows)

# --- TEST + SAVE ---
data_dir = get_data_dir()
raw_dir = Path(data_dir) / "raw"
raw_dir.mkdir(parents=True, exist_ok=True)

games_2025 = fetch_balldontlie_games_for_season(2025)
print(games_2025.head())

out_path = raw_dir / "Games_balldontlie_2025.csv"
games_2025.to_csv(out_path, index=False)
print(f"Saved balldontlie games to {out_path}")


HTTPError: 429 Client Error: Too Many Requests for url: https://api.balldontlie.io/v1/games?seasons%5B%5D=2025&per_page=100&page=6