In [1]:
# 0. QEPC Notebook Bootstrap – Setup & Helpers

from pathlib import Path
import sys
import pandas as pd
from pandas.errors import EmptyDataError
from datetime import datetime

def find_project_root() -> Path:
    """
    Walk upwards from the current working directory until we find
    a folder that looks like the QEPC project root.
    """
    here = Path.cwd()
    for parent in [here] + list(here.parents):
        if (parent / "main.py").exists() and (parent / "qepc").exists():
            return parent
    return here

project_root = find_project_root()

if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

print("QEPC project root:", project_root)

# Try to load the shared notebook context (colors, helpers, etc.)
try:
    from notebook_context import *
    print("✅ notebook_context imported.")
except ModuleNotFoundError:
    print("⚠️ notebook_context not found; continuing with bare project_root only.")

# Where this notebook will read/write injury-related CSVs
data_dir = project_root / "data" / "injuries"
data_dir.mkdir(parents=True, exist_ok=True)
print("Injury data directory:", data_dir)


ℹ️ notebook_context not found on sys.path; using CWD as project_root.
project_root: C:\Users\wdorsey\qepc_project\notebooks\02_utilities
data_dir: C:\Users\wdorsey\qepc_project\notebooks\02_utilities\data


In [2]:
# 1. Official NBA injury report (nbainjuries)

try:
    from nbainjuries import injury
except ImportError:
    print("❌ nbainjuries is not installed in this environment.")
    print("   pip install nbainjuries  (and make sure Java/JVM is installed).")
    official_df = None
else:
    ts = datetime.now()
    print("Requesting official injury report for:", ts)

    # NOTE: Use the same logic you already had working here.
    # I'm assuming it returns a pandas DataFrame called inj_raw_df.

    # --- BEGIN: your existing nbainjuries fetch pattern ---
    # This is a template; if it doesn't match exactly, paste in the version
    # that you already used successfully.
    rep = injury.InjuryReport()        # may differ in your code
    inj_raw_df = rep.to_pandas()       # or whatever method you used
    # --- END: your existing nbainjuries fetch pattern ---

    print("Rows in raw official injury report:", len(inj_raw_df))
    display(inj_raw_df.head(10))

    # Normalize to QEPC schema (Team, PlayerName, Status, Injury, EstReturn)
    official_df = pd.DataFrame({
        "Team": inj_raw_df["Team"],
        "PlayerName": inj_raw_df["Player Name"],
        "Status": inj_raw_df["Current Status"],
        "Injury": inj_raw_df["Reason"],
        "EstReturn": "",  # official reports are per-game, no long ETA text
    })

    _save_source(official_df,
                 filename="Injury_Overrides.csv",
                 source_label="NBA_official_nbainjuries")


JVMNotFoundException: No JVM shared library file (jvm.dll) found. Try setting up the JAVA_HOME environment variable properly.

In [None]:
# 2. ESPN injuries API (optional)

import requests

espn_df = None

try:
    url = "https://site.api.espn.com/apis/site/v2/sports/basketball/nba/injuries"
    resp = requests.get(url, timeout=10)
    resp.raise_for_status()
    data = resp.json()
    print("Top-level ESPN keys:", list(data.keys()))
except Exception as e:
    print("❌ Error fetching ESPN injuries:", e)
else:
    # This part is very dependent on ESPN's current JSON structure.
    # Often it's something like data["injuries"] -> list of team blocks.
    injuries_blocks = data.get("injuries", [])
    records = []

    for team_block in injuries_blocks:
        team_name = team_block.get("team", {}).get("displayName", "")
        for item in team_block.get("injuries", []):
            player_name = item.get("athlete", {}).get("displayName", "")
            status = item.get("status", "")
            detail = item.get("detail", "")
            est_return = ""  # ESPN rarely gives precise dates here

            records.append({
                "Team": team_name,
                "PlayerName": player_name,
                "Status": status,
                "Injury": detail,
                "EstReturn": est_return,
            })

    if records:
        espn_df = pd.DataFrame(records)
        display(espn_df.head(20))
        _save_source(espn_df,
                     filename="Injury_Overrides_live_espn.csv",
                     source_label="ESPN")
    else:
        print("ℹ️ No ESPN injury records parsed; skipping save.")


In [None]:
# 3. Balldontlie injuries API (optional)

import requests

BALLDONTLIE_API_KEY = "c5ae7df3-682e-450c-b47e-f7e91396379e"  # <- replace with your actual key

def _bdl_headers():
    return {
        "Authorization": f"Bearer {BALLDONTLIE_API_KEY}",
        "Accept": "application/json",
    }

bdl_df = None

try:
    url = "https://api.balldontlie.io/v1/player_injuries"
    params = {"per_page": 100}
    resp = requests.get(url, headers=_bdl_headers(), params=params, timeout=10)
    resp.raise_for_status()
    js = resp.json()
    print("Balldontlie keys:", list(js.keys()))
except Exception as e:
    print("❌ Error fetching Balldontlie injuries:", e)
else:
    data = js.get("data", [])
    records = []
    for item in data:
        player = item.get("player", {})
        team = player.get("team", {})
        records.append({
            "Team": team.get("full_name", ""),
            "PlayerName": player.get("full_name", ""),
            "Status": item.get("status", ""),
            "Injury": item.get("description", ""),
            "EstReturn": item.get("return_date", "") or "",
        })

    if records:
        bdl_df = pd.DataFrame(records)
        display(bdl_df.head(20))
        _save_source(bdl_df,
                     filename="Injury_Overrides.csv",
                     source_label="Balldontlie")
    else:
        print("ℹ️ No Balldontlie injury records parsed; skipping save.")


In [None]:
# 4. Build master injury file from all sources

sources = [
    ("NBA_official_nbainjuries", data_dir / "Injury_Overrides.csv"),
    ("Balldontlie",              data_dir / "Injury_Overrides.csv"),
    ("ESPN",                     data_dir / "Injury_Overrides_live_espn.csv"),
    ("DataDriven",               data_dir / "Injury_Overrides.csv"),
    ("Manual",                   data_dir / "Injury_Overrides.csv"),
]

frames = []
for label, path in sources:
    if not path.exists() or path.stat().st_size == 0:
        print(f"⚠️ Skipping {label}: file missing or empty at {path}")
        continue

    try:
        df = pd.read_csv(path)
    except EmptyDataError:
        print(f"⚠️ Skipping {label}: EmptyDataError in {path.name}")
        continue

    if "Team" not in df.columns:
        print(f"⚠️ {label}: missing 'Team' column, skipping.")
        continue

    if "PlayerName" not in df.columns:
        if "Player" in df.columns:
            df["PlayerName"] = df["Player"]
        else:
            print(f"⚠️ {label}: missing 'PlayerName'/'Player', skipping.")
            continue

    for col, default in [
        ("Status", ""),
        ("Injury", ""),
        ("EstReturn", ""),
        ("Impact", 1.0),
    ]:
        if col not in df.columns:
            df[col] = default

    df["Source"] = label
    frames.append(df[["Team", "PlayerName", "Status", "Injury",
                      "EstReturn", "Impact", "Source"]])

if not frames:
    print("❌ No usable injury data found in any source; master file will NOT be updated.")
else:
    all_inj = pd.concat(frames, ignore_index=True)

    # Priority for conflicts: later in this list wins
    priority_order = ["Manual", "DataDriven", "NBA_official_nbainjuries", "Balldontlie", "ESPN"]
    priority_map = {name: rank for rank, name in enumerate(priority_order, start=1)}

    all_inj["priority"] = all_inj["Source"].map(priority_map).fillna(0)

    all_inj = (
        all_inj.sort_values(["Team", "PlayerName", "priority"], ascending=[True, True, False])
        .drop_duplicates(subset=["Team", "PlayerName"], keep="first")
        .drop(columns=["priority"])
        .reset_index(drop=True)
    )

    master_path = data_dir / "Injury_Overrides.csv"
    all_inj.to_csv(master_path, index=False)

    print("✅ Master injury file built.")
    print("   Rows in master:", len(all_inj))
    print("   Saved to:", master_path)
    display(all_inj.head(20))
