In [1]:
# IMPROVED FIRST CELL FOR injury_data_fetch.ipynb
# Replace the entire first cell with this code

# 0. QEPC Injury Data Fetch ‚Äì Setup & Helpers

from pathlib import Path
import pandas as pd
from pandas.errors import EmptyDataError
from datetime import datetime
import sys
import os

# IMPROVED: Properly find project root
try:
    from notebook_context import *
    print("‚úÖ notebook_context imported successfully")
except ModuleNotFoundError:
    print("‚ÑπÔ∏è  notebook_context not found, searching for project root...")
    
    # Walk up directories to find project root
    current = Path.cwd()
    project_root = None
    
    # Try current directory and up to 3 parents
    search_paths = [current, current.parent, current.parent.parent, current.parent.parent.parent]
    
    for parent in search_paths:
        # Check for project markers (qepc/ folder AND main.py file)
        if (parent / "qepc").is_dir() and (parent / "main.py").exists():
            project_root = parent
            print(f"   ‚úÖ Found project root: {project_root}")
            
            # Add to path so imports work
            if str(parent) not in sys.path:
                sys.path.insert(0, str(parent))
            
            # Change to project root for consistency
            os.chdir(parent)
            print(f"   üìÇ Changed working directory to project root")
            break
    
    if project_root is None:
        print("‚ö†Ô∏è  Could not find project root markers (qepc/ + main.py)")
        print(f"   Using current directory: {current}")
        project_root = current

# Verify project_root is defined
try:
    project_root
except NameError:
    project_root = Path.cwd()
    print(f"‚ö†Ô∏è  project_root not in context, using CWD: {project_root}")

print(f"\nüìÅ Project Root: {project_root}")

# CRITICAL: Data folder must be at project root level
data_dir = project_root / "data"
data_dir.mkdir(parents=True, exist_ok=True)

print(f"üìä Data Directory: {data_dir}")

# Verify we're in the right place
if not (project_root / "qepc").exists():
    print("\n‚ö†Ô∏è  WARNING: qepc/ folder not found at project root!")
    print("   This might indicate wrong project_root")
else:
    print("‚úÖ Project structure verified")

print("\n" + "="*60)


def status_to_impact(status: str) -> float:
    """
    Map live injury status text -> QEPC Impact factor.
    1.00 = no impact, lower = worse.
    """
    if not isinstance(status, str):
        return 0.95

    s = status.lower()

    if "out" in s:
        return 0.70   # big impact
    if "doubtful" in s:
        return 0.85
    if "questionable" in s:
        return 0.90
    if "probable" in s:
        return 0.95
    if "available" in s or "cleared" in s or "active" in s:
        return 1.00

    return 0.95  # default mild discount


def _save_source(df: pd.DataFrame, filename: str, source_label: str) -> Path:
    """
    Normalize columns and save a single-source injury file.
    Expected columns coming in (at minimum): Team, PlayerName, Status, Injury, EstReturn.

    Adds Impact + Source and writes to data_dir/filename.
    """
    required = ["Team", "PlayerName", "Status", "Injury", "EstReturn"]
    for col in required:
        if col not in df.columns:
            raise ValueError(f"Source {source_label} missing required column: {col}")

    out = df.copy()
    out["Impact"] = out["Status"].apply(status_to_impact)
    out["Source"] = source_label

    path = data_dir / filename
    out.to_csv(path, index=False)
    print(f"‚úÖ Saved {source_label} injuries to: {path.name} ({len(out)} rows)")
    return path

‚ÑπÔ∏è  notebook_context not found, searching for project root...
   ‚úÖ Found project root: /home/2dbcc135-5358-4730-8441-82ada9ea8087/qepc_project
   üìÇ Changed working directory to project root

üìÅ Project Root: /home/2dbcc135-5358-4730-8441-82ada9ea8087/qepc_project
üìä Data Directory: /home/2dbcc135-5358-4730-8441-82ada9ea8087/qepc_project/data
‚úÖ Project structure verified



In [2]:
# 1. Official NBA injury report (nbainjuries)

try:
    from nbainjuries import injury
except ImportError:
    print("‚ùå nbainjuries is not installed in this environment.")
    print("   pip install nbainjuries  (and make sure Java/JVM is installed).")
    official_df = None
else:
    ts = datetime.now()
    print("Requesting official injury report for:", ts)

    # NOTE: Use the same logic you already had working here.
    # I'm assuming it returns a pandas DataFrame called inj_raw_df.

    # --- BEGIN: your existing nbainjuries fetch pattern ---
    # This is a template; if it doesn't match exactly, paste in the version
    # that you already used successfully.
    rep = injury.InjuryReport()        # may differ in your code
    inj_raw_df = rep.to_pandas()       # or whatever method you used
    # --- END: your existing nbainjuries fetch pattern ---

    print("Rows in raw official injury report:", len(inj_raw_df))
    display(inj_raw_df.head(10))

    # Normalize to QEPC schema (Team, PlayerName, Status, Injury, EstReturn)
    official_df = pd.DataFrame({
        "Team": inj_raw_df["Team"],
        "PlayerName": inj_raw_df["Player Name"],
        "Status": inj_raw_df["Current Status"],
        "Injury": inj_raw_df["Reason"],
        "EstReturn": "",  # official reports are per-game, no long ETA text
    })

    _save_source(official_df,
                 filename="Injury_Overrides.csv",
                 source_label="NBA_official_nbainjuries")


‚ùå nbainjuries is not installed in this environment.
   pip install nbainjuries  (and make sure Java/JVM is installed).


In [3]:
# 2. ESPN injuries API (optional)

import requests

espn_df = None

try:
    url = "https://site.api.espn.com/apis/site/v2/sports/basketball/nba/injuries"
    resp = requests.get(url, timeout=10)
    resp.raise_for_status()
    data = resp.json()
    print("Top-level ESPN keys:", list(data.keys()))
except Exception as e:
    print("‚ùå Error fetching ESPN injuries:", e)
else:
    # This part is very dependent on ESPN's current JSON structure.
    # Often it's something like data["injuries"] -> list of team blocks.
    injuries_blocks = data.get("injuries", [])
    records = []

    for team_block in injuries_blocks:
        team_name = team_block.get("team", {}).get("displayName", "")
        for item in team_block.get("injuries", []):
            player_name = item.get("athlete", {}).get("displayName", "")
            status = item.get("status", "")
            detail = item.get("detail", "")
            est_return = ""  # ESPN rarely gives precise dates here

            records.append({
                "Team": team_name,
                "PlayerName": player_name,
                "Status": status,
                "Injury": detail,
                "EstReturn": est_return,
            })

    if records:
        espn_df = pd.DataFrame(records)
        display(espn_df.head(20))
        _save_source(espn_df,
                     filename="Injury_Overrides_live_espn.csv",
                     source_label="ESPN")
    else:
        print("‚ÑπÔ∏è No ESPN injury records parsed; skipping save.")


Top-level ESPN keys: ['timestamp', 'status', 'season', 'injuries']


Unnamed: 0,Team,PlayerName,Status,Injury,EstReturn
0,,N'Faly Dante,Out,,
1,,Trae Young,Out,,
2,,Neemias Queta,Day-To-Day,,
3,,Jayson Tatum,Out,,
4,,Michael Porter Jr.,Out,,
5,,Ben Saraf,Out,,
6,,Cam Thomas,Out,,
7,,Haywood Highsmith,Out,,
8,,Pat Connaughton,Out,,
9,,Josh Green,Out,,


‚úÖ Saved ESPN injuries to: Injury_Overrides_live_espn.csv (108 rows)


In [4]:
# 3. Balldontlie injuries API (optional)

import requests

BALLDONTLIE_API_KEY = "c5ae7df3-682e-450c-b47e-f7e91396379e"  # <- replace with your actual key

def _bdl_headers():
    return {
        "Authorization": f"Bearer {BALLDONTLIE_API_KEY}",
        "Accept": "application/json",
    }

bdl_df = None

try:
    url = "https://api.balldontlie.io/v1/player_injuries"
    params = {"per_page": 100}
    resp = requests.get(url, headers=_bdl_headers(), params=params, timeout=10)
    resp.raise_for_status()
    js = resp.json()
    print("Balldontlie keys:", list(js.keys()))
except Exception as e:
    print("‚ùå Error fetching Balldontlie injuries:", e)
else:
    data = js.get("data", [])
    records = []
    for item in data:
        player = item.get("player", {})
        team = player.get("team", {})
        records.append({
            "Team": team.get("full_name", ""),
            "PlayerName": player.get("full_name", ""),
            "Status": item.get("status", ""),
            "Injury": item.get("description", ""),
            "EstReturn": item.get("return_date", "") or "",
        })

    if records:
        bdl_df = pd.DataFrame(records)
        display(bdl_df.head(20))
        _save_source(bdl_df,
                     filename="Injury_Overrides.csv",
                     source_label="Balldontlie")
    else:
        print("‚ÑπÔ∏è No Balldontlie injury records parsed; skipping save.")


‚ùå Error fetching Balldontlie injuries: 401 Client Error: Unauthorized for url: https://api.balldontlie.io/v1/player_injuries?per_page=100


In [5]:
# 4. Build master injury file from all sources

sources = [
    ("NBA_official_nbainjuries", data_dir / "Injury_Overrides.csv"),
    ("Balldontlie",              data_dir / "Injury_Overrides.csv"),
    ("ESPN",                     data_dir / "Injury_Overrides_live_espn.csv"),
    ("DataDriven",               data_dir / "Injury_Overrides.csv"),
    ("Manual",                   data_dir / "Injury_Overrides.csv"),
]

frames = []
for label, path in sources:
    if not path.exists() or path.stat().st_size == 0:
        print(f"‚ö†Ô∏è Skipping {label}: file missing or empty at {path}")
        continue

    try:
        df = pd.read_csv(path)
    except EmptyDataError:
        print(f"‚ö†Ô∏è Skipping {label}: EmptyDataError in {path.name}")
        continue

    if "Team" not in df.columns:
        print(f"‚ö†Ô∏è {label}: missing 'Team' column, skipping.")
        continue

    if "PlayerName" not in df.columns:
        if "Player" in df.columns:
            df["PlayerName"] = df["Player"]
        else:
            print(f"‚ö†Ô∏è {label}: missing 'PlayerName'/'Player', skipping.")
            continue

    for col, default in [
        ("Status", ""),
        ("Injury", ""),
        ("EstReturn", ""),
        ("Impact", 1.0),
    ]:
        if col not in df.columns:
            df[col] = default

    df["Source"] = label
    frames.append(df[["Team", "PlayerName", "Status", "Injury",
                      "EstReturn", "Impact", "Source"]])

if not frames:
    print("‚ùå No usable injury data found in any source; master file will NOT be updated.")
else:
    all_inj = pd.concat(frames, ignore_index=True)

    # Priority for conflicts: later in this list wins
    priority_order = ["Manual", "DataDriven", "NBA_official_nbainjuries", "Balldontlie", "ESPN"]
    priority_map = {name: rank for rank, name in enumerate(priority_order, start=1)}

    all_inj["priority"] = all_inj["Source"].map(priority_map).fillna(0)

    all_inj = (
        all_inj.sort_values(["Team", "PlayerName", "priority"], ascending=[True, True, False])
        .drop_duplicates(subset=["Team", "PlayerName"], keep="first")
        .drop(columns=["priority"])
        .reset_index(drop=True)
    )

    master_path = data_dir / "Injury_Overrides.csv"
    all_inj.to_csv(master_path, index=False)

    print("‚úÖ Master injury file built.")
    print("   Rows in master:", len(all_inj))
    print("   Saved to:", master_path)
    display(all_inj.head(20))


‚úÖ Master injury file built.
   Rows in master: 196
   Saved to: /home/2dbcc135-5358-4730-8441-82ada9ea8087/qepc_project/data/Injury_Overrides.csv


Unnamed: 0,Team,PlayerName,Status,Injury,EstReturn,Impact,Source
0,Boston Celtics,"Harper Jr., Ron",Available,G League - Two-Way,,1.0,Balldontlie
1,Boston Celtics,"Queta, Neemias",Out,Injury/Illness - Left Ankle; Sprain,,0.7,Balldontlie
2,Boston Celtics,"Shulga, Max",Available,G League - Two-Way,,1.0,Balldontlie
3,Boston Celtics,"Tatum, Jayson",Out,Injury/Illness - Right Achilles; Repair,,0.7,Balldontlie
4,Charlotte Hornets,"Connaughton, Pat",Out,Injury/Illness - Right Calf; Strain,,0.7,Balldontlie
5,Charlotte Hornets,"Green, Josh",Out,Injury/Illness - Left Shoulder; Surgery,,0.7,Balldontlie
6,Charlotte Hornets,"Peterson, Drew",Out,G League - Two-Way,,0.7,Balldontlie
7,Charlotte Hornets,"Reeves, Antonio",Out,G League - Two-Way,,0.7,Balldontlie
8,Charlotte Hornets,"Salaun, Tidjane",Out,G League - On Assignment,,0.7,Balldontlie
9,Charlotte Hornets,"Simpson, KJ",Out,G League - Two-Way,,0.7,Balldontlie
