# 🔄 QEPC Data Refresh

**Update CSV files with latest data from APIs**

⚠️ **Run this notebook on your LOCAL machine only** (not iPad cloud)

This notebook:
1. Fetches latest team statistics from NBA API
2. Updates today's schedule
3. Refreshes injury data
4. Saves everything to CSVs for portable use

---

In [None]:
# SETUP
import sys
from pathlib import Path
from datetime import datetime
import time

# Locate the project root: the first directory, starting at the working
# directory and walking up through its parents, that contains a `data`
# entry. When none of them does, the working directory itself is used.
current = Path.cwd()
project_root = next(
    (
        candidate
        for candidate in (current, *current.parents)
        if candidate.joinpath('data').exists()
    ),
    current,
)

print(f"üìÅ Project root: {project_root}")

# Probe the optional dependencies. Each group is tried independently and
# its availability is recorded in a flag that the later cells test before
# doing any work.
try:
    from nba_api.stats.endpoints import leaguedashteamstats, scoreboardv2
    from nba_api.stats.static import teams
except ImportError:
    HAS_NBA_API = False
    print("‚ùå nba_api not installed. Run: pip install nba_api")
else:
    HAS_NBA_API = True
    print("‚úÖ nba_api available")

try:
    import requests
    from bs4 import BeautifulSoup
except ImportError:
    HAS_SCRAPING = False
    print("‚ö†Ô∏è  requests/bs4 not installed (needed for injury scraping)")
else:
    HAS_SCRAPING = True
    print("‚úÖ requests + beautifulsoup available")

# pandas is imported unconditionally: there is no fallback if it is missing.
import pandas as pd
print("‚úÖ Ready!")

---
## 📊 Refresh Team Stats (Live Ratings)

In [None]:
# Fetch league-wide advanced team stats for the current season and write
# them to data/live/team_stats_live_nba_api.csv under the project root.
if not HAS_NBA_API:
    print("‚ö†Ô∏è  Skipping - nba_api not available")
else:
    print("üìä Fetching team stats from NBA API...")

    # Season label is "<start year>-<last two digits of the next year>".
    # From October onward the season starting this calendar year is used;
    # before October, the one that started the previous calendar year.
    now = datetime.now()
    start_year = now.year if now.month >= 10 else now.year - 1
    season = f"{start_year}-{str(start_year + 1)[2:]}"

    print(f"   Season: {season}")

    # API column -> column name written to the CSV. TEAM_ID is kept as-is.
    # Key order here is also the column order of the output file.
    renamed_columns = {
        'TEAM_NAME': 'Team',
        'W': 'Wins',
        'L': 'Losses',
        'GP': 'GamesPlayed',
        'OFF_RATING': 'ORtg',
        'DEF_RATING': 'DRtg',
        'NET_RATING': 'NetRtg',
        'PACE': 'Pace',
    }

    try:
        time.sleep(1)  # short pause before calling the API
        stats = leaguedashteamstats.LeagueDashTeamStats(
            season=season,
            measure_type_detailed_defense='Advanced',
            per_mode_detailed='PerGame',
        )
        df = stats.get_data_frames()[0]

        # Keep only the columns of interest, then apply the friendlier names.
        live_stats = df[['TEAM_ID', *renamed_columns]].rename(columns=renamed_columns)
        live_stats['Season'] = season

        # Write the CSV, creating data/live on first use.
        output_path = project_root.joinpath('data', 'live', 'team_stats_live_nba_api.csv')
        output_path.parent.mkdir(parents=True, exist_ok=True)
        live_stats.to_csv(output_path, index=False)

        print(f"‚úÖ Saved {len(live_stats)} teams to: {output_path.name}")
        display(live_stats.head())
    except Exception as e:
        # Best-effort cell: report the failure and let the notebook continue.
        print(f"‚ùå Error: {e}")

---
## 📅 Refresh Today's Schedule

In [None]:
# Fetch today's games from the NBA scoreboard endpoint and write two CSVs
# under data/live: the full game list (games_today_nba_api.csv) and a
# three-column view (espn_scoreboard_today.csv).
if HAS_NBA_API:
    print("üìÖ Fetching today's games...")

    try:
        time.sleep(1)  # short pause before calling the API
        scoreboard = scoreboardv2.ScoreboardV2(game_date=datetime.now().strftime('%Y-%m-%d'))
        games_df = scoreboard.get_data_frames()[0]

        if len(games_df) > 0:
            # team id -> full team name, built from nba_api's static team list
            all_teams = {t['id']: t['full_name'] for t in teams.get_teams()}

            # One record per scoreboard row; ids missing from the static
            # list are labelled 'Unknown'.
            today_games = [
                {
                    'GAME_ID': game['GAME_ID'],
                    'GAME_DATE_EST': game['GAME_DATE_EST'],
                    'HOME_TEAM_ID': game['HOME_TEAM_ID'],
                    'HOME_TEAM_NAME': all_teams.get(game['HOME_TEAM_ID'], 'Unknown'),
                    'AWAY_TEAM_ID': game['VISITOR_TEAM_ID'],
                    'AWAY_TEAM_NAME': all_teams.get(game['VISITOR_TEAM_ID'], 'Unknown'),
                }
                for _, game in games_df.iterrows()
            ]
            today_df = pd.DataFrame(today_games)

            # Also create simple format
            simple_df = today_df[['GAME_DATE_EST', 'HOME_TEAM_NAME', 'AWAY_TEAM_NAME']].copy()
            simple_df.columns = ['gameDate', 'Home Team', 'Away Team']

            # Create data/live here rather than relying on the team-stats
            # cell having run successfully first; to_csv does not create
            # missing parent directories.
            live_dir = project_root / 'data' / 'live'
            live_dir.mkdir(parents=True, exist_ok=True)

            output_path = live_dir / 'games_today_nba_api.csv'
            today_df.to_csv(output_path, index=False)

            simple_path = live_dir / 'espn_scoreboard_today.csv'
            simple_df.to_csv(simple_path, index=False)

            print(f"‚úÖ Found {len(today_df)} games today!")
            display(simple_df)
        else:
            # NOTE(review): CSVs from an earlier run are left untouched on a
            # no-game day - confirm downstream readers do not treat them as
            # today's slate.
            print("üì≠ No games scheduled for today")

    except Exception as e:
        # Best-effort cell: report the failure and let the notebook continue.
        print(f"‚ùå Error: {e}")
else:
    print("‚ö†Ô∏è  Skipping - nba_api not available")

---
## 🏥 Refresh Injury Data

In [None]:
# Scrape ESPN's NBA injuries page and save one row per listed player to
# data/Injury_Overrides_live_espn.csv under the project root.


def _impact_for_status(status):
    """Return the impact multiplier stored in the CSV for a status string."""
    lowered = status.lower()
    if 'out' in lowered:
        return 0.7  # Out = significant impact
    if 'day-to-day' in lowered:
        return 0.95  # Day-to-day = might play
    return 0.85  # any other status


def _header_index(labels, wanted, fallback):
    """Return the index in `labels` of the first entry of `wanted` found there.

    Returns `fallback` when none of the wanted labels is present, so a
    table without a recognisable header row is read by fixed position.
    """
    for label in wanted:
        if label in labels:
            return labels.index(label)
    return fallback


def _parse_injury_tables(soup):
    """Return one dict per player row found in the ResponsiveTable divs of `soup`."""
    parsed = []
    for table in soup.find_all('div', class_='ResponsiveTable'):
        # A title nested inside this wrapper belongs to this table. Only
        # when there is none, use the nearest title earlier in the page;
        # looking backwards first would pick up the previous team's title.
        team_header = table.find('div', class_='Table__Title')
        if team_header is None:
            team_header = table.find_previous('div', class_='Table__Title')
        team_name = team_header.text.strip() if team_header is not None else 'Unknown'

        # Locate columns by header text where a header row exists.
        # NOTE(review): ESPN's table is believed to carry POS and
        # EST. RETURN DATE columns between NAME and STATUS, which would make
        # the fixed positions 1 and 2 wrong - confirm against the live page.
        labels = [th.text.strip().upper() for th in table.find_all('th')]
        name_idx = _header_index(labels, ('NAME',), 0)
        status_idx = _header_index(labels, ('STATUS',), 1)
        injury_idx = _header_index(labels, ('INJURY', 'COMMENT'), 2)

        for row in table.find_all('tr', class_='Table__TR'):
            cols = row.find_all('td')
            # Rows with fewer than two data cells are skipped, as are rows
            # too short to contain the name column.
            if len(cols) < 2 or len(cols) <= name_idx:
                continue

            player_name = cols[name_idx].text.strip()
            # A listed player with no status cell is treated as 'Out'.
            status = cols[status_idx].text.strip() if len(cols) > status_idx else 'Out'
            injury = cols[injury_idx].text.strip() if len(cols) > injury_idx else ''

            parsed.append({
                'Team': team_name,
                'PlayerName': player_name,
                'Status': status,
                'Injury': injury,
                'Impact': _impact_for_status(status),
                'Source': 'ESPN',
            })
    return parsed


if HAS_SCRAPING:
    print("üè• Fetching injury data from ESPN...")

    try:
        url = 'https://www.espn.com/nba/injuries'
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, headers=headers, timeout=10)

        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            injuries = _parse_injury_tables(soup)

            if injuries:
                injury_df = pd.DataFrame(injuries)

                # project_root may be a directory without data/ (the setup
                # cell falls back to the working directory), and to_csv does
                # not create missing parent directories.
                output_path = project_root / 'data' / 'Injury_Overrides_live_espn.csv'
                output_path.parent.mkdir(parents=True, exist_ok=True)
                injury_df.to_csv(output_path, index=False)

                print(f"‚úÖ Found {len(injury_df)} injuries")
                display(injury_df.head(10))
            else:
                print("‚ö†Ô∏è  No injuries parsed (ESPN may have changed format)")
        else:
            print(f"‚ùå HTTP Error: {response.status_code}")

    except Exception as e:
        # Best-effort cell: report the failure and let the notebook continue.
        print(f"‚ùå Error: {e}")
else:
    print("‚ö†Ô∏è  Skipping - requests/beautifulsoup not available")

---
## 📊 Update Game Results (for backtesting)

In [None]:
# Placeholder cell: no game results are fetched here. It only prints
# pointers to the scripts / endpoint that can do the update.
if HAS_NBA_API:
    print("üìä Fetching recent game results...")

    # This would fetch game logs for the season
    # For now, just show what would be done

    # The message is spelled out line by line so its leading/trailing
    # whitespace is visible and survives editors that strip trailing spaces.
    print(
        "    \n"
        "    To update game results, you can run your existing:\n"
        "    - fetch_nba_results.py\n"
        "    - merge_schedules.py\n"
        "    \n"
        "    Or use the nba_api TeamGameLogs endpoint.\n"
        "    "
    )
else:
    # Same skip notice as the other cells that depend on nba_api.
    print("‚ö†Ô∏è  Skipping - nba_api not available")

---
## ✅ Summary

In [None]:
# Final report: a banner followed by the size and modification time of the
# CSV files this notebook writes.


def _report_csv(path):
    """Print one aligned row (file name, size in KB, modification time) for `path`."""
    info = path.stat()  # one stat call, so size and mtime describe the same state
    size = info.st_size / 1024
    mtime = datetime.fromtimestamp(info.st_mtime)
    print(f"   {path.name:40} {size:6.1f} KB  (updated: {mtime:%Y-%m-%d %H:%M})")


print("\n" + "=" * 50)
print("üì¶ DATA REFRESH COMPLETE")
print("=" * 50)

# CSVs under data/live, listed in name order so the report is stable
# from run to run.
live_dir = project_root / 'data' / 'live'
if live_dir.exists():
    print(f"\nüìÅ Files in {live_dir.name}/")
    for f in sorted(live_dir.glob('*.csv')):
        _report_csv(f)

# The injury cell saves directly under data/, not data/live/, so that file
# is reported separately.
injury_csv = project_root / 'data' / 'Injury_Overrides_live_espn.csv'
if injury_csv.exists():
    print(f"\nüìÅ Files in {injury_csv.parent.name}/")
    _report_csv(injury_csv)

print("\nüèÄ You can now run predictions with fresh data!")