In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
"""
NBA SPREAD BETTING MODEL - IMPROVED VERSION

Predicts which team will cover the spread (much more predictable than totals!)

Uses efficient bulk data collection:
1. Download ALL NBA games at once (2 min)
2. Download ALL team stats (30 sec)
3. Download ALL spread odds (~10 min)
4. Match and train (2 min)

Total time: ~15 minutes
Expected accuracy: 58-65%
"""

!pip install nba_api
import os
import warnings
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import requests
import pickle
from time import sleep
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import xgboost as xgb

# Suppress warnings
warnings.filterwarnings("ignore")

# ============================================================================
# CONFIGURATION
# ============================================================================
# NOTE(review): this looks like a real API credential committed in source.
# Consider loading it from an environment variable / Colab secret and
# rotating the key.
API_KEY = 'c2985c3ef85314baae6d81157cbad3f5'
# Sport key interpolated into the odds API URLs used below.
ODDS_SPORT_KEY = 'basketball_nba'

# Google Drive paths
PROJECT_FOLDER = 'nba_spread_model'
# Folder where the downloaded CSVs and the pickled model/feature list are stored.
BASE_PATH = f'/content/drive/MyDrive/{PROJECT_FOLDER}'

# Training parameters
MIN_GAMES = 5  # Minimum number of prior games a team needs before recent stats are computed
START_DATE = '2020-10-01'  # Earliest game date for which historical odds are requested

# ============================================================================
# STEP 1: SETUP
# ============================================================================

def setup_environment():
    """Prepare the runtime: nba_api available, Drive mounted, project folder present.

    Side effects:
        * Installs ``nba_api`` with pip when importing it fails.
        * When running inside Google Colab and ``/content/drive`` does not
          exist yet, mounts Google Drive there.
        * Creates ``BASE_PATH`` if it is missing.

    Mounting is best-effort: outside Colab (or if the mount fails) the
    function carries on and ``BASE_PATH`` is simply created on the local
    filesystem.
    """
    print("=" * 100)
    print("STEP 1: ENVIRONMENT SETUP")
    print("=" * 100)

    # Install nba_api if needed
    try:
        from nba_api.stats.endpoints import LeagueGameLog  # noqa: F401
    except ImportError:
        import subprocess
        import sys

        print("\nInstalling nba_api...")
        # Use the running interpreter's pip so the package is installed into
        # the environment this script actually imports from.
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", "--quiet", "nba_api"],
            check=False,
        )
        if result.returncode != 0:
            print(f"‚ö†Ô∏è  pip exited with status {result.returncode} - nba_api may be unavailable")

    # Mount Google Drive (only possible inside Colab)
    try:
        from google.colab import drive
    except ImportError:
        drive = None  # Not running in Colab; nothing to mount.

    if drive is not None and not os.path.exists('/content/drive'):
        print("\nMounting Google Drive...")
        try:
            drive.mount('/content/drive')
        except Exception as e:  # best-effort: report instead of hiding the failure
            print(f"‚ö†Ô∏è  Could not mount Google Drive: {e}")

    # Create project folder
    folder_existed = os.path.exists(BASE_PATH)
    os.makedirs(BASE_PATH, exist_ok=True)
    if folder_existed:
        print(f"‚úÖ Using folder: {BASE_PATH}")
    else:
        print(f"‚úÖ Created folder: {BASE_PATH}")

    print("\n‚úÖ Setup complete!\n")

# ============================================================================
# STEP 2: DOWNLOAD ALL NBA GAMES + TEAM STATS (FAST!)
# ============================================================================

def download_all_nba_games():
    """Download regular-season game logs from 2020-21 through the current season.

    One ``LeagueGameLog`` request is made per season (up to three attempts
    each, with a fixed 180 second timeout). Seasons that still fail are
    skipped. The combined log is written to ``nba_games_all.csv`` under
    ``BASE_PATH``.

    Returns:
        pandas.DataFrame: all downloaded rows with an added ``SEASON`` column
        and ``GAME_DATE`` converted to datetime. The log appears to hold one
        row per team per game (``create_training_data`` looks up separate
        home and away rows), so the row count is about twice the game count.

    Raises:
        RuntimeError: if no season could be downloaded at all.
    """
    print("=" * 100)
    print("STEP 2: DOWNLOADING NBA GAMES")
    print("=" * 100)

    from nba_api.stats.endpoints import LeagueGameLog

    max_attempts = 3
    first_season_start = 2020

    # A season that starts in year Y is still "current" until October of Y+1.
    now = datetime.now()
    current_season_start = now.year - 1 if now.month < 10 else now.year

    # Season labels look like "2020-21", "2021-22", ...
    seasons = [
        f"{year}-{str(year + 1)[-2:]}"
        for year in range(first_season_start, current_season_start + 1)
    ]

    print(f"\nDownloading seasons: {seasons}")

    all_games = []
    for season in seasons:
        print(f"  Downloading {season}...", end=" ")

        success = False
        for attempt in range(max_attempts):
            try:
                sleep(2)  # Be extra nice to API

                log = LeagueGameLog(
                    season=season,
                    season_type_all_star="Regular Season",
                    timeout=180  # 3 minutes
                )
                df = log.get_data_frames()[0]
                df['SEASON'] = season
                all_games.append(df)

                print(f"‚úÖ {len(df)} team-game rows")
                success = True
                break

            except Exception as e:
                # Report what actually went wrong; not every failure is a timeout.
                if attempt < max_attempts - 1:
                    print(
                        f"‚è≥ {type(e).__name__}, retrying "
                        f"(attempt {attempt + 2}/{max_attempts})...",
                        end=" ",
                    )
                    sleep(5)
                else:
                    print(f"‚ùå Failed after {max_attempts} attempts: {str(e)[:100]}")

        if not success:
            print(f"  ‚ö†Ô∏è  Skipping {season} - will continue with other seasons")

    if not all_games:
        raise RuntimeError("‚ùå Could not download any NBA games. Try again later.")

    # Combine all seasons
    games_df = pd.concat(all_games, ignore_index=True)

    # Convert date
    games_df['GAME_DATE'] = pd.to_datetime(games_df['GAME_DATE'])

    # Save to Drive
    save_path = os.path.join(BASE_PATH, 'nba_games_all.csv')
    games_df.to_csv(save_path, index=False)

    print(f"\n‚úÖ Downloaded {len(games_df):,} team-game rows from {len(all_games)} seasons")
    print(f"üíæ Saved to: {save_path}\n")

    return games_df

def download_team_stats():
    """Download per-season advanced team stats (ratings, pace, shooting efficiency).

    One ``LeagueDashTeamStats`` request is made per season from 2020-21 to
    the current season, using the *Advanced* measure type. That is the table
    carrying ``OFF_RATING``, ``DEF_RATING``, ``NET_RATING``, ``PACE``,
    ``EFG_PCT`` and ``TS_PCT``, the columns ``get_team_recent_stats`` reads.
    The result is saved to ``nba_team_stats.csv`` under ``BASE_PATH``.

    NOTE(review): each row is requested per season with no date range, so it
    is presumably a whole-season aggregate. Joining it onto games played
    earlier in that season would expose later results to the model - confirm
    this is acceptable for training.

    Returns:
        pandas.DataFrame | None: one row per team-season with an added
        ``SEASON`` column, or ``None`` when every request failed.
    """
    print("=" * 100)
    print("STEP 2B: DOWNLOADING DETAILED TEAM STATS")
    print("=" * 100)

    from nba_api.stats.endpoints import LeagueDashTeamStats

    # A season that starts in year Y is still "current" until October of Y+1.
    now = datetime.now()
    current_season_start = now.year - 1 if now.month < 10 else now.year

    seasons = [
        f"{year}-{str(year + 1)[-2:]}"
        for year in range(2020, current_season_start + 1)
    ]

    print(f"\nDownloading detailed stats for: {seasons}")

    all_stats = []
    for season in seasons:
        print(f"  Fetching {season} team stats...", end=" ")

        try:
            sleep(2)

            stats = LeagueDashTeamStats(
                season=season,
                season_type_all_star="Regular Season",
                per_mode_detailed="PerGame",
                # The default measure type has no rating/pace columns.
                measure_type_detailed_defense="Advanced",
                timeout=180
            )

            df = stats.get_data_frames()[0]
            df['SEASON'] = season
            all_stats.append(df)

            print(f"‚úÖ {len(df)} teams")

        except Exception as e:
            # Best-effort: a failed season is reported and skipped.
            print(f"‚ùå Error: {str(e)[:100]}")

    if not all_stats:
        print("\n‚ö†Ô∏è  Could not download team stats - will use basic stats only")
        return None

    stats_df = pd.concat(all_stats, ignore_index=True)

    # Save
    save_path = os.path.join(BASE_PATH, 'nba_team_stats.csv')
    stats_df.to_csv(save_path, index=False)

    print(f"\n‚úÖ Downloaded stats for {len(stats_df)} team-seasons")
    print(f"üíæ Saved to: {save_path}\n")

    return stats_df

# ============================================================================
# STEP 3: DOWNLOAD ALL ODDS (FAST!)
# ============================================================================

def _draftkings_spread_rows(event, date_str):
    """Return one odds record per DraftKings spread market found in *event*."""
    home = event.get('home_team')
    away = event.get('away_team')
    rows = []

    for bookmaker in event.get('bookmakers', []):
        if bookmaker.get('key') != 'draftkings':
            continue
        for market in bookmaker.get('markets', []):
            if market.get('key') != 'spreads':
                continue

            home_spread = None
            away_spread = None
            for outcome in market.get('outcomes', []):
                if outcome.get('name') == home:
                    home_spread = outcome.get('point')
                elif outcome.get('name') == away:
                    away_spread = outcome.get('point')

            # Only keep lines where both sides were quoted.
            if home_spread is not None and away_spread is not None:
                rows.append({
                    'date': date_str,
                    'home_team': home,
                    'away_team': away,
                    'home_spread': home_spread,
                    'away_spread': away_spread
                })

    return rows


def download_all_odds(games_df, api_key):
    """Download historical DraftKings spread lines for every game date.

    For each distinct ``GAME_DATE`` on or after ``START_DATE`` a snapshot is
    requested from the historical odds endpoint at 23:59 UTC of that date.
    Every event in the snapshot that has a DraftKings spread for both teams
    becomes one record. The result is saved to ``nba_odds_spreads.csv`` under
    ``BASE_PATH``.

    NOTE(review): every event in a snapshot is stamped with the snapshot
    date, whatever date the event itself is scheduled for; downstream
    matching has to cope with that.

    Args:
        games_df: game log with a datetime ``GAME_DATE`` column.
        api_key: key sent as ``apiKey`` to the odds API.

    Returns:
        pandas.DataFrame with columns ``date``, ``home_team``, ``away_team``,
        ``home_spread``, ``away_spread`` (possibly with zero rows).
    """
    print("=" * 100)
    print("STEP 3: DOWNLOADING HISTORICAL SPREAD ODDS")
    print("=" * 100)

    odds_columns = ['date', 'home_team', 'away_team', 'home_spread', 'away_spread']

    # Get unique game dates after START_DATE
    start_cutoff = pd.to_datetime(START_DATE)
    unique_dates = sorted(
        games_df.loc[games_df['GAME_DATE'] >= start_cutoff, 'GAME_DATE'].dt.date.unique()
    )

    print(f"\nDates to fetch: {len(unique_dates)}")
    if unique_dates:
        print(f"From: {unique_dates[0]} to {unique_dates[-1]}")
        print("Estimated time: ~10-15 minutes\n")
    else:
        print(f"‚ö†Ô∏è  No game dates on or after {START_DATE} - nothing to download\n")

    url = f"https://api.the-odds-api.com/v4/historical/sports/{ODDS_SPORT_KEY}/odds"
    all_odds = []
    failed_dates = 0

    for i, date in enumerate(unique_dates):
        if i % 25 == 0:
            print(f"Progress: {i}/{len(unique_dates)} ({i/len(unique_dates)*100:.0f}%)")

        date_str = date.strftime('%Y-%m-%d')
        params = {
            'apiKey': api_key,
            'regions': 'us',
            'markets': 'spreads',
            'bookmakers': 'draftkings',
            'date': f"{date_str}T23:59:00Z",
            'oddsFormat': 'american',
        }

        try:
            r = requests.get(url, params=params, timeout=20)
        except requests.RequestException as e:
            failed_dates += 1
            print(f"  Error on {date_str}: {e}")
            continue

        if r.status_code in (401, 403):
            # Authentication/authorisation failures will repeat for every
            # remaining date, so stop instead of looping over all of them.
            print(f"‚ùå Odds API rejected the request ({r.status_code}): {r.text[:200]}")
            break

        if r.status_code != 200:
            failed_dates += 1
            print(f"  HTTP {r.status_code} on {date_str}: {r.text[:100]}")
            sleep(0.3)  # Rate limiting
            continue

        try:
            data = r.json()
        except ValueError as e:
            failed_dates += 1
            print(f"  Error on {date_str}: {e}")
            continue

        events = data.get('data', []) if isinstance(data, dict) else []
        for event in events:
            all_odds.extend(_draftkings_spread_rows(event, date_str))

        sleep(0.3)  # Rate limiting

    # Explicit columns keep the schema intact even when nothing was fetched.
    odds_df = pd.DataFrame(all_odds, columns=odds_columns)

    # Save
    save_path = os.path.join(BASE_PATH, 'nba_odds_spreads.csv')
    odds_df.to_csv(save_path, index=False)

    print(f"\n‚úÖ Downloaded {len(odds_df):,} spread lines")
    if failed_dates:
        print(f"‚ö†Ô∏è  {failed_dates} dates could not be fetched")
    print(f"üíæ Saved to: {save_path}\n")

    return odds_df

# ============================================================================
# STEP 4: MATCH GAMES WITH ODDS & CREATE TRAINING DATA
# ============================================================================

def _match_team_rows(days_games, team_name):
    """Return the rows of *days_games* whose TEAM_NAME equals *team_name* or contains its last word."""
    names = days_games['TEAM_NAME']
    nickname = team_name.split()[-1]
    # regex=False: the nickname is a literal substring, not a pattern.
    return days_games[
        (names == team_name) | names.str.contains(nickname, na=False, regex=False)
    ]


def create_training_data(games_df, odds_df, team_stats_df=None):
    """Join spread lines to game results and build one training row per game.

    For every odds record the home and away rows of the game log on that
    date are located by team name. Both rows must belong to the same
    ``GAME_ID`` (when that column exists), so a line is never paired with
    scores from two different games. Rolling stats for both teams come from
    ``get_team_recent_stats``; records lacking a match or enough history are
    skipped. The result is saved to ``training_data.csv`` under ``BASE_PATH``.

    Label convention: ``home_spread`` is the handicap applied to the home
    team (negative when the home team is favoured), so the home team covers
    when ``actual_margin + home_spread > 0``. An exact push counts as
    "not covered".

    Args:
        games_df: game log with ``GAME_DATE``, ``TEAM_NAME``, ``TEAM_ID``,
            ``PTS`` (and the columns ``get_team_recent_stats`` needs).
        odds_df: records with ``date``, ``home_team``, ``away_team``,
            ``home_spread``, ``away_spread``. Not modified.
        team_stats_df: optional per-season team stats passed through to
            ``get_team_recent_stats``.

    Returns:
        pandas.DataFrame of training samples (possibly empty).
    """
    print("=" * 100)
    print("STEP 4: CREATING TRAINING DATA")
    print("=" * 100)

    training_samples = []

    if odds_df is None or odds_df.empty:
        print("\n‚ö†Ô∏è  No odds records supplied - nothing to match")
        odds = pd.DataFrame(
            columns=['date', 'home_team', 'away_team', 'home_spread', 'away_spread']
        )
    else:
        # Work on a copy so the caller's frame keeps its original dtypes.
        odds = odds_df.copy()
        odds['date'] = pd.to_datetime(odds['date'])

    has_game_id = 'GAME_ID' in games_df.columns

    print(f"\nProcessing {len(odds)} odds records...")

    for _, odds_row in odds.iterrows():
        game_date = odds_row['date']
        home_team = odds_row['home_team']
        away_team = odds_row['away_team']
        home_spread = odds_row['home_spread']

        # Find matching games on this date
        days_games = games_df[games_df['GAME_DATE'] == game_date]

        home_rows = _match_team_rows(days_games, home_team)
        away_rows = _match_team_rows(days_games, away_team)

        if home_rows.empty or away_rows.empty:
            continue

        home_game = home_rows.iloc[0]

        if has_game_id:
            # Both teams may have played that day against other opponents;
            # only accept the away row from the very same game.
            away_rows = away_rows[away_rows['GAME_ID'] == home_game['GAME_ID']]
            if away_rows.empty:
                continue

        away_game = away_rows.iloc[0]

        if pd.isna(home_game['PTS']) or pd.isna(away_game['PTS']):
            continue

        # Get stats for both teams (last 5 games before this date)
        home_stats = get_team_recent_stats(
            games_df,
            home_game['TEAM_ID'],
            game_date,
            team_stats_df,
            window=5
        )

        away_stats = get_team_recent_stats(
            games_df,
            away_game['TEAM_ID'],
            game_date,
            team_stats_df,
            window=5
        )

        if not home_stats or not away_stats:
            continue

        # Calculate actual margin (home team perspective)
        home_pts = int(home_game['PTS'])
        away_pts = int(away_game['PTS'])
        actual_margin = home_pts - away_pts

        # Did home team cover? Apply the handicap to the home margin:
        # e.g. home -5.5 winning by 3 gives 3 + (-5.5) < 0, so no cover.
        home_covered = (actual_margin + home_spread) > 0

        # Calculate expected margin based on recent performance
        expected_margin = (home_stats['avg_pts'] - home_stats['avg_pts_allowed']) - \
                         (away_stats['avg_pts'] - away_stats['avg_pts_allowed'])

        # NOTE(review): with home_spread negative for favourites, the edge
        # over the line would be expected_margin + home_spread. The formula
        # is kept as-is because make_predictions builds the same feature the
        # same way; change both together.
        spread_advantage = expected_margin - home_spread

        # Create sample with all available stats
        sample = {
            'date': game_date,
            'home_team': home_team,
            'away_team': away_team,
            'home_spread': home_spread,
            'actual_margin': actual_margin,
            'home_covered': home_covered,

            # Home team stats
            'home_avg_pts': home_stats['avg_pts'],
            'home_avg_pts_allowed': home_stats['avg_pts_allowed'],
            'home_pace': home_stats['pace'],
            'home_fg_pct': home_stats['fg_pct'],

            # Away team stats
            'away_avg_pts': away_stats['avg_pts'],
            'away_avg_pts_allowed': away_stats['avg_pts_allowed'],
            'away_pace': away_stats['pace'],
            'away_fg_pct': away_stats['fg_pct'],

            # Matchup stats
            'expected_margin': expected_margin,
            'spread_advantage': spread_advantage,
        }

        # Add advanced stats only when BOTH teams have them; otherwise the
        # away lookups below would raise KeyError.
        if 'off_rating' in home_stats and 'off_rating' in away_stats:
            sample.update({
                'home_off_rating': home_stats['off_rating'],
                'home_def_rating': home_stats['def_rating'],
                'home_net_rating': home_stats['net_rating'],
                'home_true_pace': home_stats['true_pace'],
                'away_off_rating': away_stats['off_rating'],
                'away_def_rating': away_stats['def_rating'],
                'away_net_rating': away_stats['net_rating'],
                'away_true_pace': away_stats['true_pace'],
            })

        training_samples.append(sample)

    training_df = pd.DataFrame(training_samples)

    # Save
    save_path = os.path.join(BASE_PATH, 'training_data.csv')
    training_df.to_csv(save_path, index=False)

    print(f"‚úÖ Created {len(training_df):,} training samples")
    print(f"üíæ Saved to: {save_path}\n")

    return training_df

def get_team_recent_stats(games_df, team_id, before_date, team_stats_df=None, window=5):
    """Summarise a team's most recent games played strictly before *before_date*.

    Args:
        games_df: game log with ``TEAM_ID``, ``GAME_DATE``, ``PTS``,
            ``PLUS_MINUS``, ``FGA``, ``FG_PCT`` (and ``SEASON`` when
            *team_stats_df* is given).
        team_id: value matched against ``TEAM_ID``.
        before_date: only games with ``GAME_DATE < before_date`` are used.
        team_stats_df: optional per-season team stats with ``TEAM_ID`` and
            ``SEASON`` columns.
        window: number of most recent games to average over.

    Returns:
        dict | None: ``None`` when the team has fewer than ``MIN_GAMES``
        earlier games. Otherwise a dict with ``avg_pts``,
        ``avg_pts_allowed`` (points minus plus/minus), ``pace`` (mean field
        goal attempts, used as a pace proxy) and ``fg_pct``. When a matching
        team-season row exists the dict also holds ``off_rating``,
        ``def_rating``, ``net_rating``, ``true_pace``, ``efg_pct`` and
        ``ts_pct``; a statistic whose column is absent is NaN rather than a
        made-up 0.

    NOTE(review): the season row is looked up by team and season only, so it
    is not limited to games before *before_date*.
    """
    # Most recent games first.
    earlier = (games_df['TEAM_ID'] == team_id) & (games_df['GAME_DATE'] < before_date)
    team_games = games_df[earlier].sort_values('GAME_DATE', ascending=False)

    if len(team_games) < MIN_GAMES:
        return None

    recent = team_games.head(window)

    # Calculate basic stats from game log
    pts = pd.to_numeric(recent['PTS'], errors='coerce')
    plus_minus = pd.to_numeric(recent['PLUS_MINUS'], errors='coerce')

    stats = {
        'avg_pts': pts.mean(),
        # Opponent score = own score - point differential.
        'avg_pts_allowed': (pts - plus_minus).mean(),
        'pace': pd.to_numeric(recent['FGA'], errors='coerce').mean(),
        'fg_pct': pd.to_numeric(recent['FG_PCT'], errors='coerce').mean(),
    }

    if team_stats_df is None:
        return stats

    # Season of the latest game in the window.
    season = recent.iloc[0]['SEASON']

    team_season_stats = team_stats_df[
        (team_stats_df['TEAM_ID'] == team_id) &
        (team_stats_df['SEASON'] == season)
    ]

    if team_season_stats.empty:
        return stats

    ts = team_season_stats.iloc[0]
    missing = float('nan')

    advanced_columns = {
        'off_rating': 'OFF_RATING',
        'def_rating': 'DEF_RATING',
        'net_rating': 'NET_RATING',
        'true_pace': 'PACE',
        'efg_pct': 'EFG_PCT',
        'ts_pct': 'TS_PCT',
    }
    for key, column in advanced_columns.items():
        # A missing column must read as "unknown", not as a rating of 0,
        # which would distort every threshold and difference built on it.
        stats[key] = pd.to_numeric(ts.get(column, missing), errors='coerce')

    return stats

# ============================================================================
# STEP 5: FEATURE ENGINEERING & MODEL TRAINING
# ============================================================================

def engineer_features(df):
    """Add derived matchup and spread columns to *df* in place and return it.

    Always added: home-vs-away differentials for scoring, defence, pace and
    shooting, plus spread magnitude and favourite/underdog/size flags.
    When ``home_off_rating`` is present, rating differentials and "elite"
    offence/defence flags are added too.
    """
    rule = "=" * 100
    print(rule)
    print("STEP 5: FEATURE ENGINEERING")
    print(rule)

    # (new column, minuend, subtrahend); defence is away minus home so that
    # a positive value favours the home side.
    basic_gaps = (
        ('scoring_diff', 'home_avg_pts', 'away_avg_pts'),
        ('defense_diff', 'away_avg_pts_allowed', 'home_avg_pts_allowed'),
        ('pace_diff', 'home_pace', 'away_pace'),
        ('shooting_diff', 'home_fg_pct', 'away_fg_pct'),
    )
    for new_col, left, right in basic_gaps:
        df[new_col] = df[left] - df[right]

    # Spread magnitude and direction flags
    df['spread_strength'] = df['home_spread'].abs()
    df['is_favorite'] = df['home_spread'].lt(0).astype(int)
    df['is_underdog'] = df['home_spread'].gt(0).astype(int)

    # Size-of-spread buckets
    df['large_favorite'] = df['home_spread'].lt(-7).astype(int)
    df['large_underdog'] = df['home_spread'].gt(7).astype(int)
    df['close_game'] = df['home_spread'].abs().lt(3).astype(int)

    # Rating-based features exist only when advanced stats were joined in.
    if 'home_off_rating' in df.columns:
        advanced_gaps = (
            ('off_rating_diff', 'home_off_rating', 'away_off_rating'),
            ('def_rating_diff', 'away_def_rating', 'home_def_rating'),
            ('net_rating_diff', 'home_net_rating', 'away_net_rating'),
            ('true_pace_diff', 'home_true_pace', 'away_true_pace'),
        )
        for new_col, left, right in advanced_gaps:
            df[new_col] = df[left] - df[right]

        # Threshold flags: offence above 115, defence below 110.
        for side in ('home', 'away'):
            df[f'{side}_elite_offense'] = df[f'{side}_off_rating'].gt(115).astype(int)
            df[f'{side}_elite_defense'] = df[f'{side}_def_rating'].lt(110).astype(int)

    n_columns = len(df.columns)
    print(f"‚úÖ Created {n_columns} features\n")

    return df

def train_model(training_df):
    """Train and evaluate an XGBoost classifier that predicts ``home_covered``.

    The frame is passed through ``engineer_features``, split 80/20 at random,
    fitted, evaluated on the held-out part, and the model plus the ordered
    feature list are pickled under ``BASE_PATH`` (``spread_model.pkl`` and
    ``feature_cols.pkl``).

    NOTE(review): the split is random rather than chronological, so the test
    set contains games older than some training games; the reported accuracy
    may be optimistic for forward-looking use.

    Args:
        training_df: samples as produced by ``create_training_data``.

    Returns:
        tuple: ``(model, feature_cols)``.

    Raises:
        ValueError: if *training_df* is ``None`` or has no rows.
    """
    print("=" * 100)
    print("STEP 6: TRAINING MODEL")
    print("=" * 100)

    if training_df is None or training_df.empty:
        raise ValueError(
            "No training samples available - check that odds were downloaded "
            "and matched to games before training."
        )

    training_df = engineer_features(training_df)

    # Base features for spread prediction
    feature_cols = [
        'home_spread',
        'home_avg_pts', 'home_avg_pts_allowed', 'home_pace', 'home_fg_pct',
        'away_avg_pts', 'away_avg_pts_allowed', 'away_pace', 'away_fg_pct',
        'expected_margin', 'spread_advantage',
        'scoring_diff', 'defense_diff', 'pace_diff', 'shooting_diff',
        'spread_strength', 'is_favorite', 'is_underdog',
        'large_favorite', 'large_underdog', 'close_game'
    ]

    # Add advanced features if available
    if 'home_off_rating' in training_df.columns:
        feature_cols.extend([
            'home_off_rating', 'home_def_rating', 'home_net_rating', 'home_true_pace',
            'away_off_rating', 'away_def_rating', 'away_net_rating', 'away_true_pace',
            'off_rating_diff', 'def_rating_diff', 'net_rating_diff', 'true_pace_diff',
            'home_elite_offense', 'home_elite_defense',
            'away_elite_offense', 'away_elite_defense'
        ])
        print("‚úÖ Using ADVANCED features (off/def ratings, true pace)")
    else:
        print("‚ö†Ô∏è  Using BASIC features only (no advanced stats)")

    X = training_df[feature_cols]
    y = training_df['home_covered'].astype(int)

    print(f"\nTraining samples: {len(X)}")
    print(f"Features: {len(feature_cols)}")
    print(f"Home cover rate: {y.mean():.1%}\n")

    # Stratification needs at least two samples of every class.
    stratify = y if y.value_counts().min() >= 2 else None
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=stratify
    )

    print("Training XGBoost...")

    model = xgb.XGBClassifier(
        n_estimators=200,
        max_depth=5,
        learning_rate=0.1,
        random_state=42
    )

    model.fit(X_train, y_train, verbose=False)

    # Evaluate (predict once, reuse for every metric)
    test_predictions = model.predict(X_test)
    test_acc = accuracy_score(y_test, test_predictions)

    print(f"\nüìä Test Accuracy: {test_acc:.1%}")

    if test_acc < 0.55:
        print("‚ö†Ô∏è  Accuracy is low - be cautious with picks")
    elif test_acc < 0.60:
        print("‚úÖ Decent accuracy - profitable with selective betting")
    elif test_acc < 0.65:
        print("üî• Great accuracy - strong edge!")
    else:
        print("üöÄ Excellent accuracy - very strong edge!")

    print(f"\nüìà Classification Report:")
    # Explicit labels keep the two target names valid even if one class is
    # absent from the test split.
    print(classification_report(
        y_test,
        test_predictions,
        labels=[0, 1],
        target_names=['HOME LOSES ATS', 'HOME COVERS'],
        zero_division=0,
    ))

    # Feature importance
    print(f"\nüéØ Top 10 Features:")
    importance_df = pd.DataFrame({
        'feature': feature_cols,
        'importance': model.feature_importances_
    }).sort_values('importance', ascending=False)

    for _, row in importance_df.head(10).iterrows():
        print(f"  {row['feature']:<30} {row['importance']:.4f}")

    # Save model
    model_path = os.path.join(BASE_PATH, 'spread_model.pkl')
    features_path = os.path.join(BASE_PATH, 'feature_cols.pkl')

    with open(model_path, 'wb') as f:
        pickle.dump(model, f)
    with open(features_path, 'wb') as f:
        pickle.dump(feature_cols, f)

    print(f"\n‚úÖ Model saved to {BASE_PATH}")

    return model, feature_cols

# ============================================================================
# STEP 7: MAKE TODAY'S PREDICTIONS
# ============================================================================

def get_todays_spreads(api_key):
    """Fetch the current DraftKings spread lines from the live odds endpoint.

    NOTE(review): no date filter is applied here, so the result holds every
    event the endpoint returns, which is presumably not limited to games
    played today - confirm against the API.

    Args:
        api_key: key sent as ``apiKey`` to the odds API.

    Returns:
        pandas.DataFrame: one row per event that has a DraftKings home
        spread, with ``home_team``, ``away_team``, ``commence_time``,
        ``home_spread``, ``home_price`` and (when quoted) ``away_spread``,
        ``away_price``. Empty when the request fails or nothing is returned.
    """
    url = f'https://api.the-odds-api.com/v4/sports/{ODDS_SPORT_KEY}/odds'

    try:
        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(
            url,
            params={
                'apiKey': api_key,
                'regions': 'us',
                'markets': 'spreads',
                'bookmakers': 'draftkings',  # ONLY DraftKings
                'oddsFormat': 'american',
            },
            timeout=20,
        )
    except requests.RequestException as e:
        print(f"‚ùå Error fetching odds: {e}")
        return pd.DataFrame()

    if response.status_code != 200:
        print(f"‚ùå Error fetching odds: {response.status_code}")
        print(f"Response: {response.text[:200]}")
        return pd.DataFrame()

    try:
        data = response.json()
    except ValueError as e:
        print(f"‚ùå Error fetching odds: {e}")
        return pd.DataFrame()

    if not data:
        print("‚ö†Ô∏è  No games returned from API")
        return pd.DataFrame()

    if not isinstance(data, list):
        # The loop below expects a list of event dicts.
        print(f"‚ùå Error fetching odds: unexpected response {str(data)[:200]}")
        return pd.DataFrame()

    games = []
    for game in data:
        home_team = game['home_team']
        away_team = game['away_team']
        game_info = {
            'home_team': home_team,
            'away_team': away_team,
            'commence_time': game['commence_time']
        }

        # Look for DraftKings bookmaker
        dk_found = False
        for bookmaker in game.get('bookmakers', []):
            if bookmaker.get('key') != 'draftkings':
                continue
            dk_found = True
            for market in bookmaker.get('markets', []):
                if market.get('key') != 'spreads':
                    continue
                for outcome in market.get('outcomes', []):
                    point = outcome.get('point')
                    if point is None:
                        continue
                    if outcome.get('name') == home_team:
                        game_info['home_spread'] = point
                        game_info['home_price'] = outcome.get('price')
                    elif outcome.get('name') == away_team:
                        game_info['away_spread'] = point
                        game_info['away_price'] = outcome.get('price')

        if not dk_found:
            print(f"‚ö†Ô∏è  DraftKings odds not found for: {away_team} @ {home_team}")

        # Only games with a home line can be scored downstream.
        if 'home_spread' in game_info:
            games.append(game_info)

    df = pd.DataFrame(games) if games else pd.DataFrame()

    if not df.empty:
        print(f"\n‚úÖ Found DraftKings odds for {len(df)} games:")
        for _, g in df.iterrows():
            print(f"   {g['away_team']} @ {g['home_team']}: {g['home_team']} {g['home_spread']:+.1f}")

    return df

def _find_team_id(games_df, team_name):
    """Return the TEAM_ID of the first game-log row whose TEAM_NAME contains the team's last word, or None."""
    nickname = team_name.split()[-1]
    # regex=False: the nickname is a literal substring, not a pattern.
    matches = games_df.loc[
        games_df['TEAM_NAME'].str.contains(nickname, na=False, regex=False),
        'TEAM_ID',
    ]
    return matches.iloc[0] if len(matches) > 0 else None


def make_predictions(model, feature_cols, games_df, todays_games_df, team_stats_df=None):
    """Score each upcoming game and pick a side when the model is confident.

    For every row of *todays_games_df* the two teams are looked up in the
    game log, their recent stats (all games before today) are computed, the
    same features as in training are built, and ``model.predict_proba`` gives
    the home-cover probability. A pick is emitted at >= 60% for the home
    side or <= 40% (i.e. >= 60% for the away side); otherwise the game is
    passed.

    Args:
        model: fitted classifier exposing ``predict_proba``.
        feature_cols: ordered feature names the model was trained on.
        games_df: historical game log.
        todays_games_df: rows with ``home_team``, ``away_team``,
            ``home_spread``.
        team_stats_df: optional per-season team stats.

    Returns:
        list[dict]: one ``{'game', 'pick', 'prob'}`` dict per pick.
    """
    print("\n" + "=" * 100)
    print("TODAY'S PREDICTIONS")
    print("=" * 100)

    predictions = []
    cutoff = pd.Timestamp(datetime.now().date())  # only games before today count

    for _, game in todays_games_df.iterrows():
        home_team = game['home_team']
        away_team = game['away_team']
        home_spread = game['home_spread']

        print(f"\n{away_team} @ {home_team}")
        print(f"Spread: {home_team} {home_spread:+.1f}")

        # Get team IDs from games_df
        home_id = _find_team_id(games_df, home_team)
        away_id = _find_team_id(games_df, away_team)

        if home_id is None or away_id is None:
            print("‚ùå Team not found")
            continue

        # Get stats
        home_stats = get_team_recent_stats(games_df, home_id, cutoff, team_stats_df)
        away_stats = get_team_recent_stats(games_df, away_id, cutoff, team_stats_df)

        if not home_stats or not away_stats:
            print("‚ùå Insufficient data")
            continue

        # Calculate expected margin
        expected_margin = (home_stats['avg_pts'] - home_stats['avg_pts_allowed']) - \
                         (away_stats['avg_pts'] - away_stats['avg_pts_allowed'])

        # Must stay identical to the formula used when the training data was built.
        spread_advantage = expected_margin - home_spread

        # Build features
        game_features = {
            'home_spread': home_spread,
            'home_avg_pts': home_stats['avg_pts'],
            'home_avg_pts_allowed': home_stats['avg_pts_allowed'],
            'home_pace': home_stats['pace'],
            'home_fg_pct': home_stats['fg_pct'],
            'away_avg_pts': away_stats['avg_pts'],
            'away_avg_pts_allowed': away_stats['avg_pts_allowed'],
            'away_pace': away_stats['pace'],
            'away_fg_pct': away_stats['fg_pct'],
            'expected_margin': expected_margin,
            'spread_advantage': spread_advantage,
            'scoring_diff': home_stats['avg_pts'] - away_stats['avg_pts'],
            'defense_diff': away_stats['avg_pts_allowed'] - home_stats['avg_pts_allowed'],
            'pace_diff': home_stats['pace'] - away_stats['pace'],
            'shooting_diff': home_stats['fg_pct'] - away_stats['fg_pct'],
            'spread_strength': abs(home_spread),
            'is_favorite': 1 if home_spread < 0 else 0,
            'is_underdog': 1 if home_spread > 0 else 0,
            'large_favorite': 1 if home_spread < -7 else 0,
            'large_underdog': 1 if home_spread > 7 else 0,
            'close_game': 1 if abs(home_spread) < 3 else 0,
        }

        # Add advanced features only when BOTH teams have them; otherwise
        # the away lookups would raise KeyError.
        if 'off_rating' in home_stats and 'off_rating' in away_stats:
            game_features.update({
                'home_off_rating': home_stats['off_rating'],
                'home_def_rating': home_stats['def_rating'],
                'home_net_rating': home_stats['net_rating'],
                'home_true_pace': home_stats['true_pace'],
                'away_off_rating': away_stats['off_rating'],
                'away_def_rating': away_stats['def_rating'],
                'away_net_rating': away_stats['net_rating'],
                'away_true_pace': away_stats['true_pace'],
                'off_rating_diff': home_stats['off_rating'] - away_stats['off_rating'],
                'def_rating_diff': away_stats['def_rating'] - home_stats['def_rating'],
                'net_rating_diff': home_stats['net_rating'] - away_stats['net_rating'],
                'true_pace_diff': home_stats['true_pace'] - away_stats['true_pace'],
                'home_elite_offense': 1 if home_stats['off_rating'] > 115 else 0,
                'home_elite_defense': 1 if home_stats['def_rating'] < 110 else 0,
                'away_elite_offense': 1 if away_stats['off_rating'] > 115 else 0,
                'away_elite_defense': 1 if away_stats['def_rating'] < 110 else 0,
            })

        # reindex puts columns in training order and fills any feature that
        # could not be built for this game with NaN instead of raising.
        X = pd.DataFrame([game_features]).reindex(columns=feature_cols)
        home_cover_prob = model.predict_proba(X)[0][1]

        print(f"  Expected margin: {expected_margin:+.1f}")
        print(f"  Spread advantage: {spread_advantage:+.1f}")
        print(f"  HOME COVER: {home_cover_prob:.1%} | AWAY COVER: {1-home_cover_prob:.1%}")

        matchup = f"{away_team} @ {home_team}"

        # Pick threshold: 60% confidence
        if home_cover_prob >= 0.60:
            print(f"  ‚úÖ PICK: {home_team} {home_spread:+.1f}")
            predictions.append({
                'game': matchup,
                'pick': f"{home_team} {home_spread:+.1f}",
                'prob': home_cover_prob
            })
        elif home_cover_prob <= 0.40:
            away_spread = -home_spread
            print(f"  ‚úÖ PICK: {away_team} {away_spread:+.1f}")
            predictions.append({
                'game': matchup,
                'pick': f"{away_team} {away_spread:+.1f}",
                'prob': 1-home_cover_prob
            })
        else:
            print(f"  ‚è∏Ô∏è  PASS")

    return predictions

# ============================================================================
# MAIN WORKFLOW
# ============================================================================

def main(api_key, retrain=False):
    """Run the whole pipeline: set up, (re)train or load, fetch lines, print picks.

    Args:
        api_key: odds API key, passed to the download and live-odds helpers.
        retrain: when True, download games, team stats and historical odds,
            rebuild the training set and train a new model. When False, load
            the game log, optional team stats, model and feature list that a
            previous run saved under ``BASE_PATH``.

    Raises:
        FileNotFoundError: with ``retrain=False`` when a required saved file
            is missing.
    """
    print("=" * 100)
    print("NBA SPREAD BETTING - IMPROVED VERSION")
    print("=" * 100)

    setup_environment()

    if retrain:
        # Download all data
        games_df = download_all_nba_games()
        team_stats_df = download_team_stats()
        odds_df = download_all_odds(games_df, api_key)

        # Create training data
        training_df = create_training_data(games_df, odds_df, team_stats_df)

        # Train model
        model, features = train_model(training_df)
    else:
        # Load existing
        print("\nLoading existing model...")
        games_path = os.path.join(BASE_PATH, 'nba_games_all.csv')
        model_path = os.path.join(BASE_PATH, 'spread_model.pkl')
        features_path = os.path.join(BASE_PATH, 'feature_cols.pkl')

        missing = [p for p in (games_path, model_path, features_path) if not os.path.exists(p)]
        if missing:
            raise FileNotFoundError(
                "Saved artifacts not found - run main(api_key, retrain=True) first: "
                + ", ".join(missing)
            )

        games_df = pd.read_csv(games_path)
        games_df['GAME_DATE'] = pd.to_datetime(games_df['GAME_DATE'])

        # Team stats are optional
        team_stats_path = os.path.join(BASE_PATH, 'nba_team_stats.csv')
        team_stats_df = pd.read_csv(team_stats_path) if os.path.exists(team_stats_path) else None

        # NOTE: pickle executes code on load; only load files this pipeline
        # wrote itself.
        with open(model_path, 'rb') as f:
            model = pickle.load(f)
        with open(features_path, 'rb') as f:
            features = pickle.load(f)

        print("‚úÖ Loaded!\n")

    # Get today's games
    print("Fetching today's games...")
    todays_games = get_todays_spreads(api_key)

    if todays_games.empty:
        print("No games today")
        return

    print(f"‚úÖ Found {len(todays_games)} games\n")

    # Predict (team_stats_df is bound on both branches above)
    predictions = make_predictions(model, features, games_df, todays_games, team_stats_df)

    # Summary
    print("\n" + "=" * 100)
    print("TODAY'S PICKS")
    print("=" * 100)

    if predictions:
        for i, pred in enumerate(predictions, 1):
            print(f"{i}. {pred['game']}")
            print(f"   {pred['pick']} ({pred['prob']:.1%})\n")
    else:
        print("No picks today")

    print("=" * 100)

if __name__ == "__main__":
    # Run a full retrain-and-predict cycle unless the key is still the placeholder.
    key_is_configured = API_KEY != 'YOUR_API_KEY_HERE'
    if key_is_configured:
        main(API_KEY, retrain=True)
    else:
        print("‚ö†Ô∏è  Set your API key!")

Collecting nba_api
  Downloading nba_api-1.11.3-py3-none-any.whl.metadata (5.8 kB)
Downloading nba_api-1.11.3-py3-none-any.whl (318 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m319.0/319.0 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nba_api
Successfully installed nba_api-1.11.3
NBA SPREAD BETTING - IMPROVED VERSION
STEP 1: ENVIRONMENT SETUP
‚úÖ Using folder: /content/drive/MyDrive/nba_spread_model

‚úÖ Setup complete!

STEP 2: DOWNLOADING NBA GAMES

Downloading seasons: ['2020-21', '2021-22', '2022-23', '2023-24', '2024-25', '2025-26']
  Downloading 2020-21... ‚úÖ 2160 games
  Downloading 2021-22... ‚úÖ 2460 games
  Downloading 2022-23... ‚úÖ 2460 games
  Downloading 2023-24... ‚úÖ 2460 games
  Downloading 2024-25... ‚úÖ 2460 games
  Downloading 2025-26... ‚úÖ 778 games

‚úÖ Downloaded 12,778 total games from 6 seasons
üíæ Saved to: /content/drive