NBA Spread Predictor
# Predicts which team covers the spread - far more useful than trying to pick outright winners.
# Been working on this for a while and finally got decent accuracy.
# Quick setup

In [None]:
# Colab setup cell: mounts Drive, installs nba_api, imports everything the
# later cells use, and defines the shared configuration constants.

# Mount Google Drive at /content/drive; PROJECT_DIR below lives under it.
from google.colab import drive
drive.mount('/content/drive')

# Install what we need
# (the leading "!" is a notebook shell escape, so this cell only runs in IPython/Colab)
!pip install -q nba_api

import os
import warnings
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import requests
import pickle
from time import sleep
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import xgboost as xgb

# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# API keys and config
API_KEY = 'YOUR_API_KEY_HERE'  # odds-api key
SPORT = 'basketball_nba'
PROJECT_DIR = '/content/drive/MyDrive/nba_spread_model'

# Training config - only use teams with at least 5 games so stats are stable
# NOTE(review): MIN_GAMES is not referenced by any of the cells visible here.
MIN_GAMES = 5
START_DATE = '2020-10-01'

# Make sure we have a place to save stuff
os.makedirs(PROJECT_DIR, exist_ok=True)
print(f"Saving everything to: {PROJECT_DIR}")

PART 1: Get all the NBA games
# Pulls the league-wide game log for every season since 2020-21 in one pass (one request per season)

In [None]:
def get_nba_games():
    """Fetch the regular-season game log for every season from 2020-21 on.

    One ``LeagueGameLog`` request is made per season, with up to three tries
    each; a season that still fails is reported and left out. The combined
    table is written to ``{PROJECT_DIR}/nba_games.csv`` and returned.

    Returns:
        pandas.DataFrame: all downloaded rows, with a ``SEASON`` column added.

    Raises:
        Exception: if not a single season could be downloaded.
    """
    from nba_api.stats.endpoints import LeagueGameLog

    # Before October the newest season label is the one that started in the
    # previous calendar year.
    today = datetime.now()
    newest_start_year = today.year - 1 if today.month < 10 else today.year

    seasons = [
        f"{start_year}-{str(start_year+1)[-2:]}"
        for start_year in range(2020, newest_start_year + 1)
    ]
    print(f"Downloading {len(seasons)} seasons: {seasons}")

    total_tries = 3
    season_frames = []
    for season in seasons:
        print(f"  Getting {season}...", end=" ")

        tries_left = total_tries
        while tries_left > 0:
            tries_left -= 1
            try:
                sleep(2)  # pause before every request so the API is not hammered
                season_log = LeagueGameLog(
                    season=season,
                    season_type_all_star="Regular Season",
                    timeout=180,
                )
                frame = season_log.get_data_frames()[0]
                frame['SEASON'] = season
                season_frames.append(frame)
                print(f"got {len(frame)} games")
                break
            except Exception as err:
                if tries_left == 0:
                    # Out of tries: report and move on to the next season.
                    print(f"failed: {str(err)[:80]}")
                else:
                    print("timeout, retrying...", end=" ")
                    sleep(5)

    if len(season_frames) == 0:
        raise Exception("Couldn't download any games, NBA API might be down")

    games = pd.concat(season_frames, ignore_index=True)

    # Keep a copy on Drive
    games.to_csv(f'{PROJECT_DIR}/nba_games.csv', index=False)
    print(f"\nSaved {len(games)} games total")

    return games


# Run it
games_df = get_nba_games()

PART 2: Get team stats - off/def ratings, pace, etc

In [None]:
def get_team_stats():
    """Download advanced team stats for the current season and save them.

    Requests the "Advanced" measure type from ``LeagueDashTeamStats`` for the
    regular season and writes the result to ``{PROJECT_DIR}/team_stats.csv``.
    The request is retried up to three times, matching the retry policy used
    for the game-log download, because the NBA API is sometimes slow.

    Returns:
        pandas.DataFrame: the first data frame returned by the endpoint.

    Raises:
        Exception: the last request error is re-raised if every attempt fails.
    """
    from nba_api.stats.endpoints import LeagueDashTeamStats

    print("\nGetting team stats...")

    # Before October the current season is the one that started last year.
    now = datetime.now()
    start_year = now.year if now.month >= 10 else now.year - 1
    season = f"{start_year}-{str(start_year + 1)[-2:]}"
    print(f"  Season: {season}")

    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        sleep(2)  # don't spam the API
        try:
            stats = LeagueDashTeamStats(
                season=season,
                season_type_all_star="Regular Season",
                measure_type_detailed_defense="Advanced",
                timeout=60,
            )
            df = stats.get_data_frames()[0]
            break
        except Exception as e:
            if attempt == max_attempts:
                print(f"  failed: {str(e)[:80]}")
                raise
            print("  timeout, retrying...")
            sleep(5)

    if df.empty:
        # Downstream cells look teams up in this table; an empty table means
        # every game is dropped for missing stats.
        print(f"  Warning: no team stats returned for {season}")

    # Save it
    df.to_csv(f'{PROJECT_DIR}/team_stats.csv', index=False)
    print(f"  Saved stats for {len(df)} teams")

    return df


stats_df = get_team_stats()

PART 3: Get spread odds from odds-api
# This takes a while because we need historical data

In [None]:
def _game_days_to_query(games_df, start):
    """Return the calendar days to request odds snapshots for.

    Uses the distinct ``GAME_DATE`` values on or after ``start`` when the game
    log provides them, so no requests are spent on days without games. Falls
    back to every day from ``start`` until now otherwise.
    """
    if games_df is not None and 'GAME_DATE' in getattr(games_df, 'columns', ()):
        game_days = pd.to_datetime(games_df['GAME_DATE']).dt.normalize()
        game_days = game_days[game_days >= start]
        if not game_days.empty:
            return pd.DatetimeIndex(game_days.unique()).sort_values()
    return pd.date_range(start=start, end=datetime.now(), freq='D')


def _home_spread_rows(events, bookmaker='draftkings'):
    """Extract one row per event holding the home team's spread at ``bookmaker``."""
    rows = []
    for game in events:
        home_team = game['home_team']
        away_team = game['away_team']

        # commence_time is UTC, and US evening tip-offs fall on the next UTC
        # day. Convert to US Eastern before taking the date so it lines up
        # with the game log's GAME_DATE.
        # NOTE(review): assumes GAME_DATE is the US calendar day - confirm.
        tipoff = pd.to_datetime(game['commence_time'], utc=True)
        game_date = tipoff.tz_convert('America/New_York').date()

        for book in game.get('bookmakers', []):
            if book['key'] != bookmaker:
                continue
            for market in book.get('markets', []):
                if market['key'] != 'spreads':
                    continue
                for outcome in market['outcomes']:
                    if outcome['name'] == home_team:
                        rows.append({
                            'date': game_date,
                            'home_team': home_team,
                            'away_team': away_team,
                            'spread': outcome['point'],
                            'odds': outcome['price'],
                        })
    return rows


def get_historical_spreads(games_df, start_date='2020-10-01'):
    """Download historical DraftKings home spreads, one snapshot per game day.

    Queries the odds API's historical endpoint at 12:00 UTC for each day and
    keeps the home team's spread and price. A game that shows up in several
    snapshots is stored once, using the most recent snapshot. The result is
    written to ``{PROJECT_DIR}/spread_odds.csv``.

    Args:
        games_df: game log; its ``GAME_DATE`` column (if present) selects the
            days to query. It is not modified.
        start_date: earliest day to query (anything ``pd.to_datetime`` parses).

    Returns:
        pandas.DataFrame: columns ``date``, ``home_team``, ``away_team``,
        ``spread``, ``odds``. The columns are present even when no odds were
        found.
    """
    print("\nDownloading spread odds...")
    print("This takes ~10 min because of API rate limits")

    start = pd.to_datetime(start_date)
    dates = _game_days_to_query(games_df, start)
    print(f"  Checking {len(dates)} days of games")

    # The plain /odds endpoint only serves current lines; past snapshots come
    # from the historical endpoint, which wraps the event list in "data".
    url = f"https://api.the-odds-api.com/v4/historical/sports/{SPORT}/odds"

    odds_by_game = {}  # (date, home, away) -> row; later snapshots overwrite
    failed_days = 0

    for day_num, date in enumerate(dates):
        date_str = date.strftime('%Y-%m-%d')

        # Show progress every 30 days
        if day_num % 30 == 0:
            print(f"  {date_str} ({len(odds_by_game)} games so far)")

        params = {
            'apiKey': API_KEY,
            'regions': 'us',
            'markets': 'spreads',
            'oddsFormat': 'american',
            'dateFormat': 'iso',
            'date': date_str + 'T12:00:00Z',
        }

        try:
            resp = requests.get(url, params=params, timeout=30)

            if resp.status_code == 401:
                # No later request can succeed with a rejected key.
                print("  API key rejected (HTTP 401), stopping early")
                failed_days += 1
                break

            if resp.status_code != 200:
                failed_days += 1
                if failed_days <= 3:
                    print(f"  {date_str}: HTTP {resp.status_code}, skipped")
                continue

            payload = resp.json()
            events = payload.get('data', []) if isinstance(payload, dict) else payload

            for row in _home_spread_rows(events):
                key = (row['date'], row['home_team'], row['away_team'])
                odds_by_game[key] = row

        except (requests.RequestException, ValueError, LookupError, TypeError) as e:
            # Best effort: one bad day must not abort the whole download. Only
            # the exception type is printed because request errors can embed
            # the URL, which contains the API key.
            failed_days += 1
            if failed_days <= 3:
                print(f"  {date_str}: {type(e).__name__}, skipped")
        finally:
            # Don't spam the API - also after a failed request
            sleep(1.5)

    if failed_days:
        print(f"  {failed_days} day(s) could not be downloaded")

    columns = ['date', 'home_team', 'away_team', 'spread', 'odds']
    odds_df = pd.DataFrame(list(odds_by_game.values()), columns=columns)

    # Save it
    odds_df.to_csv(f'{PROJECT_DIR}/spread_odds.csv', index=False)
    print(f"\nSaved {len(odds_df)} game spreads")

    return odds_df


spreads_df = get_historical_spreads(games_df, START_DATE)

PART 4: Match everything up and create training data

In [None]:
# stats_df column -> suffix of the home_/away_ feature built from it
_STAT_COLUMNS = {
    'OFF_RATING': 'off_rating',
    'DEF_RATING': 'def_rating',
    'PACE': 'pace',
    'FG_PCT': 'fg_pct',
    'FG3_PCT': 'fg3_pct',
}


def _pair_home_away(games, name_map):
    """Collapse the two per-team rows of each GAME_ID into one home/away row."""
    columns = ['date', 'home_team', 'away_team', 'home_pts', 'away_pts', 'season']
    rows = []
    for _, game_rows in games.groupby('GAME_ID', sort=False):
        if len(game_rows) != 2:
            continue  # skip weird games

        # The home row's MATCHUP contains "vs", the away row's contains "@".
        is_home = game_rows['MATCHUP'].str.contains('vs', regex=False, na=False)
        is_away = game_rows['MATCHUP'].str.contains('@', regex=False, na=False)
        if not (is_home.any() and is_away.any()):
            continue

        home = game_rows[is_home].iloc[0]
        away = game_rows[is_away].iloc[0]
        rows.append({
            'date': home['GAME_DATE'],
            'home_team': name_map.get(home['TEAM_NAME'], home['TEAM_NAME']),
            'away_team': name_map.get(away['TEAM_NAME'], away['TEAM_NAME']),
            'home_pts': home['PTS'],
            'away_pts': away['PTS'],
            'season': home['SEASON'],
        })
    return pd.DataFrame(rows, columns=columns)


def _team_stat_table(stats_df, name_map):
    """Return one row per team: odds-API team name plus the stat columns used."""
    table = pd.DataFrame({'team': stats_df['TEAM_NAME'].replace(name_map)})
    for source_col, feature in _STAT_COLUMNS.items():
        # A stat column missing from stats_df becomes 0.
        # NOTE(review): FG_PCT / FG3_PCT may not be part of the "Advanced"
        # measure type; if so these features are constant 0 - confirm.
        table[feature] = stats_df[source_col] if source_col in stats_df.columns else 0
    return table.drop_duplicates(subset='team', keep='first')


def build_training_data(games_df, stats_df, spreads_df):
    """Join games, spreads and team stats into one row per game with a label.

    Steps: pair the two team rows of each game, translate NBA API team names
    to the odds API spelling, inner-join with the spreads on date and teams,
    attach each team's stats, derive the differential features and the
    ``home_covered`` target, then write ``{PROJECT_DIR}/training_data.csv``.
    The input frames are not modified.

    Args:
        games_df: per-team game log with GAME_ID, GAME_DATE, MATCHUP,
            TEAM_NAME, PTS and SEASON columns.
        stats_df: team stats table with a TEAM_NAME column.
        spreads_df: spreads with date, home_team, away_team, spread columns.

    Returns:
        pandas.DataFrame: training rows with no missing values.
    """
    print("\nBuilding training dataset...")

    # Team name mapping - NBA API uses different names than odds API
    name_map = {
        'LA Clippers': 'Los Angeles Clippers',
        'LA Lakers': 'Los Angeles Lakers',
        # add more if needed
    }

    # Work on copies so the caller's frames keep their original dtypes.
    games = games_df.assign(GAME_DATE=pd.to_datetime(games_df['GAME_DATE']).dt.date)
    games_clean = _pair_home_away(games, name_map)

    keys = ['date', 'home_team', 'away_team']
    if spreads_df.empty or not set(keys + ['spread']).issubset(spreads_df.columns):
        print("  Warning: no spreads available to match against")
        spreads = pd.DataFrame(columns=keys + ['spread', 'odds'])
    else:
        spreads = spreads_df.copy()
        spreads['date'] = pd.to_datetime(spreads['date']).dt.date
        # One line per game: repeated snapshots would duplicate training rows.
        spreads = spreads.drop_duplicates(subset=keys, keep='last')

    # Match on date + teams
    merged = pd.merge(games_clean, spreads, on=keys, how='inner')
    print(f"  Matched {len(merged)} games with spreads")

    # Add team stats
    # NOTE: every game gets the team's current-season stats, including games
    # from earlier seasons (rolling averages would be better).
    team_stats = _team_stat_table(stats_df, name_map)
    merged = (
        merged
        .merge(team_stats.add_prefix('home_'), on='home_team', how='left')
        .merge(team_stats.add_prefix('away_'), on='away_team', how='left')
    )
    # Keep the stat columns as home/away pairs.
    stat_cols = [
        f'{side}_{feature}'
        for feature in _STAT_COLUMNS.values()
        for side in ('home', 'away')
    ]
    merged = merged[[c for c in merged.columns if c not in stat_cols] + stat_cols]

    # Drop rows with missing stats (teams not found in stats_df end up NaN)
    merged = merged.dropna()

    # Expected margin based on ratings
    merged['expected_margin'] = (
        (merged['home_off_rating'] - merged['away_off_rating']) +
        (merged['away_def_rating'] - merged['home_def_rating'])
    )

    # How much does spread differ from expected margin
    # NOTE(review): the cover test below uses `home_margin + spread`, so the
    # edge against the line would be `expected_margin + spread`. Kept as-is
    # because predict_today builds the feature with the same formula.
    merged['spread_advantage'] = merged['expected_margin'] - merged['spread']

    # Team stat differentials
    merged['scoring_diff'] = merged['home_off_rating'] - merged['away_off_rating']
    merged['defense_diff'] = merged['away_def_rating'] - merged['home_def_rating']
    merged['shooting_diff'] = merged['home_fg_pct'] - merged['away_fg_pct']
    merged['pace_diff'] = merged['home_pace'] - merged['away_pace']

    # Target: did home team cover? (a push counts as not covering)
    merged['home_margin'] = merged['home_pts'] - merged['away_pts']
    merged['home_covered'] = ((merged['home_margin'] + merged['spread']) > 0).astype(int)

    # Save it
    merged.to_csv(f'{PROJECT_DIR}/training_data.csv', index=False)
    print(f"  Created {len(merged)} training examples")
    print(f"  Home cover rate: {merged['home_covered'].mean():.1%}")

    return merged


training_df = build_training_data(games_df, stats_df, spreads_df)

PART 5: Train the model
# XGBoost works best for this

In [None]:
def train_model(df):
    """Train an XGBoost classifier that predicts whether the home team covers.

    When ``df`` has a ``date`` column the rows are ordered by date and the
    most recent 20% are held out, so the test score is not inflated by
    training on games played after the ones being predicted. Without a
    ``date`` column a seeded random 80/20 split is used. The fitted model is
    pickled to ``{PROJECT_DIR}/model.pkl``.

    Args:
        df: training table containing the feature columns listed below and a
            0/1 ``home_covered`` column.

    Returns:
        tuple: ``(model, feature_cols)`` - the fitted classifier and the
        ordered list of feature names it was trained on.

    Raises:
        ValueError: if ``df`` has no rows.
    """
    print("\nTraining model...")

    # Features to use
    feature_cols = [
        'spread', 'expected_margin', 'spread_advantage',
        'home_off_rating', 'away_off_rating',
        'home_def_rating', 'away_def_rating',
        'home_pace', 'away_pace',
        'home_fg_pct', 'away_fg_pct',
        'home_fg3_pct', 'away_fg3_pct',
        'scoring_diff', 'defense_diff', 'shooting_diff', 'pace_diff'
    ]

    if len(df) == 0:
        raise ValueError("No training examples - check that games and spreads were matched")

    chronological = 'date' in df.columns
    if chronological:
        # Stable sort keeps same-day games in their original order.
        order = pd.to_datetime(df['date']).to_numpy().argsort(kind='stable')
        df = df.iloc[order]

    X = df[feature_cols]
    y = df['home_covered']

    print(f"  Training samples: {len(X)}")
    print(f"  Features: {len(feature_cols)}")
    print(f"  Home cover rate: {y.mean():.1%}")

    # Split train/test
    if chronological:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, shuffle=False
        )
        print("  Split: chronological (most recent 20% held out)")
    else:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

    # Train XGBoost
    print("\nTraining XGBoost...")
    model = xgb.XGBClassifier(
        n_estimators=200,
        learning_rate=0.05,
        max_depth=6,
        min_child_weight=3,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        eval_metric='logloss'
    )

    model.fit(X_train, y_train)

    # Test it
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    print(f"\nTest Accuracy: {accuracy:.1%}")

    if accuracy > 0.70:
        print("🔥 Excellent accuracy!")
    elif accuracy > 0.60:
        print("✅ Good accuracy")
    else:
        print("⚠️  Accuracy is low, might want to add more features")

    print("\nClassification Report:")
    # Explicit labels keep the report working when the test fold happens to
    # contain only one class.
    print(classification_report(y_test, y_pred,
                                labels=[0, 1],
                                target_names=['HOME LOSES ATS', 'HOME COVERS'],
                                zero_division=0))

    # Feature importance
    importance = pd.DataFrame({
        'feature': feature_cols,
        'importance': model.feature_importances_
    }).sort_values('importance', ascending=False)

    print("\nTop 10 Features:")
    print(importance.head(10).to_string(index=False))

    # Save model
    with open(f'{PROJECT_DIR}/model.pkl', 'wb') as f:
        pickle.dump(model, f)

    print(f"\nModel saved to {PROJECT_DIR}")

    return model, feature_cols


model, features = train_model(training_df)

PART 6: Predict today's games

In [None]:
def predict_today():
    """Print spread picks for today's games using the saved model.

    Fetches current DraftKings spreads, keeps games whose tip-off falls on
    today's US Eastern date, rebuilds the training features from the saved
    team stats and prints the cover probabilities. A side is picked only when
    its probability exceeds 60%. Request failures are reported and end the
    function early instead of raising.

    Returns:
        None. All output is printed.
    """
    print("\nGetting today's games...")

    # Get today's odds
    url = f"https://api.the-odds-api.com/v4/sports/{SPORT}/odds"
    params = {
        'apiKey': API_KEY,
        'regions': 'us',
        'markets': 'spreads',
        'oddsFormat': 'american'
    }

    try:
        resp = requests.get(url, params=params, timeout=30)
    except requests.RequestException as e:
        # Only the type is printed: the message can contain the URL and key.
        print(f"Could not reach the odds API ({type(e).__name__})")
        return

    if resp.status_code != 200:
        print(f"Odds API returned HTTP {resp.status_code}")
        return

    try:
        data = resp.json()
    except ValueError:
        print("Odds API returned a response that is not valid JSON")
        return

    if not isinstance(data, list):
        # Error payloads are objects, not the expected list of events.
        print("Odds API returned an unexpected payload")
        return

    # The endpoint lists every upcoming game; keep only today's (US Eastern).
    today = pd.Timestamp.now(tz='America/New_York').date()

    today_games = []

    for game in data:
        home = game['home_team']
        away = game['away_team']

        tipoff = game.get('commence_time')
        if tipoff is not None:
            game_day = pd.to_datetime(tipoff, utc=True).tz_convert('America/New_York').date()
            if game_day != today:
                continue

        # Get DraftKings spread
        for book in game.get('bookmakers', []):
            if book['key'] != 'draftkings':
                continue

            for market in book.get('markets', []):
                if market['key'] != 'spreads':
                    continue

                for outcome in market['outcomes']:
                    if outcome['name'] == home:
                        today_games.append({
                            'home_team': home,
                            'away_team': away,
                            'spread': outcome['point'],
                            'game': f"{away} @ {home}"
                        })

    print(f"Found {len(today_games)} games with spreads")

    if not today_games:
        print("No games today or odds not available yet")
        return

    # Load model
    # NOTE: pickle.load can execute arbitrary code; only load a model.pkl
    # that this notebook wrote itself.
    with open(f'{PROJECT_DIR}/model.pkl', 'rb') as f:
        model = pickle.load(f)

    # Load current stats and translate NBA API team names to the odds API
    # spelling so that lookups by odds-feed name succeed.
    stats_df = pd.read_csv(f'{PROJECT_DIR}/team_stats.csv')
    name_map = {
        'LA Clippers': 'Los Angeles Clippers',
        'LA Lakers': 'Los Angeles Lakers',
    }
    stats_df['TEAM_NAME'] = stats_df['TEAM_NAME'].replace(name_map)

    # Make predictions
    print("\n" + "="*80)
    print("TODAY'S PREDICTIONS")
    print("="*80)

    picks = []

    for game in today_games:
        home = game['home_team']
        away = game['away_team']
        spread = game['spread']

        # Get team stats
        home_stats = stats_df[stats_df['TEAM_NAME'] == home]
        away_stats = stats_df[stats_df['TEAM_NAME'] == away]

        if len(home_stats) == 0 or len(away_stats) == 0:
            print(f"\n{game['game']}")
            print("  No stats available, skipping")
            continue

        home_row = home_stats.iloc[0]
        away_row = away_stats.iloc[0]

        # Create features (a stat column missing from the CSV counts as 0)
        home_off = home_row.get('OFF_RATING', 0)
        away_off = away_row.get('OFF_RATING', 0)
        home_def = home_row.get('DEF_RATING', 0)
        away_def = away_row.get('DEF_RATING', 0)
        home_pace = home_row.get('PACE', 0)
        away_pace = away_row.get('PACE', 0)
        home_fg = home_row.get('FG_PCT', 0)
        away_fg = away_row.get('FG_PCT', 0)

        expected_margin = (home_off - away_off) + (away_def - home_def)
        spread_advantage = expected_margin - spread

        # Column order matches the feature list used in train_model.
        X = pd.DataFrame([{
            'spread': spread,
            'expected_margin': expected_margin,
            'spread_advantage': spread_advantage,
            'home_off_rating': home_off,
            'away_off_rating': away_off,
            'home_def_rating': home_def,
            'away_def_rating': away_def,
            'home_pace': home_pace,
            'away_pace': away_pace,
            'home_fg_pct': home_fg,
            'away_fg_pct': away_fg,
            'home_fg3_pct': home_row.get('FG3_PCT', 0),
            'away_fg3_pct': away_row.get('FG3_PCT', 0),
            'scoring_diff': home_off - away_off,
            'defense_diff': away_def - home_def,
            'shooting_diff': home_fg - away_fg,
            'pace_diff': home_pace - away_pace
        }])

        # Predict: column 1 is "home covers", column 0 is "home loses ATS"
        prob = model.predict_proba(X)[0]
        home_cover_prob = prob[1]
        away_cover_prob = prob[0]

        print(f"\n{game['game']}")
        print(f"Spread: {home} {spread:+.1f}")
        print(f"  Expected margin: {expected_margin:+.1f}")
        print(f"  Spread advantage: {spread_advantage:+.1f}")
        print(f"  HOME COVER: {home_cover_prob:.1%} | AWAY COVER: {away_cover_prob:.1%}")

        # Only pick if confidence > 60%
        if home_cover_prob > 0.60:
            pick = f"{home} {spread:+.1f}"
            print(f"  ✅ PICK: {pick}")
            picks.append((game['game'], pick, home_cover_prob))
        elif away_cover_prob > 0.60:
            # The away side gets the opposite number of the home spread.
            pick = f"{away} {-spread:+.1f}"
            print(f"  ✅ PICK: {pick}")
            picks.append((game['game'], pick, away_cover_prob))
        else:
            print("  ⏸️  PASS (not confident enough)")

    # Summary
    print("\n" + "="*80)
    print("TODAY'S PICKS")
    print("="*80)

    for i, (game, pick, prob) in enumerate(picks, 1):
        print(f"{i}. {game}")
        print(f"   {pick} ({prob:.1%})")

    print("="*80)


# Run predictions
predict_today()