In [1]:
import pandas as pd
import os
import pickle
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from nba_api.stats.endpoints import playergamelog, playercareerstats, commonplayerinfo, leaguegamefinder
from nba_api.stats.static import players

# HTTP session that retries transient failures (HTTP 429 and 5xx responses)
# with backoff. Only https:// URLs are routed through the retrying adapter.
# NOTE(review): none of the nba_api calls visible in this notebook are handed
# this session — confirm it is actually wired in somewhere.
retries = Retry(
    total=5,
    backoff_factor=1,
    status_forcelist=[429, 500, 502, 503, 504],
)
session = requests.Session()
session.mount('https://', HTTPAdapter(max_retries=retries))

# Directory setup
player_stats_cache_dir = 'player_stats_cache'
new_cache_dir = 'nba_new_data_cache'
# Create both cache directories up front: pickles are written into each of
# them, so a missing directory would make the first cache write fail.
# exist_ok=True keeps this cell safe to re-run.
for _cache_dir in (player_stats_cache_dir, new_cache_dir):
    os.makedirs(_cache_dir, exist_ok=True)

# Cache functions
def cache_exists(directory, filename):
    """Return True when a path named ``filename`` exists inside ``directory``."""
    candidate = os.path.join(directory, filename)
    return os.path.exists(candidate)

def load_from_cache(directory, filename):
    """Unpickle and return the object stored at ``directory/filename``.

    Unpickling can execute arbitrary code, so only point this at cache files
    produced by this notebook.
    """
    cache_path = os.path.join(directory, filename)
    with open(cache_path, 'rb') as handle:
        cached = pickle.load(handle)
    return cached

def save_to_cache(directory, filename, data):
    """Pickle ``data`` to ``directory/filename``, creating ``directory`` if needed.

    The pickle is written to a sibling ``.tmp`` file and then moved into place
    with ``os.replace``. A dump that fails or is interrupted therefore never
    leaves a truncated file under the final name, which ``cache_exists`` would
    otherwise report as a valid cache entry on the next run.

    Args:
        directory: Folder that holds the cache file ('' means the current directory).
        filename: Name of the cache file inside ``directory``.
        data: Any picklable object.

    Raises:
        Whatever ``pickle.dump`` or the file operations raise; the temporary
        file is removed before the exception propagates.
    """
    if directory:  # os.makedirs('') raises, and '' already means "current directory"
        os.makedirs(directory, exist_ok=True)
    target = os.path.join(directory, filename)
    tmp_path = f'{target}.tmp'
    try:
        with open(tmp_path, 'wb') as file:
            pickle.dump(data, file)
        os.replace(tmp_path, target)
    except BaseException:
        # Also covers KeyboardInterrupt during a long scrape: drop the partial file.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

# Fetch data functions for new data
def fetch_player_info(player_id):
    """Return the CommonPlayerInfo data for ``player_id``, caching it on first use.

    On a cache miss the first data frame returned by the endpoint is pickled
    into ``new_cache_dir``. The returned value is always read back from that
    cache file.
    """
    cache_file = f'player_info_{player_id}.pkl'
    is_cached = cache_exists(new_cache_dir, cache_file)
    if not is_cached:
        endpoint = commonplayerinfo.CommonPlayerInfo(player_id=player_id, timeout=30)
        frames = endpoint.get_data_frames()
        save_to_cache(new_cache_dir, cache_file, frames[0])
    return load_from_cache(new_cache_dir, cache_file)

def fetch_league_gamefinder():
    """Return the unfiltered LeagueGameFinder data, caching it on first use.

    On a cache miss the first data frame returned by the endpoint is pickled
    to ``league_gamefinder.pkl`` in ``new_cache_dir``. The returned value is
    always read back from that cache file.
    """
    cache_file = 'league_gamefinder.pkl'
    is_cached = cache_exists(new_cache_dir, cache_file)
    if not is_cached:
        endpoint = leaguegamefinder.LeagueGameFinder(timeout=30)
        frames = endpoint.get_data_frames()
        save_to_cache(new_cache_dir, cache_file, frames[0])
    return load_from_cache(new_cache_dir, cache_file)

In [2]:
# Fetch all NBA players (both active and inactive)
all_players = players.get_players()


def _load_or_fetch(directory, filename, fetch):
    """Return the object cached at directory/filename; on a miss call ``fetch()``, cache its result and return it."""
    if cache_exists(directory, filename):
        return load_from_cache(directory, filename)
    data = fetch()
    save_to_cache(directory, filename, data)
    return data


# Fetch and combine data
combined_data = []
# In-memory memo keyed by season: the same season's league-games pickle is
# needed for every player who played that season, so read it from disk once.
league_games_by_season = {}
for player in all_players:
    player_id = player['id']
    print(f"Processing data for {player['full_name']} (ID: {player_id})")

    # Fetch player career stats if not cached
    career_stats_filename = f'career_stats_{player_id}.pkl'
    career_stats = _load_or_fetch(
        new_cache_dir,
        career_stats_filename,
        lambda: playercareerstats.PlayerCareerStats(player_id=player_id, timeout=30).get_data_frames()[0],
    )

    season_ids = career_stats['SEASON_ID'].unique()
    if len(season_ids) == 0:
        # No seasons means no game logs to combine; skip the player-info request too.
        continue

    # Player info does not depend on the season, so load it once per player
    # (fetch_player_info handles the cache) instead of once per season.
    player_info = fetch_player_info(player_id)
    # Expose PERSON_ID under the game log's key name so the two frames can be merged.
    player_info = player_info.assign(Player_ID=player_info['PERSON_ID'])

    for season_id in season_ids:
        # Fetch game log data
        game_log_filename = f'{player_id}_{season_id}_games.pkl'
        game_log = _load_or_fetch(
            player_stats_cache_dir,
            game_log_filename,
            lambda: playergamelog.PlayerGameLog(player_id=player_id, season=season_id, timeout=30).get_data_frames()[0],
        )

        # Fetch league game finder data
        if season_id not in league_games_by_season:
            league_games_filename = f'league_games_{season_id}.pkl'
            league_games_by_season[season_id] = _load_or_fetch(
                new_cache_dir,
                league_games_filename,
                lambda: leaguegamefinder.LeagueGameFinder(season_nullable=season_id, timeout=30).get_data_frames()[0],
            )
        league_games = league_games_by_season[season_id]

        # Combine all data
        combined_game_log = game_log.merge(player_info, on='Player_ID', how='left')
        combined_game_log['GAME_ID'] = combined_game_log['Game_ID']
        # NOTE(review): LeagueGameFinder presumably returns one row per team per
        # game, in which case merging on GAME_ID alone yields two rows per player
        # game and suffixed duplicate columns — confirm this is intended.
        combined_game_log = combined_game_log.merge(league_games, on='GAME_ID', how='left')
        combined_data.append(combined_game_log)

# Combine all data into one DataFrame (pd.concat raises on an empty list)
final_combined_df = pd.concat(combined_data, ignore_index=True) if combined_data else pd.DataFrame()

# Save the combined data
final_combined_df.to_pickle(os.path.join(new_cache_dir, 'final_combined_nba_data.pkl'))

print("Data collection and combination completed.")

Processing data for Alaa Abdelnaby (ID: 76001)
Processing data for Zaid Abdul-Aziz (ID: 76002)
Processing data for Kareem Abdul-Jabbar (ID: 76003)
Processing data for Mahmoud Abdul-Rauf (ID: 51)
Processing data for Tariq Abdul-Wahad (ID: 1505)
Processing data for Shareef Abdur-Rahim (ID: 949)
Processing data for Tom Abernethy (ID: 76005)
Processing data for Forest Able (ID: 76006)
Processing data for John Abramovic (ID: 76007)
Processing data for Alex Abrines (ID: 203518)
Processing data for Precious Achiuwa (ID: 1630173)
Processing data for Alex Acker (ID: 101165)
Processing data for Donald Ackerman (ID: 76008)
Processing data for Mark Acres (ID: 76009)
Processing data for Charles Acton (ID: 76010)
Processing data for Quincy Acy (ID: 203112)
Processing data for Alvan Adams (ID: 76011)
Processing data for Don Adams (ID: 76012)
Processing data for Hassan Adams (ID: 200801)
Processing data for Jaylen Adams (ID: 1629121)
Processing data for Jordan Adams (ID: 203919)
Processing data for Mi

In [None]:
# Append per-season team standings and league games to the running frames.
# NOTE(review): `seasons`, `teamstandings`, `all_team_standings` and
# `all_season_games` are not defined or imported in the visible cells — they
# must exist in the kernel before this cell runs; confirm where they come from.
standings_frames = []
season_games_frames = []
for season in seasons:
    # Fetch team standings
    standings = teamstandings.TeamStandings(season=season).get_data_frames()[0]
    standings_frames.append(standings)

    # Fetch season games
    season_games = leaguegamefinder.LeagueGameFinder(season_nullable=season).get_data_frames()[0]
    season_games_frames.append(season_games)

# Concatenate once after the loop: growing a DataFrame with pd.concat on every
# iteration re-copies all previously accumulated rows each time.
all_team_standings = pd.concat([all_team_standings, *standings_frames])
all_season_games = pd.concat([all_season_games, *season_games_frames])
