In [1]:
from nba_api.stats.endpoints import playercareerstats

from nba_api.stats.static import players

from nba_api.stats.static import teams

from nba_api.stats.endpoints import commonplayerinfo

import pandas as pd

import time  # Import time for delays to avoid rate limits

import requests


In [None]:
# players.get_players() hands back one dictionary per player.
nba_players = players.get_players()

# Tabulate the roster ordered by player id.
df_player = (
    pd.DataFrame(nba_players)
    .sort_values(by='id')
)

# Persist the roster without the DataFrame index column.
df_player.to_csv("datasets/players.csv", index=False)

In [19]:
# get_teams returns a list of 30 dictionaries, each an NBA team.
nba_teams = teams.get_teams()

# Build the frame from the list fetched above rather than calling
# teams.get_teams() a second time.
df_team = pd.DataFrame(nba_teams)

# Persist the team table without the DataFrame index column.
df_team.to_csv("datasets/teams.csv", index=False)

In [3]:
# Single-player probe of the CommonPlayerInfo endpoint (player id 2, timeout=3)
# before looping over many ids.
player_info = commonplayerinfo.CommonPlayerInfo(player_id=2, timeout=3)

In [4]:
def get_player_info_dataframe(df_player, limit=10, timeout=3, delay=3):
    """
    Retrieves common player information for players in a DataFrame,
    returning a single concatenated DataFrame.

    Players are queried in the row order of df_player. A failure for an
    individual player is printed and that player is skipped; the loop
    continues with the next id.

    Args:
        df_player (pd.DataFrame): DataFrame with a column named 'id'
                                  containing player IDs.
        limit (int | None): Maximum number of players to query, taken from
                            the top of df_player. Defaults to 10 (the
                            previous hard-coded batch size); pass None to
                            query every row.
        timeout: Value forwarded to CommonPlayerInfo's ``timeout``
                 argument. Defaults to 3.
        delay (float): Seconds to sleep after each request attempt
                       (successful or not) to avoid rate limits.
                       Defaults to 3.

    Returns:
        pd.DataFrame: A DataFrame containing the combined player information,
                      with an added 'player_id' column.
                      Returns an empty DataFrame if no data is retrieved.
    """

    all_player_data = []  # One single-player DataFrame per successful request

    # .iloc makes the positional slice explicit; limit=None selects every id.
    for player_id in df_player['id'].iloc[:limit]:
        try:
            player_info = commonplayerinfo.CommonPlayerInfo(
                player_id=player_id,
                timeout=timeout
            )
            player_df = player_info.common_player_info.get_data_frame()
            # Add a 'player_id' column to keep track of which player the data belongs to
            player_df['player_id'] = player_id
            all_player_data.append(player_df)
            print(f"Successfully retrieved info for player ID: {player_id}")
        except Exception as e:
            # Best effort: report the failure and move on to the next player.
            print(f"Error retrieving info for player ID: {player_id}: {e}")
        # Pause after every attempt, including failed ones.
        time.sleep(delay)

    if not all_player_data:
        return pd.DataFrame()  # Return an empty DataFrame if no data

    return pd.concat(all_player_data, ignore_index=True)


In [5]:
# Fetch detailed info for the first batch of players in df_player
# (get_player_info_dataframe only queries the first 10 ids by default).
player_data_df = get_player_info_dataframe(df_player)

Successfully retrieved info for player ID: 2
Successfully retrieved info for player ID: 3
Successfully retrieved info for player ID: 7
Successfully retrieved info for player ID: 9
Successfully retrieved info for player ID: 12
Successfully retrieved info for player ID: 15
Successfully retrieved info for player ID: 17
Successfully retrieved info for player ID: 21
Successfully retrieved info for player ID: 22
Successfully retrieved info for player ID: 23


In [None]:
# index=False keeps the DataFrame index out of the file; otherwise reading the
# CSV back produces a spurious "Unnamed: 0" column.
player_data_df.to_csv('datasets/player_data_df.csv', index=False)

## Continue to append missing player data into player_data_df

In [11]:
# Reload the complete player list written by the players cell.
df_player = pd.read_csv('datasets/players.csv')

# Reload the player details gathered so far.
# NOTE(review): this reads players_full.csv, whereas the first save cell writes
# player_data_df.csv — confirm which file is the intended checkpoint.
player_data_df = pd.read_csv('datasets/players_full.csv')

In [12]:
def append_missing_player_data(df_player, player_data_df, limit=10, timeout=3, delay=1):
    """
    Retrieves data for players missing from player_data_df (based on IDs
    in df_player) and returns player_data_df extended with the new rows.

    The player_data_df passed in is NOT modified; callers must use the
    return value. If a ReadTimeout occurs, the loop breaks and everything
    retrieved up to that point is still appended. Any other error for a
    single player is printed and that player is skipped.

    Args:
        df_player (pd.DataFrame): DataFrame containing the complete list
                                  of player IDs in the 'id' column.
        player_data_df (pd.DataFrame): DataFrame containing the player
                                        data already retrieved. Already
                                        known players are read from its
                                        'player_id' column; if that column
                                        is absent (e.g. an empty frame),
                                        every player counts as missing.
        limit (int | None): Maximum number of missing players to fetch in
                            this call. Defaults to 10 (the previous
                            hard-coded batch size); None fetches all.
        timeout: Value forwarded to CommonPlayerInfo's ``timeout``
                 argument. Defaults to 3.
        delay (float): Seconds to sleep after each request attempt that
                       did not break the loop. Defaults to 1.

    Returns:
        pd.DataFrame: player_data_df with the newly retrieved rows appended
                      (index reset). When nothing new was retrieved, the
                      original player_data_df object is returned unchanged.
    """

    # 1. Identify Missing Player IDs
    if 'player_id' in player_data_df.columns:
        known_ids = player_data_df['player_id']
    else:
        # Nothing retrieved yet (e.g. an empty DataFrame without columns).
        known_ids = []
    is_missing = ~df_player['id'].isin(known_ids)
    # .iloc makes the positional slice explicit; limit=None keeps every id.
    missing_player_ids = df_player.loc[is_missing, 'id'].iloc[:limit]

    # 2. Iterate and Retrieve
    # Frames are collected in a list and concatenated once at the end instead
    # of re-copying the growing DataFrame on every iteration.
    new_frames = []
    for player_id in missing_player_ids:
        try:
            player_info = commonplayerinfo.CommonPlayerInfo(
                player_id=player_id,
                timeout=timeout
            )
            player_df = player_info.common_player_info.get_data_frame()
            player_df['player_id'] = player_id  # Add player_id column
            new_frames.append(player_df)
            print(f"Successfully retrieved and appended info for player ID: {player_id}")
        except requests.exceptions.ReadTimeout as e:  # Catch the ReadTimeoutError
            print(f"Error retrieving info for player ID: {player_id}: {e}")
            print("ReadTimeoutError encountered. Breaking the loop.")
            break  # Stop this batch; rows gathered so far are kept below
        except Exception as e:
            # Best effort: report the failure and continue with the next player.
            print(f"Error retrieving info for player ID: {player_id}: {e}")
        time.sleep(delay)

    # 3. Append everything that was retrieved in a single concat
    if not new_frames:
        return player_data_df
    return pd.concat([player_data_df, *new_frames], ignore_index=True)

# --- Example Usage ---
# Fetch the next batch of missing players and rebind player_data_df to the
# extended frame returned by the function.
player_data_df = append_missing_player_data(df_player, player_data_df)

# index=False keeps the DataFrame index out of the file. This CSV is re-read
# and re-written on every run of the cells above, so writing the index would
# add another "Unnamed: 0"-style column each cycle.
player_data_df.to_csv('datasets/players_full.csv', index=False)

Successfully retrieved and appended info for player ID: 81
Successfully retrieved and appended info for player ID: 82
Successfully retrieved and appended info for player ID: 84
Successfully retrieved and appended info for player ID: 85
Successfully retrieved and appended info for player ID: 87
Successfully retrieved and appended info for player ID: 88
Successfully retrieved and appended info for player ID: 89
Successfully retrieved and appended info for player ID: 93
Successfully retrieved and appended info for player ID: 95
Successfully retrieved and appended info for player ID: 96


In [98]:
# Display the accumulated player details as the cell output.
player_data_df

Unnamed: 0,PERSON_ID,FIRST_NAME,LAST_NAME,DISPLAY_FIRST_LAST,DISPLAY_LAST_COMMA_FIRST,DISPLAY_FI_LAST,PLAYER_SLUG,BIRTHDATE,SCHOOL,COUNTRY,...,FROM_YEAR,TO_YEAR,DLEAGUE_FLAG,NBA_FLAG,GAMES_PLAYED_FLAG,DRAFT_YEAR,DRAFT_ROUND,DRAFT_NUMBER,GREATEST_75_FLAG,player_id
0,2,Byron,Scott,Byron Scott,"Scott, Byron",B. Scott,byron-scott,1961-03-28T00:00:00,Arizona State,USA,...,1983,1996,N,Y,Y,1983,1,4,N,2
1,3,Grant,Long,Grant Long,"Long, Grant",G. Long,grant-long,1966-03-12T00:00:00,Eastern Michigan,USA,...,1988,2002,N,Y,Y,1988,2,33,N,3
2,7,Dan,Schayes,Dan Schayes,"Schayes, Dan",D. Schayes,dan-schayes,1959-05-10T00:00:00,Syracuse,USA,...,1981,1998,N,Y,Y,1981,1,13,N,7
3,9,Sedale,Threatt,Sedale Threatt,"Threatt, Sedale",S. Threatt,sedale-threatt,1961-09-10T00:00:00,West Virginia Tech,USA,...,1983,1996,N,Y,Y,1983,6,139,N,9
4,12,Chris,King,Chris King,"King, Chris",C. King,chris-king,1969-07-24T00:00:00,Wake Forest,USA,...,1993,1998,N,Y,Y,1992,2,45,N,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,236,Kevin,Edwards,Kevin Edwards,"Edwards, Kevin",K. Edwards,kevin-edwards,1965-10-30T00:00:00,DePaul,USA,...,1988,2000,N,Y,Y,1988,1,20,N,236
146,238,Tyrone,Hill,Tyrone Hill,"Hill, Tyrone",T. Hill,tyrone-hill,1968-03-19T00:00:00,Xavier,USA,...,1990,2003,N,Y,Y,1990,1,11,N,238
147,239,Darrick,Martin,Darrick Martin,"Martin, Darrick",D. Martin,darrick-martin,1971-03-06T00:00:00,UCLA,USA,...,1994,2007,N,Y,Y,Undrafted,Undrafted,Undrafted,N,239
148,240,Brooks,Thompson,Brooks Thompson,"Thompson, Brooks",B. Thompson,brooks-thompson,1970-07-19T00:00:00,Oklahoma State,USA,...,1994,1997,N,Y,Y,1994,1,27,N,240


In [None]:
# Same import as in the first cell; repeated here so this cell can run on its own.
from nba_api.stats.endpoints import playercareerstats

# Anthony Davis
career = playercareerstats.PlayerCareerStats(player_id="203076")
# Show the first data frame of the response as the cell output.
career.get_data_frames()[0]

In [None]:

import time  # Import time for delays to avoid rate limits

def get_players_career_stats(player_ids, delay=1):
    """
    Fetches and combines career statistics for multiple players.

    Each player is requested individually; the first data frame of every
    response is kept. A failure for one player is printed and that player
    is skipped.

    Args:
        player_ids (list): A list of player IDs. Each ID is converted to a
                           string before being sent to the endpoint.
        delay (float): Seconds to sleep after every request attempt,
                       successful or not, to avoid rate limits.
                       Defaults to 1.

    Returns:
        pandas.DataFrame: A DataFrame containing the combined career statistics
                          (index reset), or an empty DataFrame if nothing
                          could be fetched.
    """

    all_stats = []
    for player_id in player_ids:
        try:
            career = playercareerstats.PlayerCareerStats(player_id=str(player_id))  # Convert player_id to string
            all_stats.append(career.get_data_frames()[0])
        except Exception as e:
            # Best effort: report the failure and continue with the next player.
            print(f"Error fetching stats for player ID {player_id}: {e}")
        # Pause after every attempt. Sleeping only on success would retry
        # back-to-back with no delay exactly when requests are failing.
        time.sleep(delay)

    if not all_stats:
        return pd.DataFrame()

    return pd.concat(all_stats, ignore_index=True)

# df_player must already be loaded (it is built or reloaded in earlier cells).
player_ids = df_player['id'].tolist() # Every player id from the 'id' column

# Get the combined career statistics
# NOTE(review): this issues one request per id in df_player, with a pause
# between requests, so the cell can take a long time on the full player list.
career_stats_df = get_players_career_stats(player_ids)

# Print the combined DataFrame
print(career_stats_df)

      PLAYER_ID SEASON_ID LEAGUE_ID     TEAM_ID TEAM_ABBREVIATION  PLAYER_AGE  \
0             2   1983-84        00  1610612747               LAL        23.0   
1             2   1984-85        00  1610612747               LAL        24.0   
2             2   1985-86        00  1610612747               LAL        25.0   
3             2   1986-87        00  1610612747               LAL        26.0   
4             2   1987-88        00  1610612747               LAL        27.0   
...         ...       ...       ...         ...               ...         ...   
1222        165   1997-98        00  1610612745               HOU        35.0   
1223        165   1998-99        00  1610612745               HOU        36.0   
1224        165   1999-00        00  1610612745               HOU        37.0   
1225        165   2000-01        00  1610612745               HOU        38.0   
1226        165   2001-02        00  1610612761               TOR        39.0   

      GP  GS     MIN  FGM  