In [192]:
import time
from collections import Counter
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

# Lift pandas' display limits so frames are rendered without row/column truncation.
pd.options.display.max_rows = None
pd.options.display.max_columns = None


In [193]:
def scraper_advanced_statistics(starting_year, ending_year):
    """Scrape the per-player advanced statistics table for a range of seasons.

    One page (``NBA_<year>_advanced.html`` on basketball-reference.com) is
    requested per season. Seasons whose page cannot be retrieved are reported
    with ``print`` and skipped.

    Parameters
    ----------
    starting_year : int
        First season to scrape (inclusive).
    ending_year : int
        Last season to scrape (inclusive).

    Returns
    -------
    pandas.DataFrame
        The rows of every retrieved season stacked on top of each other, with
        the ``Rk`` and the two unnamed spacer columns removed and two columns
        added: ``Season`` (the year) and ``ID`` (the player's
        ``data-append-csv`` identifier). Values are left as parsed (no numeric
        conversion) and the index is not reset, so index labels repeat across
        seasons. An empty frame is returned when no season could be retrieved.

    Raises
    ------
    ValueError
        If the number of player identifiers found in the table does not match
        the number of player rows.
    """
    # Collect the seasons and concatenate once at the end instead of growing a
    # frame inside the loop.
    season_frames = []

    for year in tqdm(range(starting_year, ending_year + 1), desc="Processing Years", unit = "year"):
        try:
            # A timeout keeps a stalled connection from hanging the notebook;
            # requests raises a RequestException subclass, handled below.
            response = requests.get(
                f'https://www.basketball-reference.com/leagues/NBA_{year}_advanced.html',
                timeout=30,
            )

            if response.status_code != 200:
                print(f"Failed to retrieve the page. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')

            # Find the table on the page
            table = soup.find('table')
            if table is None:
                print(f"No table found on the page for season {year}.")
                continue

            # Parse the table; StringIO because passing literal HTML to
            # read_html is deprecated in recent pandas.
            df_season = pd.read_html(StringIO(str(table)))[0]
            df_season = df_season.drop(columns=['Unnamed: 19', "Unnamed: 24", "Rk"])
            df_season['Season'] = year  # add season

            # The header row is repeated inside the table body; drop those rows
            # explicitly (this also works when the header repeats only once).
            df_season = df_season[df_season['Player'] != 'Player'].copy()

            # The player id is not a table column: it is stored in the
            # data-append-csv attribute of the player cells of this table.
            ids = [td['data-append-csv'] for td in table.find_all('td', {'data-append-csv': True})]
            if len(ids) != len(df_season):
                raise ValueError(
                    f"Season {year}: found {len(ids)} player ids for {len(df_season)} table rows."
                )
            df_season['ID'] = ids

            # Players who changed team during the season have one row per team
            # plus a 'TOT' row covering all of their teams: keep only the 'TOT'
            # row. Duplicates are detected on the unique ID, not on the name, so
            # two different players sharing a name are not removed.
            partial_rows = (df_season['Tm'] != 'TOT') & df_season.duplicated(subset=['ID', 'Season'], keep=False)
            season_frames.append(df_season.loc[~partial_rows])

        except requests.exceptions.RequestException as e:
            # Handle exceptions that may occur during the request
            print(f"An error occurred during the request: {e}")
        finally:
            # Pause between requests, also after a failed one.
            time.sleep(1)

    return pd.concat(season_frames) if season_frames else pd.DataFrame()

In [194]:
def scraper_basic_statistics(starting_year, ending_year):
    """Scrape the per-player per-game statistics table for a range of seasons.

    One page (``NBA_<year>_per_game.html`` on basketball-reference.com) is
    requested per season. Seasons whose page cannot be retrieved are reported
    with ``print`` and skipped.

    Parameters
    ----------
    starting_year : int
        First season to scrape (inclusive).
    ending_year : int
        Last season to scrape (inclusive).

    Returns
    -------
    pandas.DataFrame
        The rows of every retrieved season stacked on top of each other, with
        the ``Rk`` column removed and two columns added: ``Season`` (the year)
        and ``ID`` (the player's ``data-append-csv`` identifier). Values are
        left as parsed (no numeric conversion) and the index is not reset, so
        index labels repeat across seasons. An empty frame is returned when no
        season could be retrieved.

    Raises
    ------
    ValueError
        If the number of player identifiers found in the table does not match
        the number of player rows.
    """
    # Collect the seasons and concatenate once at the end instead of growing a
    # frame inside the loop.
    season_frames = []

    for year in tqdm(range(starting_year, ending_year + 1), desc="Processing Years", unit = "year"):
        try:
            # A timeout keeps a stalled connection from hanging the notebook;
            # requests raises a RequestException subclass, handled below.
            response = requests.get(
                f'https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html',
                timeout=30,
            )

            if response.status_code != 200:
                print(f"Failed to retrieve the page. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')

            # Find the table on the page
            table = soup.find('table')
            if table is None:
                print(f"No table found on the page for season {year}.")
                continue

            # Parse the table; StringIO because passing literal HTML to
            # read_html is deprecated in recent pandas.
            df_season = pd.read_html(StringIO(str(table)))[0]
            df_season = df_season.drop(columns=["Rk"])
            df_season['Season'] = year  # add season

            # The header row is repeated inside the table body; drop those rows
            # explicitly (this also works when the header repeats only once).
            df_season = df_season[df_season['Player'] != 'Player'].copy()

            # The player id is not a table column: it is stored in the
            # data-append-csv attribute of the player cells of this table.
            ids = [td['data-append-csv'] for td in table.find_all('td', {'data-append-csv': True})]
            if len(ids) != len(df_season):
                raise ValueError(
                    f"Season {year}: found {len(ids)} player ids for {len(df_season)} table rows."
                )
            df_season['ID'] = ids

            # Players who changed team during the season have one row per team
            # plus a 'TOT' row covering all of their teams: keep only the 'TOT'
            # row. Duplicates are detected on the unique ID, not on the name, so
            # two different players sharing a name are not removed.
            partial_rows = (df_season['Tm'] != 'TOT') & df_season.duplicated(subset=['ID', 'Season'], keep=False)
            season_frames.append(df_season.loc[~partial_rows])

        except requests.exceptions.RequestException as e:
            # Handle exceptions that may occur during the request
            print(f"An error occurred during the request: {e}")
        finally:
            # Pause between requests, also after a failed one.
            time.sleep(1)

    return pd.concat(season_frames) if season_frames else pd.DataFrame()

In [195]:
# Per-game statistics for the 2010 through 2023 seasons.
df_2 = scraper_basic_statistics(starting_year=2010, ending_year=2023)
df_2.info()

Processing Years: 100%|██████████| 14/14 [00:38<00:00,  2.76s/year]

<class 'pandas.core.frame.DataFrame'>
Index: 7056 entries, 0 to 704
Data columns (total 31 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Player  7056 non-null   object
 1   Pos     7056 non-null   object
 2   Age     7056 non-null   object
 3   Tm      7056 non-null   object
 4   G       7056 non-null   object
 5   GS      7056 non-null   object
 6   MP      7056 non-null   object
 7   FG      7056 non-null   object
 8   FGA     7056 non-null   object
 9   FG%     7022 non-null   object
 10  3P      7056 non-null   object
 11  3PA     7056 non-null   object
 12  3P%     6378 non-null   object
 13  2P      7056 non-null   object
 14  2PA     7056 non-null   object
 15  2P%     6984 non-null   object
 16  eFG%    7022 non-null   object
 17  FT      7056 non-null   object
 18  FTA     7056 non-null   object
 19  FT%     6746 non-null   object
 20  ORB     7056 non-null   object
 21  DRB     7056 non-null   object
 22  TRB     7056 non-null   object




In [196]:
# Preview the first 20 rows of the per-game table.
df_2.head(20)

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Season,ID
0,Arron Afflalo,SG,24,DEN,82,75,27.1,3.3,7.1,0.465,1.3,3.0,0.434,2.0,4.1,0.488,0.557,0.9,1.2,0.735,0.7,2.4,3.1,1.7,0.6,0.4,0.9,2.7,8.8,2010,afflaar01
1,Alexis Ajinça,C,21,CHA,6,0,5.0,0.8,1.7,0.5,0.0,0.0,,0.8,1.7,0.5,0.5,0.0,0.2,0.0,0.2,0.5,0.7,0.0,0.2,0.2,0.3,0.8,1.7,2010,ajincal01
2,LaMarcus Aldridge,PF,24,POR,78,78,37.5,7.4,15.0,0.495,0.1,0.2,0.313,7.4,14.8,0.498,0.497,2.9,3.9,0.757,2.5,5.6,8.0,2.1,0.9,0.6,1.3,3.0,17.9,2010,aldrila01
3,Joe Alexander,SF,23,CHI,8,0,3.6,0.1,0.8,0.167,0.0,0.1,0.0,0.1,0.6,0.2,0.167,0.3,0.4,0.667,0.3,0.4,0.6,0.3,0.1,0.1,0.0,1.1,0.5,2010,alexajo01
4,Malik Allen,PF,31,DEN,51,3,8.9,0.9,2.3,0.397,0.0,0.1,0.167,0.9,2.2,0.409,0.401,0.2,0.3,0.923,0.7,0.9,1.6,0.3,0.2,0.1,0.4,1.3,2.1,2010,allenma01
5,Ray Allen*,SG,34,BOS,80,80,35.2,5.8,12.2,0.477,1.8,5.0,0.363,4.0,7.2,0.556,0.551,2.9,3.2,0.913,0.6,2.6,3.2,2.6,0.8,0.3,1.6,2.3,16.3,2010,allenra02
6,Tony Allen,SG,28,BOS,54,8,16.5,2.4,4.7,0.51,0.0,0.1,0.0,2.4,4.6,0.52,0.51,1.3,2.2,0.605,1.0,1.7,2.7,1.3,1.1,0.4,1.2,2.0,6.1,2010,allento01
7,Rafer Alston,PG,33,TOT,52,38,27.3,3.0,8.6,0.348,1.1,3.2,0.345,1.9,5.3,0.349,0.413,1.1,1.6,0.728,0.3,2.2,2.5,3.4,1.0,0.2,1.8,2.0,8.2,2010,alstora01
10,Lou Amundson,PF,27,PHO,79,0,14.8,1.9,3.5,0.551,0.0,0.0,0.0,1.9,3.5,0.553,0.551,0.8,1.6,0.545,1.6,2.8,4.4,0.4,0.3,0.9,0.7,2.1,4.7,2010,amundlo01
11,Chris Andersen,PF,31,DEN,76,0,22.3,1.9,3.3,0.566,0.0,0.0,0.0,1.9,3.3,0.573,0.566,2.2,3.1,0.695,1.9,4.4,6.4,0.4,0.6,1.9,0.8,2.3,5.9,2010,anderch01


In [197]:
# Season range, reused by the later scraping cells.
starting_year, ending_year = 2010, 2023

# Advanced statistics for the same seasons.
df = scraper_advanced_statistics(starting_year, ending_year)
df.info()

Processing Years: 100%|██████████| 14/14 [00:37<00:00,  2.69s/year]

<class 'pandas.core.frame.DataFrame'>
Index: 7056 entries, 0 to 704
Data columns (total 28 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Player  7056 non-null   object
 1   Pos     7056 non-null   object
 2   Age     7056 non-null   object
 3   Tm      7056 non-null   object
 4   G       7056 non-null   object
 5   MP      7056 non-null   object
 6   PER     7054 non-null   object
 7   TS%     7023 non-null   object
 8   3PAr    7022 non-null   object
 9   FTr     7022 non-null   object
 10  ORB%    7054 non-null   object
 11  DRB%    7054 non-null   object
 12  TRB%    7054 non-null   object
 13  AST%    7054 non-null   object
 14  STL%    7054 non-null   object
 15  BLK%    7054 non-null   object
 16  TOV%    7027 non-null   object
 17  USG%    7054 non-null   object
 18  OWS     7056 non-null   object
 19  DWS     7056 non-null   object
 20  WS      7056 non-null   object
 21  WS/48   7054 non-null   object
 22  OBPM    7056 non-null   object




In [199]:
# Spot-check one player: the last (up to) 50 advanced-stat rows for LeBron James.
df.loc[df['Player'].eq("LeBron James")].tail(50)

Unnamed: 0,Player,Pos,Age,Tm,G,MP,PER,TS%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,Season,ID
296,LeBron James,SF,25,CLE,76,2966,31.1,0.604,0.253,0.506,3.0,18.5,11.1,41.8,2.2,2.0,12.3,33.5,13.3,5.2,18.5,0.299,9.1,2.8,11.8,10.3,2010,jamesle01
322,LeBron James,SF,26,MIA,79,3063,27.3,0.594,0.188,0.446,3.3,18.7,11.4,34.9,2.1,1.3,13.8,31.5,10.3,5.3,15.6,0.244,6.3,1.8,8.1,7.8,2011,jamesle01
280,LeBron James,SF,27,MIA,62,2326,30.7,0.605,0.127,0.429,5.0,19.7,12.6,33.6,2.6,1.7,13.3,32.0,10.0,4.5,14.5,0.298,8.2,2.7,10.9,7.6,2012,jamesle01
265,LeBron James,PF,28,MIA,76,2877,31.6,0.64,0.188,0.395,4.4,20.8,13.1,36.4,2.4,1.9,12.4,30.2,14.6,4.7,19.3,0.322,9.3,2.4,11.7,9.9,2013,jamesle01
307,LeBron James,PF,29,MIA,77,2902,29.3,0.649,0.226,0.432,3.6,18.9,11.5,32.0,2.2,0.8,14.4,31.0,12.3,3.7,15.9,0.264,7.8,1.1,8.8,7.9,2014,jamesle01
303,LeBron James,SF,30,CLE,69,2493,25.9,0.577,0.265,0.413,2.4,16.6,9.6,38.6,2.3,1.6,15.3,32.3,7.4,2.9,10.4,0.199,6.1,1.0,7.1,5.7,2015,jamesle01
276,LeBron James,SF,31,CLE,76,2709,27.5,0.588,0.199,0.347,4.7,18.8,11.8,36.0,2.0,1.5,13.2,31.4,9.6,4.0,13.6,0.242,7.0,2.0,9.0,7.5,2016,jamesle01
281,LeBron James,SF,32,CLE,74,2794,27.0,0.619,0.254,0.395,4.0,20.7,12.6,41.3,1.6,1.3,16.1,30.0,9.8,3.0,12.9,0.221,6.4,1.2,7.6,6.7,2017,jamesle01
316,LeBron James,PF,33,CLE,82,3026,28.6,0.621,0.257,0.336,3.7,22.3,13.1,44.4,1.9,2.0,16.1,31.6,11.0,3.0,14.0,0.221,7.3,1.4,8.7,8.2,2018,jamesle01
347,LeBron James,SF,34,LAL,55,1937,25.6,0.588,0.299,0.382,3.1,21.3,12.4,39.4,1.7,1.4,13.3,31.6,4.7,2.6,7.2,0.179,6.4,1.7,8.0,4.9,2019,jamesle01


In [220]:
# Add the PER, WS and WS/48 columns of the advanced table to the per-game table,
# matching rows on player id, season and player name (inner join).
advanced_subset = df[['PER', 'ID', 'Season', 'Player', 'WS', 'WS/48']]
merged_df = df_2.merge(advanced_subset, on=['ID', 'Season', 'Player'])
merged_df.sort_values(by=['ID', 'Season']).head(30)

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Season,ID,PER,WS,WS/48
3287,Álex Abrines,SG,23,OKC,68,6,15.5,2.0,5.0,0.393,1.4,3.6,0.381,0.6,1.4,0.426,0.531,0.6,0.7,0.898,0.3,1.0,1.3,0.6,0.5,0.1,0.5,1.7,6.0,2017,abrinal01,10.1,2.1,0.096
3773,Álex Abrines,SG,24,OKC,75,8,15.1,1.5,3.9,0.395,1.1,2.9,0.38,0.4,0.9,0.443,0.54,0.5,0.6,0.848,0.3,1.2,1.5,0.4,0.5,0.1,0.3,1.7,4.7,2018,abrinal01,9.0,2.2,0.094
4313,Álex Abrines,SG,25,OKC,31,2,19.0,1.8,5.1,0.357,1.3,4.1,0.323,0.5,1.0,0.5,0.487,0.4,0.4,0.923,0.2,1.4,1.5,0.6,0.5,0.2,0.5,1.7,5.3,2019,abrinal01,6.3,0.6,0.053
5372,Precious Achiuwa,PF,21,MIA,61,4,12.1,2.0,3.7,0.544,0.0,0.0,0.0,2.0,3.7,0.546,0.544,0.9,1.8,0.509,1.2,2.2,3.4,0.5,0.3,0.5,0.7,1.5,5.0,2021,achiupr01,14.2,1.3,0.085
5912,Precious Achiuwa,C,22,TOR,73,28,23.6,3.6,8.3,0.439,0.8,2.1,0.359,2.9,6.1,0.468,0.486,1.1,1.8,0.595,2.0,4.5,6.5,1.1,0.5,0.6,1.2,2.1,9.1,2022,achiupr01,12.7,2.5,0.07
6517,Precious Achiuwa,C,23,TOR,55,12,20.7,3.6,7.3,0.485,0.5,2.0,0.269,3.0,5.4,0.564,0.521,1.6,2.3,0.702,1.8,4.1,6.0,0.9,0.6,0.5,1.1,1.9,9.2,2023,achiupr01,15.2,2.2,0.093
1372,Quincy Acy,PF,22,TOR,29,0,11.8,1.4,2.6,0.56,0.0,0.1,0.5,1.4,2.5,0.562,0.567,1.1,1.3,0.816,1.0,1.6,2.7,0.4,0.4,0.5,0.6,1.8,4.0,2013,acyqu01,15.9,1.1,0.157
1839,Quincy Acy,SF,23,TOT,63,0,13.4,1.0,2.2,0.468,0.1,0.2,0.267,1.0,2.0,0.492,0.482,0.6,0.8,0.66,1.1,2.3,3.4,0.4,0.4,0.4,0.5,1.9,2.7,2014,acyqu01,10.1,1.5,0.086
2319,Quincy Acy,PF,24,NYK,68,22,18.9,2.2,4.9,0.459,0.3,0.9,0.3,2.0,4.0,0.494,0.486,1.1,1.4,0.784,1.2,3.3,4.4,1.0,0.4,0.3,0.9,2.2,5.9,2015,acyqu01,11.9,1.7,0.063
2811,Quincy Acy,PF,25,SAC,59,29,14.8,2.0,3.6,0.556,0.3,0.8,0.388,1.7,2.8,0.606,0.6,0.8,1.2,0.735,1.1,2.1,3.2,0.5,0.5,0.4,0.5,1.7,5.2,2016,acyqu01,14.7,2.5,0.137


In [201]:
def rookies_id(starting_year, ending_year):
    """Collect the player identifiers listed on each season's rookies page.

    One page (``NBA_<year>_rookies.html`` on basketball-reference.com) is
    requested per season. Seasons whose page cannot be retrieved are reported
    with ``print`` and are absent from the result.

    Parameters
    ----------
    starting_year : int
        First season to scrape (inclusive).
    ending_year : int
        Last season to scrape (inclusive).

    Returns
    -------
    dict
        Maps each retrieved year to the list of ``data-append-csv`` values
        found on that season's page, in page order.
    """
    rookies_per_year = {}

    for year in tqdm(range(starting_year, ending_year + 1), desc="Processing Rookies", unit = "year"):
        try:
            # A timeout keeps a stalled connection from hanging the notebook;
            # requests raises a RequestException subclass, handled below.
            response = requests.get(
                f'https://www.basketball-reference.com/leagues/NBA_{year}_rookies.html',
                timeout=30,
            )

            if response.status_code != 200:
                print(f"Failed to retrieve the page. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')

            # Extract the values of the data-append-csv attribute
            td_elements = soup.find_all('td', {'data-append-csv': True})
            rookies_per_year[year] = [td['data-append-csv'] for td in td_elements]

        except requests.exceptions.RequestException as e:
            # Handle exceptions that may occur during the request
            print(f"An error occurred during the request: {e}")
        finally:
            # Pause between requests, also after a failed one.
            time.sleep(1)

    return rookies_per_year

In [202]:
# The result is stored under the same name as the scraper function, so after the
# first run `rookies_id` is a dict and calling it again would raise TypeError.
# Only scrape while the name still refers to the function; to refresh the data,
# re-run the cell that defines `rookies_id` first.
if callable(rookies_id):
    rookies_id = rookies_id(starting_year=starting_year, ending_year=ending_year)

Processing Rookies: 100%|██████████| 14/14 [00:19<00:00,  1.40s/year]


Add a `PER_mean` column: each player's mean PER across all of their seasons in the scraped data (an unweighted average of the per-season PER values).

In [221]:
# PER is parsed as text; convert it to float so it can be averaged.
merged_df = merged_df.astype({'PER': float})

In [222]:
# Spot-check the merged rows of a single player id.
merged_df.loc[merged_df['ID'].eq("abrinal01")]

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Season,ID,PER,WS,WS/48
3287,Álex Abrines,SG,23,OKC,68,6,15.5,2.0,5.0,0.393,1.4,3.6,0.381,0.6,1.4,0.426,0.531,0.6,0.7,0.898,0.3,1.0,1.3,0.6,0.5,0.1,0.5,1.7,6.0,2017,abrinal01,10.1,2.1,0.096
3773,Álex Abrines,SG,24,OKC,75,8,15.1,1.5,3.9,0.395,1.1,2.9,0.38,0.4,0.9,0.443,0.54,0.5,0.6,0.848,0.3,1.2,1.5,0.4,0.5,0.1,0.3,1.7,4.7,2018,abrinal01,9.0,2.2,0.094
4313,Álex Abrines,SG,25,OKC,31,2,19.0,1.8,5.1,0.357,1.3,4.1,0.323,0.5,1.0,0.5,0.487,0.4,0.4,0.923,0.2,1.4,1.5,0.6,0.5,0.2,0.5,1.7,5.3,2019,abrinal01,6.3,0.6,0.053


In [223]:
# Mean of each player's season PER values over the seasons present in merged_df.
mean_PER = merged_df.groupby('ID')['PER'].mean().round(2)

# Map the per-player mean back onto the rows instead of merging it in. Merging
# relies on a suffix to create PER_mean and, when the cell is run a second time,
# collides with the PER_mean column that already exists (duplicate column or
# MergeError, depending on the pandas version). Assigning by name just
# overwrites the column, so the cell can be re-run safely.
merged_df = merged_df.reset_index(drop=True)
merged_df['PER_mean'] = merged_df['ID'].map(mean_PER)
merged_df[merged_df['ID'] == "abrinal01"].head(10)

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Season,ID,PER,WS,WS/48,PER_mean
3287,Álex Abrines,SG,23,OKC,68,6,15.5,2.0,5.0,0.393,1.4,3.6,0.381,0.6,1.4,0.426,0.531,0.6,0.7,0.898,0.3,1.0,1.3,0.6,0.5,0.1,0.5,1.7,6.0,2017,abrinal01,10.1,2.1,0.096,8.47
3773,Álex Abrines,SG,24,OKC,75,8,15.1,1.5,3.9,0.395,1.1,2.9,0.38,0.4,0.9,0.443,0.54,0.5,0.6,0.848,0.3,1.2,1.5,0.4,0.5,0.1,0.3,1.7,4.7,2018,abrinal01,9.0,2.2,0.094,8.47
4313,Álex Abrines,SG,25,OKC,31,2,19.0,1.8,5.1,0.357,1.3,4.1,0.323,0.5,1.0,0.5,0.487,0.4,0.4,0.923,0.2,1.4,1.5,0.6,0.5,0.2,0.5,1.7,5.3,2019,abrinal01,6.3,0.6,0.053,8.47


Select only the rows for each player's rookie season.

In [241]:
# NOTE(review): this bare expression is not the last statement of the cell, so it
# displays nothing; the output shown below the cell does not come from it.
rookies_id

def inverted_rookies(dic):
    """Flip a ``{season: [player_id, ...]}`` mapping into ``{player_id: season}``.

    If a player id is listed under more than one season, the season that comes
    last in the iteration order of ``dic`` wins.
    """
    return {player_id: season for season, player_ids in dic.items() for player_id in player_ids}

dict_keys([2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023])


In [251]:
# One row per rookie: player id and the season listed as their rookie year.
rookie_season_by_id = inverted_rookies(rookies_id)
inv_rookies = pd.DataFrame(list(rookie_season_by_id.items()), columns=['ID', 'Season'])
inv_rookies.head(10)

Unnamed: 0,ID,Season
0,anderda03,2010
1,anderan02,2010
2,pendeje02,2010
3,beaubro01,2010
4,blairde01,2010
5,brockjo01,2010
6,brownde04,2010
7,budinch01,2010
8,carrode01,2010
9,casspom01,2010


In [256]:
# Inner join on (ID, Season) keeps only each player's rookie-season row.
rookies_df = merged_df.merge(inv_rookies, on=['ID', 'Season'])
rookies_df.head(30)

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Season,ID,PER,WS,WS/48,PER_mean
0,David Andersen,C,29,HOU,63,0,14.1,2.3,5.4,0.432,0.4,1.2,0.346,1.9,4.2,0.458,0.472,0.7,1.1,0.687,0.9,2.4,3.3,0.7,0.2,0.2,0.6,1.9,5.8,2010,anderda03,12.1,1.3,0.07,11.5
1,Antonio Anderson,SF,24,OKC,1,0,15.0,1.0,3.0,0.333,0.0,0.0,,1.0,3.0,0.333,0.333,0.0,0.0,,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,2.0,2010,anderan02,-1.7,0.0,-0.104,-1.7
2,Jeff Ayres,C,22,POR,39,4,10.4,1.1,1.7,0.662,0.0,0.0,,1.1,1.7,0.662,0.662,0.5,0.5,0.9,0.6,1.9,2.5,0.0,0.2,0.4,0.3,1.9,2.7,2010,pendeje02,12.1,1.3,0.156,12.3
3,Rodrigue Beaubois,PG,21,DAL,56,16,12.5,2.8,5.3,0.518,0.8,2.0,0.409,2.0,3.4,0.582,0.594,0.8,0.9,0.808,0.2,1.2,1.4,1.3,0.5,0.2,1.0,1.5,7.1,2010,beaubro01,18.5,2.0,0.134,13.95
4,DeJuan Blair,PF,20,SAS,82,23,18.2,3.4,6.0,0.556,0.0,0.0,0.0,3.4,6.0,0.558,0.556,1.1,2.0,0.547,2.4,4.0,6.4,0.8,0.6,0.5,1.4,2.7,7.8,2010,blairde01,17.7,4.6,0.147,13.91
5,Jon Brockman,PF,22,SAC,52,4,12.6,1.1,2.0,0.534,0.0,0.0,,1.1,2.0,0.534,0.534,0.7,1.2,0.597,2.0,2.1,4.1,0.4,0.3,0.1,0.4,2.2,2.8,2010,brockjo01,12.4,1.7,0.125,9.53
6,Derrick Brown,SF,22,CHA,57,0,9.4,1.2,2.6,0.463,0.0,0.1,0.286,1.2,2.5,0.471,0.469,0.8,1.3,0.667,0.5,0.8,1.4,0.3,0.4,0.2,0.3,0.9,3.3,2010,brownde04,12.6,1.3,0.114,13.4
7,Chase Budinger,SF,21,HOU,74,4,20.1,3.4,7.6,0.441,1.2,3.4,0.369,2.1,4.3,0.497,0.522,0.9,1.2,0.77,0.5,2.5,3.0,1.2,0.5,0.1,0.6,1.1,8.9,2010,budinch01,14.1,3.1,0.1,12.63
8,DeMarre Carroll,SF,23,MEM,71,1,11.2,1.2,3.1,0.396,0.0,0.1,0.0,1.2,3.0,0.407,0.396,0.5,0.7,0.623,0.7,1.4,2.1,0.5,0.4,0.1,0.3,1.5,2.9,2010,carrode01,8.5,0.5,0.031,12.08
9,Omri Casspi,SF,21,SAC,77,31,25.1,3.9,8.7,0.446,1.0,2.6,0.369,2.9,6.1,0.48,0.502,1.5,2.3,0.672,1.0,3.6,4.5,1.2,0.7,0.2,1.3,1.6,10.3,2010,casspom01,13.0,2.5,0.063,13.12


In [257]:
# (rows, columns) of the rookie-season table.
rookies_df.shape

(1250, 35)