In [28]:
import pandas as pd
import requests # data scripting
pd.set_option('display.max_columns', None) # so we can see all columns in a wide DataFrame
import time
import numpy as np

In [42]:
test_url = 'https://stats.nba.com/stats/leagueLeaders?LeagueID=00&PerMode=Totals&Scope=S&Season=2023-24&SeasonType=Regular%20Season&StatCategory=PTS'
r = requests.get(url=test_url).json()
table_headers = r['resultSet']['headers']
table_headers

['PLAYER_ID',
 'RANK',
 'PLAYER',
 'TEAM_ID',
 'TEAM',
 'GP',
 'MIN',
 'FGM',
 'FGA',
 'FG_PCT',
 'FG3M',
 'FG3A',
 'FG3_PCT',
 'FTM',
 'FTA',
 'FT_PCT',
 'OREB',
 'DREB',
 'REB',
 'AST',
 'STL',
 'BLK',
 'TOV',
 'PF',
 'PTS',
 'EFF',
 'AST_TOV',
 'STL_TOV']

In [46]:
df_cols = ['Year', 'Season_type'] + table_headers

In [47]:
pd.DataFrame(columns=df_cols)

Unnamed: 0,Year,Season_type,PLAYER_ID,RANK,PLAYER,TEAM_ID,TEAM,GP,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,EFF,AST_TOV,STL_TOV


In [48]:
headers = {
    'accept': '*/*',
    'accept-encoding': 'gzip, deflate, br, zstd',
    'accept-language': 'en-US,en;q=0.9',
    'connection': 'keep-alive',
    'host': 'stats.nba.com',
    'origin': 'https://www.nba.com',
    'referer': 'https://www.nba.com/',
    'sec-ch-ua':
    '"Google Chrome";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
    'sec-ch-ua-mobile': '?1',
    'sec-ch-ua-platform': '"Android"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Mobile Safari/537.36'
}

In [49]:
df = pd.DataFrame(columns=df_cols)
season_types = ['Regular%20Season', 'Playoffs']
years = ['2014-15', '2015-16', '2016-17', '2017-18', '2018-19', '2019-20', '2020-21', '2021-22', '2022-23', '2023-24']

begin_loop = time.time()

for y in years:
    for s in season_types:
        api_url = 'https://stats.nba.com/stats/leagueLeaders?LeagueID=00&PerMode=Totals&Scope=S&Season='+y+'&SeasonType='+s+'&StatCategory=PTS'
        r = requests.get(url=api_url, headers=headers).json()
        temp_df1 = pd.DataFrame(r['resultSet']['rowSet'], columns=table_headers)
        temp_df2 = pd.DataFrame({'Year':[y for i in range(len(temp_df1))],
                                 'Season_type':[s for i in range(len(temp_df1))]})
        temp_df3 = pd.concat([temp_df2,temp_df1], axis=1)
        df = pd.concat([df, temp_df3], axis=0)
        print(f'Finished scraping data for the {y} {s}.')
        lag = np.random.uniform(low=5,high=20)
        print(f'...waiting {round(lag,1)} seconds')
        time.sleep(lag)
        
print(f'Process completed! Total run time: {round((time.time()-begin_loop)/60,2)}')
df.to_excel('nba_player_data.xlsx', index=False)

Finished scraping data for the 2014-15 Regular%20Season.
...waiting 12.8 seconds
Finished scraping data for the 2014-15 Playoffs.
...waiting 9.9 seconds
Finished scraping data for the 2015-16 Regular%20Season.
...waiting 10.7 seconds
Finished scraping data for the 2015-16 Playoffs.
...waiting 5.3 seconds
Finished scraping data for the 2016-17 Regular%20Season.
...waiting 18.7 seconds
Finished scraping data for the 2016-17 Playoffs.
...waiting 10.4 seconds
Finished scraping data for the 2017-18 Regular%20Season.
...waiting 14.2 seconds
Finished scraping data for the 2017-18 Playoffs.
...waiting 6.2 seconds
Finished scraping data for the 2018-19 Regular%20Season.
...waiting 13.8 seconds
Finished scraping data for the 2018-19 Playoffs.
...waiting 11.4 seconds
Finished scraping data for the 2019-20 Regular%20Season.
...waiting 17.1 seconds
Finished scraping data for the 2019-20 Playoffs.
...waiting 18.6 seconds
Finished scraping data for the 2020-21 Regular%20Season.
...waiting 16.1 second

In [50]:
df

Unnamed: 0,Year,Season_type,PLAYER_ID,RANK,PLAYER,TEAM_ID,TEAM,GP,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,EFF,AST_TOV,STL_TOV
0,2014-15,Regular%20Season,201935,1,James Harden,1610612745,HOU,81,2981,647,1470,0.440,208,555,0.375,715,824,0.868,75,384,459,565,154,60,321,208,2217,2202,1.76,0.48
1,2014-15,Regular%20Season,201939,2,Stephen Curry,1610612744,GSW,80,2613,653,1341,0.487,286,646,0.443,308,337,0.914,56,285,341,619,163,16,249,158,1900,2073,2.49,0.66
2,2014-15,Regular%20Season,201566,3,Russell Westbrook,1610612760,OKC,67,2302,627,1471,0.426,86,288,0.299,546,654,0.835,124,364,488,574,140,14,293,184,1886,1857,1.96,0.48
3,2014-15,Regular%20Season,2544,4,LeBron James,1610612739,CLE,69,2493,624,1279,0.488,120,339,0.354,375,528,0.710,51,365,416,511,109,49,272,135,1743,1748,1.88,0.40
4,2014-15,Regular%20Season,203081,5,Damian Lillard,1610612757,POR,82,2925,590,1360,0.434,196,572,0.343,344,398,0.864,49,329,378,507,97,21,222,164,1720,1677,2.28,0.44
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,2023-24,Playoffs,1641765,198,Olivier-Maxence Prosper,1610612742,DAL,3,9,0,2,0.000,0,0,0.000,0,0,0.000,0,3,3,1,0,0,0,0,0,2,0.00,0.00
210,2023-24,Playoffs,1631115,198,Orlando Robinson,1610612748,MIA,1,2,0,1,0.000,0,1,0.000,0,0,0.000,0,1,1,1,0,0,0,0,0,1,0.00,0.00
211,2023-24,Playoffs,203933,198,T.J. Warren,1610612750,MIN,3,11,0,2,0.000,0,1,0.000,0,0,0.000,2,1,3,1,0,0,0,0,0,2,0.00,0.00
212,2023-24,Playoffs,201152,198,Thaddeus Young,1610612756,PHX,1,4,0,0,0.000,0,0,0.000,0,0,0.000,0,0,0,0,0,0,0,0,0,0,0.00,0.00
