In [1]:
import requests
import numpy as np
import pandas as pd

In [2]:
class CreateNBAData:
    """
    This class scrapes the official NBA.com API: data.nba.net.
    See https://data.nba.net/10s/prod/v1/today.json

    The ``create_*`` methods can be called in any order: methods that need
    player ids or team URL names fetch them on demand when they have not
    been loaded yet.

    Args:
        season_year (int): Use the first year to specify season, e.g. specify 2019 for the 2019-2020 season.
    """
    # Seconds to wait for each HTTP response; requests applies no timeout by default.
    _REQUEST_TIMEOUT = 30

    def __init__(self, season_year):
        self._season_year = str(season_year)
        # Populated by create_players_df() / create_teams_df(); None means "not fetched yet".
        self._person_ids = None
        self._team_urls = None

    def _get_json(self, request_url):
        """
        Fetch ``request_url`` and return the decoded JSON body.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        response = requests.get(request_url, timeout=self._REQUEST_TIMEOUT)
        # Fail on 4xx/5xx here instead of on a confusing JSON/KeyError later.
        response.raise_for_status()
        return response.json()

    def create_players_df(self):
        """
        This function returns the DataFrame of player information.

        Only string and boolean fields of each player record are kept (nested
        structures are dropped). Rows are limited to players flagged
        ``isActive`` whose ``heightMeters`` is not an empty string. The ids of
        the returned players are stored for later use by create_stats_df().

        Returns:
            pandas.DataFrame: one row per retained player, index reset to 0..n-1.
        """
        request_url = "https://data.nba.net/prod/v1/{}/players.json".format(self._season_year)
        players_list = self._get_json(request_url)['league']['standard']
        print("Creating players df...")
        players_list_dict = [
            {k: v for k, v in p.items() if isinstance(v, (str, bool))}
            for p in players_list
        ]
        df = pd.DataFrame(players_list_dict)
        filtered_df = df[(df['isActive']) & (df['heightMeters'] != '')]
        filtered_df = filtered_df.reset_index(drop=True)
        self._person_ids = filtered_df['personId'].values
        return filtered_df

    def create_stats_df(self):
        """
        This function returns the DataFrame of player career statistics.

        One profile request is issued per stored player id. The player list is
        downloaded first only if create_players_df() has not been called yet on
        this instance; otherwise the stored ids are reused.

        Returns:
            pandas.DataFrame: one row per player, with ``personId`` as the
            first column followed by the ``careerSummary`` fields.
        """
        if self._person_ids is None:
            self.create_players_df()
        career_summaries = []
        print("Creating player stats df...")
        for pid in self._person_ids:
            request_url = "https://data.nba.net/prod/v1/{}/players/{}_profile.json".format(self._season_year, pid)
            profile_json = self._get_json(request_url)
            career_summaries.append(profile_json['league']['standard']['stats']['careerSummary'])
        stats_df = pd.DataFrame(career_summaries)
        # Rows were appended in the same order as the ids, so they line up positionally.
        stats_df.insert(0, 'personId', self._person_ids)
        return stats_df

    def create_teams_df(self):
        """
        This function returns the DataFrame of NBA franchises.

        Entries whose ``isNBAFranchise`` flag is falsy are dropped. The
        ``urlName`` column is stored for later use by create_roster_df().

        Returns:
            pandas.DataFrame: one row per NBA franchise.
        """
        request_url = 'https://data.nba.net/prod/v2/{}/teams.json'.format(self._season_year)
        teams = self._get_json(request_url)['league']['standard']
        nba_teams = [t for t in teams if t['isNBAFranchise']]
        teams_df = pd.DataFrame(nba_teams)
        self._team_urls = teams_df['urlName']
        return teams_df

    def create_roster_df(self):
        """
        This function returns the DataFrame mapping players to teams.

        One roster request is issued per stored team URL name. The team list is
        downloaded first if create_teams_df() has not been called yet on this
        instance.

        Returns:
            pandas.DataFrame: columns ``personId`` and ``teamId``; the index
            restarts at 0 for every team. An empty DataFrame is returned when
            there are no teams.
        """
        if self._team_urls is None:
            self.create_teams_df()
        team_frames = []
        for team_url in self._team_urls:
            print("Scraping {}...".format(team_url))
            request_url = 'https://data.nba.net/prod/v1/{}/teams/{}/roster.json'.format(self._season_year, team_url)
            standard = self._get_json(request_url)['league']['standard']
            team_frames.append(pd.DataFrame({
                'personId': [player['personId'] for player in standard['players']],
                'teamId': standard['teamId'],  # scalar, broadcast to every row
            }))
        if not team_frames:
            # pd.concat raises on an empty list; mirror the empty-frame result instead.
            return pd.DataFrame()
        # Single concat instead of DataFrame.append in a loop (removed in pandas 2.0).
        return pd.concat(team_frames)

In [3]:
# Build every table for the 2019-2020 season (the first year identifies the season).
cnd = CreateNBAData(2019)
players = cnd.create_players_df()
careerSummaries = cnd.create_stats_df()  # issues one profile request per player id
# Teams are fetched before rosters: create_roster_df iterates the team URL names
# gathered by create_teams_df.
teams = cnd.create_teams_df()
rosters = cnd.create_roster_df()

Creating players df...
Creating players df...
Creating player stats df...
Scraping hawks...
Scraping celtics...
Scraping nets...
Scraping hornets...
Scraping bulls...
Scraping cavaliers...
Scraping mavericks...
Scraping nuggets...
Scraping pistons...
Scraping warriors...
Scraping rockets...
Scraping pacers...
Scraping clippers...
Scraping lakers...
Scraping grizzlies...
Scraping heat...
Scraping bucks...
Scraping timberwolves...
Scraping pelicans...
Scraping knicks...
Scraping thunder...
Scraping magic...
Scraping sixers...
Scraping suns...
Scraping blazers...
Scraping kings...
Scraping spurs...
Scraping raptors...
Scraping jazz...
Scraping wizards...


In [4]:
# Display the NBA franchise table returned by create_teams_df.
teams

Unnamed: 0,isNBAFranchise,isAllStar,city,altCityName,fullName,tricode,teamId,nickname,urlName,teamShortName,confName,divName
0,True,False,Atlanta,Atlanta,Atlanta Hawks,ATL,1610612737,Hawks,hawks,Atlanta,East,Southeast
1,True,False,Boston,Boston,Boston Celtics,BOS,1610612738,Celtics,celtics,Boston,East,Atlantic
2,True,False,Brooklyn,Brooklyn,Brooklyn Nets,BKN,1610612751,Nets,nets,Brooklyn,East,Atlantic
3,True,False,Charlotte,Charlotte,Charlotte Hornets,CHA,1610612766,Hornets,hornets,Charlotte,East,Southeast
4,True,False,Chicago,Chicago,Chicago Bulls,CHI,1610612741,Bulls,bulls,Chicago,East,Central
5,True,False,Cleveland,Cleveland,Cleveland Cavaliers,CLE,1610612739,Cavaliers,cavaliers,Cleveland,East,Central
6,True,False,Dallas,Dallas,Dallas Mavericks,DAL,1610612742,Mavericks,mavericks,Dallas,West,Southwest
7,True,False,Denver,Denver,Denver Nuggets,DEN,1610612743,Nuggets,nuggets,Denver,West,Northwest
8,True,False,Detroit,Detroit,Detroit Pistons,DET,1610612765,Pistons,pistons,Detroit,East,Central
9,True,False,Golden State,Golden State,Golden State Warriors,GSW,1610612744,Warriors,warriors,Golden State,West,Pacific


In [5]:
# Display the personId/teamId pairs returned by create_roster_df.
rosters

Unnamed: 0,personId,teamId
0,1627761,1610612737
1,1629718,1610612737
2,203991,1610612737
3,1713,1610612737
4,1628381,1610612737
...,...,...
13,1629678,1610612764
14,202397,1610612764
15,1629021,1610612764
16,202322,1610612764


In [6]:
# Display the active players (with a recorded height) returned by create_players_df.
players

Unnamed: 0,firstName,lastName,temporaryDisplayName,personId,teamId,jersey,isActive,pos,heightFeet,heightInches,heightMeters,weightPounds,weightKilograms,dateOfBirthUTC,nbaDebutYear,yearsPro,collegeName,lastAffiliation,country,isallStar
0,Jaylen,Adams,"Adams, Jaylen",1629121,1610612757,20,True,G,6,0,1.83,225,102.1,1996-05-04,2018,1,St. Bonaventure,St. Bonaventure/USA,USA,
1,Steven,Adams,"Adams, Steven",203500,1610612760,12,True,C,6,11,2.11,265,120.2,1993-07-20,2013,6,Pittsburgh,Pittsburgh/New Zealand,New Zealand,
2,Bam,Adebayo,"Adebayo, Bam",1628389,1610612748,13,True,C-F,6,9,2.06,255,115.7,1997-07-18,2017,2,Kentucky,Kentucky/USA,USA,
3,LaMarcus,Aldridge,"Aldridge, LaMarcus",200746,1610612759,12,True,C-F,6,11,2.11,250,113.4,1985-07-19,2006,13,Texas,Texas/USA,USA,
4,Kyle,Alexander,"Alexander, Kyle",1629734,1610612748,17,True,F-C,6,10,2.08,216,98.0,1996-10-21,,0,Tennessee,Tennessee/Canada,Canada,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499,Trae,Young,"Young, Trae",1629027,1610612737,11,True,G,6,1,1.85,180,81.6,1998-09-19,2018,1,Oklahoma,University of Oklahoma/USA,USA,
500,Cody,Zeller,"Zeller, Cody",203469,1610612766,40,True,F-C,7,0,2.13,240,108.9,1992-10-05,2013,6,Indiana,Indiana/USA,USA,
501,Tyler,Zeller,"Zeller, Tyler",203092,1610612759,45,True,F-C,6,11,2.11,250,113.4,1990-01-17,2012,7,North Carolina,North Carolina/USA,USA,
502,Ante,Zizic,"Zizic, Ante",1627790,1610612739,41,True,C,6,10,2.08,266,120.7,1997-01-04,2017,2,,Darussafaka (Turkey)/Croatia,Croatia,


In [7]:
# Display the per-player career summary table returned by create_stats_df.
careerSummaries

Unnamed: 0,personId,tpp,ftp,fgp,ppg,rpg,apg,bpg,mpg,spg,...,ftm,fta,pFouls,points,gamesPlayed,gamesStarted,plusMinus,min,dd2,td3
0,1629121,33.8,77.8,34.5,3.2,1.8,1.9,0.2,12.6,0.4,...,7,9,45,108,34,1,-57,427,0,0
1,203500,10,55.8,58.9,9.8,7.6,1.2,1,26.8,0.9,...,867,1555,1373,5140,525,461,1614,14092,111,0
2,1628389,11.4,71.1,55.8,10.5,7.7,2.9,0.9,25.5,0.8,...,531,747,505,2259,216,112,197,5515,57,3
3,200746,31.2,81.1,49.1,19.5,8.3,2,1.1,34.4,0.7,...,3579,4414,2473,19599,1003,962,2799,34517,351,0
4,1629734,0,0,0,0,0,0,0,0,0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499,1629027,34.4,84.7,42.8,23.6,3.9,8.6,0.2,32.8,1,...,824,973,244,3327,141,141,-562,4623,58,3
500,203469,24.8,73.1,51.3,8.6,6,1.4,0.7,22.9,0.7,...,875,1197,1104,3611,419,252,151,9613,29,0
501,203092,28.6,76.4,50.9,7,4.4,0.9,0.6,17.6,0.2,...,541,708,897,2871,412,166,-415,7249,15,0
502,1627790,0,71.1,58.1,6,3.9,0.6,0.4,13.4,0.2,...,128,180,170,674,113,27,-105,1516,5,0


In [8]:
# Persist each table to its own CSV file (relative path), leaving out the index column.
for frame, csv_path in (
    (players, 'players.csv'),
    (teams, 'teams.csv'),
    (rosters, 'rosters.csv'),
    (careerSummaries, 'careerSummaries.csv'),
):
    frame.to_csv(csv_path, index=False)