In [1]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# NBA season we will be analyzing
year = 2019
# URL page we will scraping (see image above)
url = "https://www.basketball-reference.com/leagues/NBA_{}_per_game.html".format(year)
# this is the HTML from the given URL
html = urlopen(url)
soup = BeautifulSoup(html)

In [3]:
# use findALL() to get the column headers
soup.findAll('tr', limit=2)
# use getText()to extract the text we need into a list
headers = [th.getText() for th in soup.findAll('tr', limit=2)[0].findAll('th')]
# exclude the first column as we will not need the ranking order from Basketball Reference for the analysis
headers = headers[1:]
headers

['Player',
 'Pos',
 'Age',
 'Tm',
 'G',
 'GS',
 'MP',
 'FG',
 'FGA',
 'FG%',
 '3P',
 '3PA',
 '3P%',
 '2P',
 '2PA',
 '2P%',
 'eFG%',
 'FT',
 'FTA',
 'FT%',
 'ORB',
 'DRB',
 'TRB',
 'AST',
 'STL',
 'BLK',
 'TOV',
 'PF',
 'PTS']

In [4]:
# avoid the first header row
rows = soup.findAll('tr')[1:]
player_stats = [[td.getText() for td in rows[i].findAll('td')]
            for i in range(len(rows))]

In [5]:
stats = pd.DataFrame(player_stats, columns = headers)
stats

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,Álex Abrines,SG,25,OKC,31,2,19.0,1.8,5.1,.357,...,.923,0.2,1.4,1.5,0.6,0.5,0.2,0.5,1.7,5.3
1,Quincy Acy,PF,28,PHO,10,0,12.3,0.4,1.8,.222,...,.700,0.3,2.2,2.5,0.8,0.1,0.4,0.4,2.4,1.7
2,Jaylen Adams,PG,22,ATL,34,1,12.6,1.1,3.2,.345,...,.778,0.3,1.4,1.8,1.9,0.4,0.1,0.8,1.3,3.2
3,Steven Adams,C,25,OKC,80,80,33.4,6.0,10.1,.595,...,.500,4.9,4.6,9.5,1.6,1.5,1.0,1.7,2.6,13.9
4,Bam Adebayo,C,21,MIA,82,28,23.3,3.4,5.9,.576,...,.735,2.0,5.3,7.3,2.2,0.9,0.8,1.5,2.5,8.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
729,Tyler Zeller,C,29,MEM,4,1,20.5,4.0,7.0,.571,...,.778,2.3,2.3,4.5,0.8,0.3,0.8,1.0,4.0,11.5
730,Ante Žižić,C,22,CLE,59,25,18.3,3.1,5.6,.553,...,.705,1.8,3.6,5.4,0.9,0.2,0.4,1.0,1.9,7.8
731,Ivica Zubac,C,21,TOT,59,37,17.6,3.6,6.4,.559,...,.802,1.9,4.2,6.1,1.1,0.2,0.9,1.2,2.3,8.9
732,Ivica Zubac,C,21,LAL,33,12,15.6,3.4,5.8,.580,...,.864,1.6,3.3,4.9,0.8,0.1,0.8,1.0,2.2,8.5


In [8]:
teams = stats['Tm'].unique()

teams

array(['OKC', 'PHO', 'ATL', 'MIA', 'CLE', 'DEN', 'SAS', 'CHI', 'UTA',
       'BRK', 'NYK', 'POR', 'MEM', 'TOT', 'IND', 'MIL', 'DAL', 'HOU',
       None, 'TOR', 'WAS', 'ORL', 'CHO', 'SAC', 'LAL', 'MIN', 'BOS',
       'GSW', 'NOP', 'LAC', 'PHI', 'DET'], dtype=object)

In [6]:
OKC = stats.loc[(stats["Tm"] == "OKC"), ["Player", "Tm", "G", "PTS"]]
OKC

Unnamed: 0,Player,Tm,G,PTS
0,Álex Abrines,OKC,31,5.3
3,Steven Adams,OKC,80,13.9
105,Deonte Burton,OKC,32,2.6
170,Tyler Davis,OKC,1,0.0
183,Hamidou Diallo,OKC,51,3.7
215,Jawun Evans,OKC,1,0.0
224,Raymond Felton,OKC,33,4.3
225,Terrance Ferguson,OKC,74,6.9
249,Paul George,OKC,77,28.0
260,Jerami Grant,OKC,80,13.6


In [7]:
CLE = stats.loc[(stats["Tm"] == "CLE"), ["Player", "Tm", "G", "PTS"]]
CLE

Unnamed: 0,Player,Tm,G,PTS
5,Deng Adel,CLE,19,1.7
64,Jaron Blossomgame,CLE,27,4.2
103,Alec Burks,CLE,34,11.6
140,Marquese Chriss,CLE,27,5.7
143,Jordan Clarkson,CLE,81,16.8
173,Sam Dekker,CLE,9,6.3
178,Matthew Dellavedova,CLE,36,7.3
236,Channing Frye,CLE,36,3.6
288,Andrew Harrison,CLE,10,4.3
305,George Hill,CLE,13,10.8


In [10]:
teams_dict = {}
for team in teams:
    teams_dict[team] = stats.loc[(stats["Tm"] == team), ["Player", "Tm", "G", "PTS"]]
    
teams_dict['MEM']

Unnamed: 0,Player,Tm,G,PTS
14,Kyle Anderson,MEM,43,8.0
75,Avery Bradley,MEM,14,16.1
85,Dillon Brooks,MEM,18,7.5
86,MarShon Brooks,MEM,29,6.6
109,Bruno Caboclo,MEM,34,8.3
118,Jevon Carter,MEM,39,4.4
125,Omri Casspi,MEM,36,6.3
148,Mike Conley,MEM,70,21.1
191,Tyler Dorsey,MEM,21,9.8
242,Marc Gasol,MEM,53,15.7
