# NBA Players Data Scraping

In [1]:
from bs4 import BeautifulSoup as soup
from selenium import webdriver
from selenium.webdriver.support.ui import Select

In [2]:
PATH = 'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe'

driver = webdriver.Chrome(PATH)
url = 'https://www.nba.com/players'
driver.get(url)

In [3]:
#Select show 'all page' on HTML page

showPage_xpath = r'/html/body/div[1]/div[2]/div[3]/section/div/div[2]/div[1]/div[7]/div/div[3]/div/label/div/select'
showPage = Select(driver.find_element_by_xpath(showPage_xpath))

In [4]:
#Show all players data in table
showPage.select_by_index(0)

In [5]:
#Get HTML page source
page = driver.page_source

In [6]:
page_soup = soup(page, 'html.parser')

In [7]:
rows_container = page_soup.findAll('tr')

Get table rows from HTML page

In [8]:
rows_container.pop(0) # remove the table header
rows_container

[<tr><td class="primary text RosterRow_primaryCol__19xPQ"><a class="flex items-center t6 Anchor_complexLink__2NtkO" href="/player/1630173/precious-achiuwa/"><div class="w-8 h-8 mr-2"><img alt="Precious Achiuwa Headshot" class="PlayerImage_image__1smob PlayerImage_round__281uY" loading="lazy" src="https://cdn.nba.com/headshots/nba/latest/260x190/1630173.png"/></div><div class="flex flex-col lg:flex-row"><p class="t6 mr-1">Precious</p><p class="t6">Achiuwa</p></div></a></td><td class="text"><a class="t6" href="/team/1610612748/heat/">MIA</a></td><td>5</td><td class="text">F</td><td>6-8</td><td class="text">225<!-- --> <!-- -->lbs</td><td class="text">Memphis</td><td class="text">Nigeria</td></tr>,
 <tr><td class="primary text RosterRow_primaryCol__19xPQ"><a class="flex items-center t6 Anchor_complexLink__2NtkO" href="/player/1629121/jaylen-adams/"><div class="w-8 h-8 mr-2"><img alt="Jaylen Adams Headshot" class="PlayerImage_image__1smob PlayerImage_round__281uY" loading="lazy" src="https

Data transformation

In [9]:
#Dictionary to translate initial team name to full team name


team_dict = {
    'PHI' : 'Philadelphia 76ers',
    'MIL' : 'Milwaukee Bucks',
    'CHI' : 'Chicago Bulls',
    'CLE' : 'Cleveland Cavaliers',
    'BOS' : 'Boston Celtics',
    'LAC' : 'LA Clippers',
    'MEM' : 'Memphis Grizzlies',
    'ATL' : 'Atlanta Hawks',
    'MIA' : 'Miami Heat',
    'CHA' : 'Charlotte Hornets',
    'UTA' : 'Utah Jazz',
    'SAC' : 'Sacramento Kings',
    'NYK' : 'New York Knicks',
    'LAL' : 'Los Angeles Lakers',
    'ORL' : 'Orlando Magic',
    'DAL' : 'Dallas Mavericks',
    'BKN' : 'Brooklyn Nets',
    'DEN' : 'Denver Nuggets',
    'IND' : 'Indiana Pacers',
    'NOP' : 'New Orleans Pelicans',
    'DET' : 'Detroit Pistons',
    'TOR' : 'Toronto Raptors',
    'HOU' : 'Houston Rockets',
    'SAS' : 'San Antonio Spurs',
    'PHX' : 'Phoenix Suns',
    'OKC' : 'Oklahoma Thunder',
    'MIN' : 'Minnesota Timberwolves',
    'POR' : 'Portland Trail Blazers',
    'GSW' : 'Golden State Wariors',
    'WAS' : 'Washington Wizards'
}

#for transforming weight data from string to float
def transformPlayersHeightData(heightString) :
    #transform heightString. ex: '6-10'(string) to 6.10 (float)
    height = float(heightString.replace('-', '.'))
    #convert foot to cm
    return round(height*30.48, 1)

#for transforming weight data from string to int
def transformPlayersWeightData(weightSring) :
    #transform weightString. ex '255 lbs'(string) to 255 (int)
    weight = int(weightSring.replace(' lbs', ''))
    
    #convert pound to kg
    return int(weight/2.205)

Scraping

In [13]:
players_container = {} #to store all players data

for data in rows_container :
    #get name
    first_name = data.findAll('p')[0].text
    last_name = data.findAll('p')[1].text
    full_name = first_name + ' ' + last_name
    
    #get team
    try :
        team_initial = data.findAll('a', {'class' : 't6'})[1].text
        team = team_dict[team_initial]
    except :
        team = None
    
    #get number
    try :
        number = int(data.findAll('td')[2].text)
    except :
        number = None
    
    #get position
    position = data.findAll('td', {'class' : 'text'})[2].text
    
    #get height
    try :
        height_raw = data.findAll('td')[4].text
        height = transformPlayersHeightData(height_raw)
    except :
        height = None
        
    #get weight
    try :
        weight_raw = data.findAll('td', {'class' : 'text'})[3].text
        weight = transformPlayersWeightData(weight_raw)
    except :
        weight = None

    #get school
    school = data.findAll('td', {'class' : 'text'})[4].text

    #get country
    country = data.findAll('td', {'class' : 'text'})[5].text
    
    #insert all players data to container object
    players_container[full_name] = {
        'name' : full_name,
        'team' : team,
        'number' : number,
        'position' : position,
        'height' : height,
        'weight' : weight,
        'school' : school,
        'country' : country
    }
    

In [14]:
players_container

{'Precious Achiuwa': {'name': 'Precious Achiuwa',
  'team': 'Miami Heat',
  'number': 5,
  'position': 'F',
  'height': 207.3,
  'weight': 102,
  'school': 'Memphis',
  'country': 'Nigeria'},
 'Jaylen Adams': {'name': 'Jaylen Adams',
  'team': None,
  'number': None,
  'position': 'G',
  'height': 182.9,
  'weight': 102,
  'school': 'St. Bonaventure',
  'country': 'USA'},
 'Steven Adams': {'name': 'Steven Adams',
  'team': 'New Orleans Pelicans',
  'number': 12,
  'position': 'C',
  'height': 186.2,
  'weight': 120,
  'school': 'Pittsburgh',
  'country': 'New Zealand'},
 'Bam Adebayo': {'name': 'Bam Adebayo',
  'team': 'Miami Heat',
  'number': 13,
  'position': 'C-F',
  'height': 210.3,
  'weight': 115,
  'school': 'Kentucky',
  'country': 'USA'},
 'LaMarcus Aldridge': {'name': 'LaMarcus Aldridge',
  'team': None,
  'number': None,
  'position': 'C-F',
  'height': 186.2,
  'weight': 113,
  'school': 'Texas-Austin',
  'country': 'USA'},
 'Ty-Shon Alexander': {'name': 'Ty-Shon Alexander

In [15]:
len(players_container)

546

Dump data to json

In [18]:
import json

In [19]:
final_json = []

for player in players_container :
    final_json.append(players_container[player])
    
final_json

[{'name': 'Precious Achiuwa',
  'team': 'Miami Heat',
  'number': 5,
  'position': 'F',
  'height': 207.3,
  'weight': 102,
  'school': 'Memphis',
  'country': 'Nigeria'},
 {'name': 'Jaylen Adams',
  'team': None,
  'number': None,
  'position': 'G',
  'height': 182.9,
  'weight': 102,
  'school': 'St. Bonaventure',
  'country': 'USA'},
 {'name': 'Steven Adams',
  'team': 'New Orleans Pelicans',
  'number': 12,
  'position': 'C',
  'height': 186.2,
  'weight': 120,
  'school': 'Pittsburgh',
  'country': 'New Zealand'},
 {'name': 'Bam Adebayo',
  'team': 'Miami Heat',
  'number': 13,
  'position': 'C-F',
  'height': 210.3,
  'weight': 115,
  'school': 'Kentucky',
  'country': 'USA'},
 {'name': 'LaMarcus Aldridge',
  'team': None,
  'number': None,
  'position': 'C-F',
  'height': 186.2,
  'weight': 113,
  'school': 'Texas-Austin',
  'country': 'USA'},
 {'name': 'Ty-Shon Alexander',
  'team': 'Phoenix Suns',
  'number': 0,
  'position': 'G',
  'height': 192.0,
  'weight': 88,
  'school':

In [20]:
dump_json = json.dumps(final_json)

with open (r"D:\Desktop\Sandbox\webscrap\NBAPlayers.json", "w") as f :
    f.write(dump_json)