# Getting Players' Data

In [1]:
import re                       # Regular Expression Library
import time                     # To introduce time delay
import json
import requests                 # To get the content of the web page
import pandas as pd             # Library to create Dataframe
from tqdm import tqdm
from bs4 import BeautifulSoup, Comment   # The data scraping library

In [2]:
# Base URL of the site; the relative player links (hrefs) scraped from the
# roster tables are appended to it to build absolute player-page URLs
base_url = 'https://www.basketball-reference.com'

In [3]:
# Reading the team URLs from the file. Later cells use the result as a
# dictionary: each key is a season and each value is the collection of team
# page URLs for that season.
with open('.\\data\\season_urls.json', 'r', encoding='utf8') as file_obj:
    # Pass the file handle straight to json.load: an open file object is
    # always truthy, so an `or {}` fallback could never apply (and a dict has
    # no .read() for json.load to call anyway)
    season_urls = json.load(file_obj)

### Players' Names and URLs

In [4]:
# Dictionary mapping each player's name to the URL of their player page
player_urls = {}

# One progress tick per team page: count the URLs actually listed for every
# season rather than assuming a fixed number of pages per season
pbar = tqdm(total=sum(len(urls) for urls in season_urls.values()),
            desc='Progress')

for season, team_urls in season_urls.items():
    pbar.set_description(season)

    for url in team_urls:
        pbar.update(1)

        # Get request to each team page
        response = requests.get(url)

        # Create Beautiful Soup Object
        soup = BeautifulSoup(response.content, 'lxml')

        # Getting roster table from the website
        roster_div = soup.find('div', {'id': 'div_roster'})

        if roster_div is None or roster_div.tbody is None:
            # The page has no roster table (for example an error page was
            # returned): report it and carry on with the remaining pages
            # instead of aborting the whole crawl with an AttributeError
            tqdm.write('No roster table found at ' + url)
        else:
            roster = roster_div.tbody.find_all('td', {'data-stat': 'player'})

            # Adding player name and URLs to the dictionary
            for player in roster:
                # A cell without a link gives no player page to visit later
                if player.a is None:
                    continue

                # Strip the trailing "(TW)" marker (presumably the two-way
                # contract tag - confirm against the site) so that the same
                # player is always stored under the same key
                name = re.sub(r'\s{2,}\(TW\)', '', player.text.strip())
                player_urls[name] = base_url + player.a['href']

        # Due to rate limit (applies whether or not the page was usable)
        time.sleep(2)

pbar.close()

# Writing the player URLs into a file
with open('.\\data\\player_urls.json', 'w', encoding='utf8') as file_obj:
    json.dump(player_urls, file_obj)

### Players' Statistics

In [5]:
# pandas deprecates passing literal HTML strings to read_html; the markup is
# wrapped in StringIO so it is handed over as a file-like object instead
from io import StringIO

pbar = tqdm(total=len(player_urls), desc='Player Name')

for player, player_url in player_urls.items():
    pbar.set_description(player)
    pbar.update(1)

    # Getting the player page data
    response = requests.get(player_url)

    # Creating Beautiful Soup object
    adv_soup = BeautifulSoup(response.content, 'lxml')

    # The 'per_poss' table is looked up inside the page's HTML comments, so
    # the comment nodes are joined and parsed as a document of their own
    stats_soup = BeautifulSoup('\n'.join(adv_soup.find_all(string=Comment)), 'lxml')

    adv_table = adv_soup.find(id='advanced')
    stats_table = stats_soup.find(id='per_poss')

    if adv_table is None or stats_table is None:
        # Without this check str(None) would be passed to read_html, which
        # fails with an unrelated-looking "no tables found" error and stops
        # the whole run; report the player and move on instead
        tqdm.write('Skipping ' + player + ': stats table(s) not found')
    else:
        # Storing the Advanced Stats into a Pandas' Dataframe
        adv = pd.read_html(StringIO(str(adv_table)))[0]

        # These columns are dropped from the advanced table before the two
        # tables are placed side by side
        adv = adv.drop(columns=['Season', 'Age', 'Tm', 'Lg', 'Pos', 'G', 'MP'])

        # Storing the stats into a Pandas' Dataframe
        stats = pd.read_html(StringIO(str(stats_table)))[0]

        # Creating a final dataframe (column-wise concatenation), then
        # removing the columns that contain no values at all
        total_stats = pd.concat([stats, adv], axis=1)
        total_stats = total_stats.dropna(axis=1, how='all')

        # File name: lower-cased player name with '. ', '.', whitespace and
        # '-' each replaced by an underscore
        file_name = re.sub(r'\.\s|\.|\s|-', '_', player.strip().lower())

        # Writing the player's stats into a CSV file
        total_stats.to_csv('.\\data\\stats\\'+file_name+'.csv', index=False)

    # Due to rate limitation (applies whether or not the page was usable)
    time.sleep(2)

pbar.close()