In [8]:
import pandas as pd
import html5lib
import time
from datetime import datetime
import os

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [74]:
def initialize_selenium(URL, chromedriver="/Applications/chromedriver"):
    """
    Start a Chrome session through Selenium and load URL in it.

    URL: address of the page to open once the browser is running
    chromedriver: path to the chromedriver executable. Defaults to the
        location this notebook was originally written against; pass a
        different path when the binary lives elsewhere.

    Returns the live webdriver.Chrome instance. The caller owns it and is
    responsible for calling driver.quit() when done.

    If loading URL raises, the browser is shut down before the exception
    propagates so that no orphaned Chrome process is left behind.
    """
    os.environ["webdriver.chrome.driver"] = chromedriver
    driver = webdriver.Chrome(chromedriver)

    try:
        driver.get(URL)
    except Exception:
        # Nobody else holds a reference to the driver yet, so close it here.
        driver.quit()
        raise

    return driver

# Build the year labels and the (initially empty) per-year data store
def data_to_dict(years):
    """
    Prepare the containers used to collect one table per year.

    years: int, how many years of data will be stored

    Returns a 2-tuple (years_label, data):
      years_label: a descending range that starts at the current calendar
          year and contains `years` entries (empty when years <= 0)
      data: a new empty dict, meant to be filled as Key (Year): Value (Data)
    """
    newest_year = datetime.now().year
    stop_before = newest_year - years

    return range(newest_year, stop_before, -1), {}
    
    
def download_player_data(URL, years, type_data):
    """
    Scrape one player-stats table per year by driving a Chrome browser.

    URL: landing page to open in the browser
    years: int, number of consecutive years to collect
    type_data: visible link text of the stats page to open after hovering
        the element with id "header_leagues" (this notebook passes
        "Per G" and "Advanced")

    For each year label the first HTML table found at the browser's
    current URL is read with pandas, then the "a.button2.prev" link is
    clicked to step to the previous page.

    Returns a dict mapping year label (int) -> DataFrame.

    NOTE(review): the labels come from the calendar year at run time, not
    from the page itself -- confirm that the first page reached really is
    the season that should carry the current year as its label.
    """
    years_label, data = data_to_dict(years)
    driver = initialize_selenium(URL)

    # try/finally guarantees the browser is closed even when a lookup,
    # click or table parse fails part-way through.
    try:
        wait = WebDriverWait(driver, 10)

        # get to the current season stats, this may have changed
        leagues_tab = driver.find_element(By.ID, "header_leagues")
        ActionChains(driver).move_to_element(leagues_tab).perform()
        wait.until(EC.visibility_of_element_located((By.LINK_TEXT, type_data))).click()

        pages_left = len(years_label)
        for year in years_label:
            table = pd.read_html(driver.current_url)[0]
            # Drop rows whose Rk cell is the literal 'Rk' (presumably the
            # header row repeated inside the table body). .copy() makes the
            # stored frame independent so later column assignments on it do
            # not raise SettingWithCopyWarning.
            data[year] = table[table.Rk != 'Rk'].copy()

            pages_left -= 1
            if pages_left:
                # Only step back when another year is still needed; this
                # avoids a pointless click and 10 second wait after the last one.
                driver.find_element(By.CSS_SELECTOR, "a.button2.prev").click()
                # Fixed pause to let the previous-year page load.
                time.sleep(10)
    finally:
        driver.quit()

    return data

In [69]:
# Per-game tables for 12 year labels. This launches a Chrome window and
# pauses 10 seconds between pages, so the cell takes a few minutes to run.
data1 = download_player_data(
    URL="https://www.basketball-reference.com",
    years=12,
    type_data="Per G",
)

In [72]:
# Keep a single row per (Player, Age) in every year's per-game table.
# When a player has a row with Tm == 'TOT' (presumably the combined line for
# players who appeared for more than one team) that row is the one kept.
for year, season_df in data1.items():
    # assign() returns a new frame, so the stored table is not modified
    # in place while the helper flag column exists.
    flagged = season_df.assign(total=season_df['Tm'] == 'TOT')

    # Sorting the flag in descending order moves every TOT row ahead of the
    # per-team rows, so drop_duplicates (which keeps the first match) retains it.
    deduped = (
        flagged
        .sort_values('total', ascending=False)
        .drop_duplicates(['Player', 'Age'])
        # columns= instead of a positional axis: pandas 2.0 no longer
        # accepts drop('total', 1).
        .drop(columns='total')
    )

    print("For Year "+str(year)+": "+str(deduped.shape[0])+" rows")
    # Re-assigning an existing key while iterating is safe (dict size is unchanged).
    data1[year] = deduped


For Year 2019: 530 rows
For Year 2018: 540 rows
For Year 2017: 486 rows
For Year 2016: 476 rows
For Year 2015: 492 rows
For Year 2014: 482 rows
For Year 2013: 469 rows
For Year 2012: 478 rows
For Year 2011: 452 rows
For Year 2010: 442 rows
For Year 2009: 445 rows
For Year 2008: 451 rows


In [75]:
# Advanced-stats tables for the same 12 year labels. This launches a Chrome
# window and pauses 10 seconds between pages, so the cell takes a few minutes.
data2 = download_player_data(
    URL="https://www.basketball-reference.com",
    years=12,
    type_data="Advanced",
)

In [80]:
# NOTE: data22 is a second name for the same dict object as data2, not a
# copy, so the replacements made in the loop are visible through data2 too.
data22 = data2

# Keep a single row per (Player, Age) in every year's advanced table.
# When a player has a row with Tm == 'TOT' (presumably the combined line for
# players who appeared for more than one team) that row is the one kept.
for year, season_df in data22.items():
    # assign() returns a new frame, so the stored table is not modified
    # in place while the helper flag column exists.
    flagged = season_df.assign(total=season_df['Tm'] == 'TOT')

    # Sorting the flag in descending order moves every TOT row ahead of the
    # per-team rows, so drop_duplicates (which keeps the first match) retains it.
    deduped = (
        flagged
        .sort_values('total', ascending=False)
        .drop_duplicates(['Player', 'Age'])
        # columns= instead of a positional axis: pandas 2.0 no longer
        # accepts drop('total', 1).
        .drop(columns='total')
    )

    print("For Year "+str(year)+": "+str(deduped.shape[0])+" rows")
    # Re-assigning an existing key while iterating is safe (dict size is unchanged).
    data22[year] = deduped


For Year 2019: 530 rows
For Year 2018: 540 rows
For Year 2017: 486 rows
For Year 2016: 476 rows
For Year 2015: 492 rows
For Year 2014: 482 rows
For Year 2013: 469 rows
For Year 2012: 478 rows
For Year 2011: 452 rows
For Year 2010: 442 rows
For Year 2009: 445 rows
For Year 2008: 451 rows


In [82]:
# Preview: first 15 rows of the table stored under key 2019, ordered by player name.
# NOTE(review): 2019 is hard-coded while the dict keys are derived from the
# calendar year at download time -- confirm this is the intended table.
data1[2019].sort_values(by='Player').head(n=15)

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
255,189,Aaron Gordon,PF,23,ORL,78,78,33.8,6.0,13.4,...,0.731,1.7,5.7,7.4,3.7,0.7,0.7,2.1,2.2,16.0
308,229,Aaron Holiday,PG,22,IND,50,0,12.9,2.1,5.2,...,0.82,0.1,1.2,1.3,1.7,0.4,0.3,0.8,1.4,5.9
515,371,Abdel Nader,SF,25,OKC,61,1,11.4,1.5,3.5,...,0.75,0.2,1.7,1.9,0.3,0.3,0.2,0.4,1.1,4.0
319,236,Al Horford,C,32,BOS,68,68,29.0,5.7,10.6,...,0.821,1.8,5.0,6.7,4.2,0.9,1.3,1.5,1.9,13.6
12,13,Al-Farouq Aminu,PF,28,POR,81,81,28.3,3.2,7.3,...,0.867,1.4,6.1,7.5,1.3,0.8,0.4,0.9,1.8,9.4
705,511,Alan Williams,PF,26,BRK,5,0,5.2,1.6,2.6,...,0.5,0.8,3.0,3.8,0.6,0.2,0.0,0.2,0.4,3.6
101,82,Alec Burks,SG,27,TOT,64,24,21.5,3.0,7.4,...,0.823,0.5,3.2,3.7,2.0,0.6,0.3,1.0,1.4,8.8
0,1,Alex Abrines,SG,25,OKC,31,2,19.0,1.8,5.1,...,0.923,0.2,1.4,1.5,0.6,0.5,0.2,0.5,1.7,5.3
124,95,Alex Caruso,PG,24,LAL,25,4,21.2,3.1,6.9,...,0.797,0.8,1.9,2.7,3.1,1.0,0.4,1.7,2.2,9.2
410,300,Alex Len,C,25,ATL,77,31,20.1,4.2,8.4,...,0.648,2.1,3.5,5.5,1.1,0.4,0.9,1.3,2.6,11.1


In [83]:
# Preview: first 15 rows of the advanced table stored under key 2019, ordered by player name.
# NOTE(review): 2019 is hard-coded while the dict keys are derived from the
# calendar year at download time -- confirm this is the intended table.
data22[2019].sort_values(by='Player').head(n=15)

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,MP,PER,TS%,3PAr,...,Unnamed: 19,OWS,DWS,WS,WS/48,Unnamed: 24,OBPM,DBPM,BPM,VORP
255,189,Aaron Gordon,PF,23,ORL,78,2633,15.1,0.538,0.332,...,,1.8,3.3,5.1,0.093,,-0.2,1.2,1.0,2.0
308,229,Aaron Holiday,PG,22,IND,50,646,12.0,0.518,0.485,...,,0.1,0.8,0.9,0.065,,-1.5,-1.1,-2.6,-0.1
515,371,Abdel Nader,SF,25,OKC,61,694,8.8,0.522,0.465,...,,0.0,0.9,0.9,0.062,,-4.2,-0.9,-5.1,-0.5
319,236,Al Horford,C,32,BOS,68,1973,20.2,0.605,0.281,...,,4.5,2.9,7.5,0.181,,2.1,2.7,4.8,3.4
12,13,Al-Farouq Aminu,PF,28,POR,81,2292,13.2,0.568,0.472,...,,3.0,2.8,5.8,0.121,,0.1,0.8,0.9,1.7
705,511,Alan Williams,PF,26,BRK,5,26,32.9,0.61,0.077,...,,0.1,0.1,0.2,0.313,,3.1,2.7,5.8,0.1
101,82,Alec Burks,SG,27,TOT,64,1375,12.7,0.523,0.354,...,,0.7,0.8,1.5,0.052,,-1.5,-0.6,-2.2,-0.1
0,1,Alex Abrines,SG,25,OKC,31,588,6.3,0.507,0.809,...,,0.1,0.6,0.6,0.053,,-2.4,-0.9,-3.4,-0.2
124,95,Alex Caruso,PG,24,LAL,25,531,13.9,0.569,0.289,...,,0.5,0.6,1.0,0.093,,-0.9,0.0,-0.9,0.2
410,300,Alex Len,C,25,ATL,77,1544,17.2,0.575,0.315,...,,2.2,1.0,3.2,0.1,,-0.1,-0.8,-0.9,0.4
