In [104]:
import pandas as pd 
from selenium import webdriver
import time
import matplotlib.pyplot as plt

## Functions

#### Function to return game logs from season and amount of pages wanted (2019-20 and 50 pages, for example)

In [105]:
def nba_game_logs1(num_clicks, season='2020-21',
                   driver_path='/Users/willhanley/Desktop/chromedriver 3'):
    """Scrape player box-score game logs from NBA.com into a DataFrame.

    Opens the NBA.com player box-score page for ``season`` in Chrome, reads
    the text of the stats table, clicks the next-page button and repeats,
    for up to ``num_clicks`` pages.

    Parameters
    ----------
    num_clicks : int
        Number of table pages to read.
    season : str, optional
        Value placed in the ``Season`` query parameter of the URL, for
        example ``'2019-20'``. Defaults to ``'2020-21'``, the season that
        used to be hard-coded.
    driver_path : str, optional
        Path to the chromedriver executable. Defaults to the path that used
        to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per scraped stats line with the columns Player, Team,
        Match-up, Date, W/L, Min, Pts, FGM, FGA, FG%, 3PM, 3PA, 3P%, FTM,
        FTA, FT%, OREB, DREB, REB, AST, STL, BLK, TOV, PF and +/-.
        Every value is the raw text token from the page (a string).
    """
    url = ('https://www.nba.com/stats/players/boxscores/'
           f'?Season={season}&SeasonType=Regular%20Season')

    # Column names for the space-separated tokens 4..25 of each stats line
    stat_columns = ['Date', 'W/L', 'Min', 'Pts', 'FGM', 'FGA', 'FG%',
                    '3PM', '3PA', '3P%', 'FTM', 'FTA', 'FT%', 'OREB',
                    'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', '+/-']

    # Lists filled while paging through the table
    player_names = []
    player_stats = []

    driver = webdriver.Chrome(executable_path=driver_path)
    try:
        driver.get(url)

        # Button that moves the table to the next page
        next_page = driver.find_elements_by_xpath('/html/body/main/div/div/div[2]/div/div/nba-stat-table/div[3]/div/div/a[2]')

        for _ in range(num_clicks):

            # Read the table text of the page currently shown
            table = driver.find_element_by_class_name('nba-stat-table__overflow').text.split('\n')

            # Line 0 is skipped; after it, odd lines are collected as player
            # names and even lines as the matching stats tokens
            for num, info in enumerate(table):
                if num == 0:
                    continue
                if num % 2 == 1:
                    player_names.append(info)
                else:
                    player_stats.append(info.split(' '))

            # Nothing to click: stop instead of failing on next_page[0]
            if not next_page:
                break

            # Click to next page
            driver.execute_script("arguments[0].click();", next_page[0])

            # Pause 4 seconds so the next page can render
            time.sleep(4)
    finally:
        # Always close the browser, even when scraping fails part-way
        driver.quit()

    # Token 0 is the team, tokens 2 and 3 are joined into the match-up
    # (token 1 is not used), tokens 4..25 follow stat_columns
    data = {'Player': player_names,
            'Team': [tokens[0] for tokens in player_stats],
            'Match-up': [tokens[2] + tokens[3] for tokens in player_stats]}
    for position, column in enumerate(stat_columns, start=4):
        data[column] = [tokens[position] for tokens in player_stats]

    return pd.DataFrame(data)

#### Functions to flag whether a player recorded a double-double or a triple-double

In [106]:
# Function if player has a double double 

def double_double(row):
    """Return 1 if the stat line is a double-double, otherwise 0.

    A double-double means double digits (>= 10) in at least two of points,
    rebounds, assists, steals and blocks.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'Pts', 'REB' and 'AST'. 'STL' and 'BLK' are counted
        when the row has them.

    Returns
    -------
    int
        1 for a double-double, 0 otherwise.
    """
    # Points, rebounds and assists are always required
    double_digit = sum(1 for stat in ('Pts', 'REB', 'AST') if row[stat] >= 10)

    # Steals and blocks are optional so rows carrying only the three core
    # stats keep working as before
    double_digit += sum(1 for stat in ('STL', 'BLK')
                        if stat in row and row[stat] >= 10)

    return 1 if double_digit >= 2 else 0

# Function if player has a triple double 

def triple_double(row):
    """Return 1 if the stat line is a triple-double, otherwise 0.

    A triple-double means double digits (>= 10) in at least three of
    points, rebounds, assists, steals and blocks. Every combination of
    three categories counts, not only those that include points.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'Pts', 'REB', 'AST', 'STL' and 'BLK'.

    Returns
    -------
    int
        1 for a triple-double, 0 otherwise.
    """
    # Count the categories in double digits instead of listing combinations
    double_digit = sum(1 for stat in ('Pts', 'REB', 'AST', 'STL', 'BLK')
                       if row[stat] >= 10)

    return 1 if double_digit >= 3 else 0

#### Function to return team defense statistics from season input (2019-20, for example)

In [107]:
def nba_defense(season_year, driver_path='/Users/willhanley/Desktop/chromedriver 3'):
    """Scrape the NBA.com team-defense table for one season.

    Opens the NBA.com team defense page for ``season_year`` in Chrome,
    reads the text of the stats table and returns it as a DataFrame.

    Parameters
    ----------
    season_year : str
        Value placed in the ``Season`` query parameter of the URL, for
        example ``'2019-20'``. It is also stored in the 'Season' column.
    driver_path : str, optional
        Path to the chromedriver executable. Defaults to the path that used
        to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per team with the columns Season, Team, GP, W, L, MIN,
        DEF_RTG, DREB, DREB%, STL, BLK, OPP_PTS_off_TOV,
        OPP_PTS_2nd_CHANCE, OPP_PTS_FB and OPP_PTS_PAINT. The stat values
        are the raw text tokens from the page (strings).
    """
    url = f'https://www.nba.com/stats/teams/defense/?sort=W&dir=-1&Season={season_year}&SeasonType=Regular%20Season'

    driver = webdriver.Chrome(executable_path=driver_path)
    try:
        driver.get(url)

        # Text of the stats table, one list entry per line
        table = driver.find_element_by_class_name('nba-stat-table__overflow').text.split('\n')
    finally:
        # Always close the browser, even when the table cannot be read
        driver.quit()

    # Column names for the space-separated tokens of each stats line
    stat_columns = ['GP', 'W', 'L', 'MIN', 'DEF_RTG', 'DREB', 'DREB%',
                    'STL', 'BLK', 'OPP_PTS_off_TOV', 'OPP_PTS_2nd_CHANCE',
                    'OPP_PTS_FB', 'OPP_PTS_PAINT']

    teams = []
    team_stats = []

    # Lines 0..8 are skipped. From line 9 on the text repeats in groups of
    # three: a team name (index divisible by 3), then its stats line; the
    # third line of each group is ignored.
    for num, info in enumerate(table):
        if num <= 8:
            continue
        position_in_group = num % 3
        if position_in_group == 0:
            teams.append(info)
        elif position_in_group == 1:
            team_stats.append(info.split(' '))

    data = {'Season': season_year, 'Team': teams}
    for position, column in enumerate(stat_columns):
        data[column] = [tokens[position] for tokens in team_stats]

    return pd.DataFrame(data)

#### Read in web scrapes from seasons 2016-2017 to Current Season (2020-2021)

In [108]:
# Load the previously scraped game logs, one CSV file per season,
# in order from 2016-17 through 2020-21
sixteen, seventeen, eighteen, nineteen, twenty = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/2016=2017stats.csv',
        'data/2017-2018stats.csv',
        'data/2018-2019stats.csv',
        'data/game_logs2019-2020.csv',
        'data/20-21_game_logs.csv',
    )
)

#### Feature Engineering and Concat all game logs together 

In [109]:
# Date column to datetime and Season label, for every per-season frame
for season_label, season_frame in [('2016-17', sixteen),
                                   ('2017-18', seventeen),
                                   ('2018-19', eighteen),
                                   ('2019-20', nineteen),
                                   ('2020-21', twenty)]:
    season_frame['Date'] = pd.to_datetime(season_frame['Date'])
    season_frame['Season'] = season_label

# Concat into 1 dataframe; each frame keeps its own row labels, so index
# values repeat across seasons after sorting
frames = [sixteen, seventeen, eighteen, nineteen, twenty]
game_logs_all = pd.concat(frames).sort_index()

# Drop columns with percentages
game_logs_all = game_logs_all.drop(columns=['FT%', '3P%', 'FG%'])

# Save numerical columns as ints
int_columns = ['Min', 'Pts', 'FGM', 'FGA', '3PM', '3PA', 'FTM', 'FTA', 'OREB',
               'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', '+/-']
game_logs_all[int_columns] = game_logs_all[int_columns].astype(str).astype(int)

# Create Double double column
game_logs_all['dubdub'] = game_logs_all.apply(double_double, axis=1)

# Create Triple double column
game_logs_all['tripdub'] = game_logs_all.apply(triple_double, axis=1)

# Create Draft kings score column (based on draftkings formula)
game_logs_all['DraftKings'] = (
    game_logs_all['Pts']
    + .5 * game_logs_all['3PM']
    + 1.25 * game_logs_all['REB']
    + 1.5 * game_logs_all['AST']
    + 2 * game_logs_all['STL']
    + 2 * game_logs_all['BLK']
    - .5 * game_logs_all['TOV']
    + 1.5 * game_logs_all['dubdub']
    + 3 * game_logs_all['tripdub']
)

# Home or away column: 1 for away, 0 for home
# (must run before 'Match-up' is overwritten below, because it needs the '@')
game_logs_all['Home/Away'] = game_logs_all['Match-up'].str.contains('@').astype(int)

# Full opponent names for 'Match-up'. The 'vs.' / '@' prefix is stripped and
# the remaining abbreviation is looked up, so home and away games always map
# to the same spelling (the old 60-entry table had a 'va.LAC' key and a
# 'Timeberwolves' value that left rows unmatched).
# NOTE(review): 'Portland Trailblazers' and 'Los Angeles Clippers' must match
# the Team spelling used in the defense table for the later merge - confirm.
team_names = {
    'ATL': 'Atlanta Hawks', 'BKN': 'Brooklyn Nets', 'BOS': 'Boston Celtics',
    'CHA': 'Charlotte Hornets', 'CHI': 'Chicago Bulls', 'CLE': 'Cleveland Cavaliers',
    'DAL': 'Dallas Mavericks', 'DEN': 'Denver Nuggets', 'DET': 'Detroit Pistons',
    'GSW': 'Golden State Warriors', 'HOU': 'Houston Rockets', 'IND': 'Indiana Pacers',
    'LAC': 'Los Angeles Clippers', 'LAL': 'Los Angeles Lakers', 'MEM': 'Memphis Grizzlies',
    'MIA': 'Miami Heat', 'MIL': 'Milwaukee Bucks', 'MIN': 'Minnesota Timberwolves',
    'NOP': 'New Orleans Pelicans', 'NYK': 'New York Knicks', 'OKC': 'Oklahoma City Thunder',
    'ORL': 'Orlando Magic', 'PHI': 'Philadelphia 76ers', 'PHX': 'Phoenix Suns',
    'POR': 'Portland Trailblazers', 'SAC': 'Sacramento Kings', 'SAS': 'San Antonio Spurs',
    'TOR': 'Toronto Raptors', 'UTA': 'Utah Jazz', 'WAS': 'Washington Wizards',
}
game_logs_all['Match-up'] = (
    game_logs_all['Match-up']
    .str.replace(r'^(?:vs\.|@)', '', regex=True)
    .map(team_names)
)

# Save as csv
game_logs_all.to_csv('data/game_logs_all.csv')

game_logs_all

Unnamed: 0,Player,Team,Match-up,Date,W/L,Min,Pts,FGM,FGA,3PM,...,STL,BLK,TOV,PF,+/-,Season,dubdub,tripdub,DraftKings,Home/Away
0,Mike Miller,DEN,Oklahoma City Thunder,2017-04-12,W,28,0,0,3,0,...,0,0,1,0,-4,2016-17,0,0,15.50,1
0,Ivan Rabb,MEM,Golden State Warriors,2019-04-10,W,29,6,3,10,0,...,0,0,1,3,12,2018-19,0,0,24.50,0
0,Mike Scott,PHI,Houston Rockets,2020-08-14,W,20,10,3,4,1,...,0,0,0,0,37,2019-20,0,0,23.75,1
0,Stephen Curry,GSW,Toronto Raptors,2021-01-10,W,38,11,2,16,1,...,0,0,5,3,7,2020-21,0,0,29.25,0
0,Jordan Crawford,NOP,San Antonio Spurs,2018-04-11,W,3,2,1,2,0,...,0,0,0,0,1,2017-18,0,0,4.75,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26098,Kevin Durant,GSW,Oklahoma City Thunder,2018-10-16,W,38,27,9,21,0,...,1,1,3,4,14,2018-19,0,0,48.50,0
26098,Jordan Bell,GSW,Houston Rockets,2017-10-17,L,12,8,4,5,0,...,0,0,1,4,-2,2017-18,0,0,10.25,0
26099,Davis Bertans,SAS,Golden State Warriors,2016-10-25,W,4,5,2,2,1,...,0,0,0,0,6,2016-17,0,0,6.75,1
26099,David West,GSW,Houston Rockets,2017-10-17,L,9,4,2,3,0,...,0,1,1,0,0,2017-18,0,0,6.75,0


#### Read in Team Defense Statistics

In [33]:
# Scrape the team-defense table once per season, oldest season first
sixteen_d, seventeen_d, eighteen_d, nineteen_d, twenty_d = [
    nba_defense(season)
    for season in ('2016-17', '2017-18', '2018-19', '2019-20', '2020-21')
]

#### Concat defenses together 

In [125]:
# Stack the five per-season defense tables into one frame
defense_frames = [sixteen_d, seventeen_d, eighteen_d, nineteen_d, twenty_d]

defenses = pd.concat(defense_frames, ignore_index=True)

# Align team spellings with the full names used in the game logs' 'Match-up'
# column. The result has to be assigned back, and replace() (unlike map()
# with a partial dict) leaves every other team name untouched.
# NOTE(review): the 'LA Clippers' entry assumes NBA.com lists the team under
# that label; it is a no-op if the label never appears - confirm.
defenses['Team'] = defenses['Team'].replace({
    'Portland Trail Blazers': 'Portland Trailblazers',
    'LA Clippers': 'Los Angeles Clippers',
})

defenses.shape

(150, 15)

#### Merge game-logs and defenses together 

In [142]:
# Attach the opponent's season-level defense stats to every game log row.
# Left join: game logs without a matching opponent/season row are kept.
total = game_logs_all.merge(
    defenses,
    how='left',
    left_on=['Match-up', 'Season'],
    right_on=['Team', 'Season'],
)

#### Feature engineering on the merged game logs and opposing-team defenses

In [143]:
# Columns kept from the merge, listed in their final order under their merged
# names (the _x suffix comes from the game logs, _y from the defense table).
# Everything not listed here is dropped.
kept_columns = ['Date', 'Season', 'Player', 'Team_x', 'Match-up', 'Home/Away', 'W/L',
                'Min', 'Pts', 'FGM', 'FGA', '3PM', '3PA', 'FTM', 'FTA', 'OREB',
                'DREB_x', 'REB', 'AST', 'STL_x', 'BLK_x', 'TOV', 'PF', '+/-',
                'dubdub', 'tripdub', 'DEF_RTG', 'DREB%', 'STL_y', 'BLK_y',
                'OPP_PTS_off_TOV', 'OPP_PTS_2nd_CHANCE', 'OPP_PTS_FB',
                'OPP_PTS_PAINT', 'DraftKings']

# Rename columns to more appealing titles
renamed_columns = {'DEF_RTG': 'OPP_DEF_RTG',
                   'DREB%': 'OPP_DREB%',
                   'STL_y': 'OPP_STL',
                   'BLK_y': 'OPP_BLK',
                   'DREB_x': 'DREB',
                   'STL_x': 'STL',
                   'BLK_x': 'BLK',
                   'Team_x': 'Team',
                   'DraftKings': 'DK_SCORE',
                   'dubdub': 'Dub/Dub',
                   'tripdub': 'Trip/Dub'}

# Select, rename and binarize in one chain so nothing is mutated in place on
# a column subset (which is what triggers SettingWithCopyWarning)
total = (
    total[kept_columns]
    .rename(columns=renamed_columns)
    # Binarize 'W/L' column: 'W' -> 1, 'L' -> 0
    .assign(**{'W/L': lambda frame: frame['W/L'].map({'W': 1, 'L': 0})})
)

# Save to csv for modeling
total.to_csv('data/g_logs_defense.csv', index=False)

total.head()

Unnamed: 0,Date,Season,Player,Team,Match-up,Home/Away,W/L,Min,Pts,FGM,...,Trip/Dub,OPP_DEF_RTG,OPP_DREB%,OPP_STL,OPP_BLK,OPP_PTS_off_TOV,OPP_PTS_2nd_CHANCE,OPP_PTS_FB,OPP_PTS_PAINT,DK_SCORE
0,2017-04-12,2016-17,Mike Miller,DEN,Oklahoma City Thunder,1,1,28,0,0,...,0,107.1,74.2,7.9,5.0,16.9,12.2,11.1,47.5,15.5
1,2019-04-10,2018-19,Ivan Rabb,MEM,Golden State Warriors,0,1,29,6,3,...,0,108.6,72.7,7.6,6.4,17.4,13.8,15.1,47.1,24.5
2,2020-08-14,2019-20,Mike Scott,PHI,Houston Rockets,1,1,20,10,3,...,0,109.8,71.3,8.7,5.2,17.8,14.1,15.1,51.5,23.75
3,2021-01-10,2020-21,Stephen Curry,GSW,Toronto Raptors,0,1,38,11,2,...,0,110.5,71.0,8.7,6.1,17.4,14.1,12.8,40.4,29.25
4,2018-04-11,2017-18,Jordan Crawford,NOP,San Antonio Spurs,0,1,3,2,1,...,0,104.1,74.1,7.7,5.6,15.0,12.2,12.5,44.1,4.75
