In [387]:
import pandas as pd 
from selenium import webdriver
import time
import matplotlib.pyplot as plt
import statsmodels.api as sm
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from sklearn.model_selection import train_test_split

# Import ARIMA model.
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.api import VAR

from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

from sklearn.metrics import mean_squared_error

import warnings
warnings.filterwarnings("ignore")

import requests
from bs4 import BeautifulSoup

## Functions

#### Function to return game logs from season and amount of pages wanted (2019-20 and 50 pages, for example)

In [42]:
def nba_game_logs1(num_clicks):
    
    # Path to Driver 
    path = '/Users/willhanley/Desktop/chromedriver 3'
    driver = webdriver.Chrome(executable_path=path)
    
    # URL
    url = 'https://www.nba.com/stats/players/boxscores/?Season=2020-21&SeasonType=Regular%20Season'
    driver.get(url)
    
    # location of table, button to click to next page 
    table = driver.find_element_by_class_name('nba-stat-table__overflow').text.split('\n')
    next_page = driver.find_elements_by_xpath('/html/body/main/div/div/div[2]/div/div/nba-stat-table/div[3]/div/div/a[2]')

    # empty lists to be appended to 
    player_names = []
    player_stats = []
    
    # Loop through num_clicks times
    for _ in range(num_clicks):
        
        # Define the table we are scraping from 
        table = driver.find_element_by_class_name('nba-stat-table__overflow').text.split('\n')
        
        # append to player_names and player_stats lists
        for num, info in enumerate(table):
            if num == 0:
                continue
            else:
                if num % 2 == 1:
                    player_names.append(info)
                if num % 2 == 0:
                    player_stats.append([i for i in info.split(' ')])
                
        # Click to next page            
        driver.execute_script("arguments[0].click();", next_page[0])
        
        # Pause 5 seconds 
        time.sleep(4)
 
    # Return dataframe made from player_names and player_stats
    return pd.DataFrame({'Player': player_names,
                  'Team': [i[0] for i in player_stats],
                  'Match-up': [i[2]+i[3] for i in player_stats],
                  'Date': [i[4] for i in player_stats],
                  'W/L': [i[5] for i in player_stats],
                  'Min': [i[6] for i in player_stats],
                  'Pts': [i[7] for i in player_stats],
                  'FGM': [i[8] for i in player_stats],
                  'FGA': [i[9] for i in player_stats],
                  'FG%': [i[10] for i in player_stats],
                  '3PM': [i[11] for i in player_stats],
                  '3PA': [i[12] for i in player_stats],
                  '3P%': [i[13] for i in player_stats],
                  'FTM': [i[14] for i in player_stats],
                  'FTA': [i[15] for i in player_stats],
                  'FT%': [i[16] for i in player_stats],
                  'OREB': [i[17] for i in player_stats],
                  'DREB': [i[18] for i in player_stats],
                  'REB': [i[19] for i in player_stats],
                  'AST': [i[20] for i in player_stats],
                  'STL': [i[21] for i in player_stats],
                  'BLK': [i[22] for i in player_stats],
                  'TOV': [i[23] for i in player_stats],
                  'PF': [i[24] for i in player_stats],
                  '+/-': [i[25] for i in player_stats]})

#### Functions for if players had a double-double or a triple-double

In [43]:
# Function if player has a double double 

def double_double(row):
    val = 0
    if (row['Pts'] >= 10) & (row['AST'] >= 10) | \
    (row['Pts'] >=10) & (row['REB'] >= 10) | (row['AST'] >=10) & (row['REB'] >= 10):
        val += 1
    return val

# Function if player has a triple double 

def triple_double(row):
    val = 0
    if (row['Pts'] >= 10) & (row['AST'] >= 10) & (row['REB'] >=10) | \
    (row['Pts'] >=10) & (row['AST'] >= 10) & (row['STL'] >= 10) | \
    (row['Pts'] >=10) & (row['REB'] >= 10) & (row['BLK'] >= 10) | \
    (row['Pts'] >=10) & (row['AST'] >= 10) & (row['BLK'] >= 10):
        val += 1
    return val

#### Function to return team defense statistics from season input (2019-20, for example)

In [44]:
def nba_defense(season_year):
    
    # Path to Driver 
    path = '/Users/willhanley/Desktop/chromedriver 3'
    driver = webdriver.Chrome(executable_path=path)
    
    # URL
    url = f'https://www.nba.com/stats/teams/defense/?sort=W&dir=-1&Season={season_year}&SeasonType=Regular%20Season'
    driver.get(url)
    
    # location of table, button to click to next page 
    table = driver.find_element_by_class_name('nba-stat-table__overflow').text.split('\n')
    
    # empty lists to append to 
    teams = []
    team_stats = []
    
    # find locations of team names and team stats 
    for num, info in enumerate(table):
        if num > 8:
            if num % 3 == 0:
                teams.append(info)
            if (num -1) % 3 == 0:
                team_stats.append([i for i in info.split(' ')])

    # return dataframe 
    return pd.DataFrame({
        'Season': season_year,
        'Team': teams,
        'GP': [i[0] for i in team_stats],
        'W': [i[1] for i in team_stats],
        'L': [i[2] for i in team_stats],
        'MIN': [i[3] for i in team_stats],
        'DEF_RTG': [i[4] for i in team_stats],
        'DREB': [i[5] for i in team_stats],
        'DREB%': [i[6] for i in team_stats],
        'STL': [i[7] for i in team_stats],
        'BLK': [i[8] for i in team_stats],
        'OPP_PTS_off_TOV': [i[9] for i in team_stats],
        'OPP_PTS_2nd_CHANCE': [i[10] for i in team_stats],
        'OPP_PTS_FB': [i[11] for i in team_stats],
        'OPP_PTS_PAINT': [i[12] for i in team_stats]})

#### Function to find a list of the players that are playing tonight. 

In [399]:
# Function to find a list of the players that are playing tonight

def playing_tonight():    
    url = 'https://www.sportsline.com/nba/expert-projections/simulation/'

    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    table = soup.find_all("table")[0]

    data = [[cell.text for cell in row.find_all(["th","td"])]
                            for row in table.find_all("tr")]
    df = pd.DataFrame(data)

    df.columns = df.iloc[0,:]
    df.drop(index=0,inplace=True)
    return df

players_playing_tonight = playing_tonight()

players_playing_tonight = list(players_playing_tonight['PLAYER  '])

## Update the current season to todays date and read in past data

In [47]:
# Web-scrape current season for updated information

twenty = nba_game_logs1(70)
twenty.head()

Unnamed: 0,Player,Team,Match-up,Date,W/L,Min,Pts,FGM,FGA,FG%,...,FT%,OREB,DREB,REB,AST,STL,BLK,TOV,PF,+/-
0,Myles Turner,IND,@GSW,01/12/2021,W,42,22,7,11,63.6,...,62.5,2,10,12,0,2,5,1,3,-7
1,JaKarr Sampson,IND,@GSW,01/12/2021,W,14,6,3,5,60.0,...,-,1,5,6,0,1,1,1,0,8
2,James Wiseman,GSW,vs.IND,01/12/2021,L,26,8,4,11,36.4,...,0.0,4,5,9,0,0,2,2,5,-9
3,Cassius Stanley,IND,@GSW,01/12/2021,W,4,2,0,0,-,...,100,0,0,0,0,0,0,0,1,-2
4,Kelan Martin,IND,@GSW,01/12/2021,W,6,2,1,3,33.3,...,-,0,3,3,0,0,0,0,1,-1


#### Read in seasons 2016-17 to 2019-20

In [48]:
sixteen = pd.read_csv('data/2016=2017stats.csv')
seventeen = pd.read_csv('data/2017-2018stats.csv')
eighteen = pd.read_csv('data/2018-2019stats.csv')
nineteen = pd.read_csv('data/game_logs2019-2020.csv')
# twenty = pd.read_csv('data/20-21_game_logs.csv')

#### Feature Engineering and Concat all game logs together 

In [137]:
# Date column to datetime

sixteen['Date'] = pd.to_datetime(sixteen['Date'])
seventeen['Date'] = pd.to_datetime(seventeen['Date'])
eighteen['Date'] = pd.to_datetime(eighteen['Date'])
nineteen['Date'] = pd.to_datetime(nineteen['Date'])
twenty['Date'] = pd.to_datetime(twenty['Date'])

# Create Season column

sixteen['Season'] = '2016-17'
seventeen['Season'] = '2017-18'
eighteen['Season'] = '2018-19'
nineteen['Season'] = '2019-20'
twenty['Season'] = '2020-21'

# Concat 1 dataframe
frames = [sixteen,seventeen,eighteen,nineteen,twenty]
game_logs_all = pd.concat(frames)
game_logs_all.sort_index(inplace=True)

# Drop columns with percentages

game_logs_all.drop(columns=['FT%', '3P%', 'FG%'], inplace=True)

# Save numerical columns as ints

game_logs_all[['Min', 'Pts', 'FGM','FGA','3PM', '3PA', 'FTM', 'FTA', 'OREB',
       'DREB','REB','AST','STL','BLK','TOV','PF', '+/-']] = game_logs_all[['Min', 'Pts', 'FGM','FGA','3PM', '3PA', 'FTM', 'FTA', 'OREB',
       'DREB','REB','AST','STL','BLK','TOV','PF', '+/-']].astype(str).astype(int)

# Create Double double column
game_logs_all['dubdub'] = game_logs_all.apply(double_double, axis=1)

# Create Triple double column
game_logs_all['tripdub'] = game_logs_all.apply(triple_double, axis=1)

# Create Draft kings score column (based on draftkings formula)
game_logs_all['DraftKings'] = game_logs_all['Pts'] + .5*game_logs_all['3PM'] + 1.25*game_logs_all['REB'] + 1.5*game_logs_all['AST'] \
+ 2*game_logs_all['STL'] + 2*game_logs_all['BLK'] -.5*game_logs_all['TOV'] + 1.5*game_logs_all['dubdub'] + 3*game_logs_all['tripdub']

# Home or away column: 1 for away, 0 for home
game_logs_all['Home/Away'] = game_logs_all['Match-up'].str.contains('@').astype(int)

# Full names for 'Match-up'
game_logs_all['Match-up'] = game_logs_all['Match-up'].map({'vs.UTA': 'Utah Jazz', '@POR':'Portland Trailblazers',
                        '@GSW':'Golden State Warriors', 'vs.SAS':'San Antonio Spurs',
                        '@IND':'Indiana Pacers','vs.DAL':'Dallas Mavericks','vs.CHA':'Charlotte Hornets',
                        'vs.DET':'Detroit Pistons','@TOR':'Toronto Raptors','@MIL': 'Milwaukee Bucks',
                        '@MEM': 'Memphis Grizzlies','vs.MIN':'Minnesota Timberwolves','vs.DEN': 'Denver Nuggets',
                        '@NOP': 'New Orleans Pelicans','vs.MIA': 'Miami Heat','@LAL':'Los Angeles Lakers',
                        'vs.HOU':'Houston Rockets','@ORL':'Orlando Magic','@PHX':'Phoenix Suns','vs.SAC':'Sacramento Kings',
                        'vs.OKC':'Oklahoma City Thunder','@PHI':'Philadelphia 76ers','@BOS':'Boston Celtics',
                        'vs.BKN':'Brooklyn Nets','vs.WAS':'Washington Wizards','@ATL':'Atlanta Hawks',
                        '@SAC':'Sacramento Kings','vs.BOS':'Boston Celtics','@CHI':'Chicago Bulls',
                        'vs.LAC': 'LA Clippers','vs.CLE':'Cleveland Cavaliers','@DAL':'Dallas Mavericks',
                        'vs.PHX': 'Phoenix Suns','vs.GSW':'Golden State Warriors','@OKC':'Oklahoma City Thunder',
                        '@UTA':'Utah Jazz','vs.LAL':'Los Angeles Lakers','@MIA':'Miami Heat','vs.IND':'Indiana Pacers',
                        '@BKN':'Brooklyn Nets','vs.ORL':'Orlando Magic','@DET':'Detroit Pistons','vs.MEM':'Memphis Grizzlies',
                        '@CLE':'Cleveland Cavaliers','@CHA':'Charlotte Hornets','@DEN':'Denver Nuggets','vs.POR':'Portland Trailblazers',
                        'vs.NOP':'New Orleans Pelicans','@SAS':'San Antonio Spurs','vs.ATL':'Atlanta Hawks','@NYK':'New York Knicks',
                        'vs.MIL':'Milwaukee Bucks','@LAC':'LA Clippers','@HOU':'Houston Rockets','vs.CHI':'Chicago Bulls',
                        'vs.NYK':'New York Knicks','@MIN':'Minnesota Timberwolves','vs.PHI':'Philadelphia 76ers','@WAS':'Washington Wizards',
                        'vs.TOR':'Toronto Raptors'})

# Save as csv
game_logs_all.to_csv('data/game_logs_all.csv')

Unnamed: 0,Player,Team,Match-up,Date,W/L,Min,Pts,FGM,FGA,3PM,...,STL,BLK,TOV,PF,+/-,Season,dubdub,tripdub,DraftKings,Home/Away
0,Mike Miller,DEN,Oklahoma City Thunder,2017-04-12,W,28,0,0,3,0,...,0,0,1,0,-4,2016-17,0,0,15.50,1
0,Ivan Rabb,MEM,Golden State Warriors,2019-04-10,W,29,6,3,10,0,...,0,0,1,3,12,2018-19,0,0,24.50,0
0,Mike Scott,PHI,Houston Rockets,2020-08-14,W,20,10,3,4,1,...,0,0,0,0,37,2019-20,0,0,23.75,1
0,Myles Turner,IND,Golden State Warriors,2021-01-12,W,42,22,7,11,3,...,2,5,1,3,-7,2020-21,1,0,53.50,1
0,Jordan Crawford,NOP,San Antonio Spurs,2018-04-11,W,3,2,1,2,0,...,0,0,0,0,1,2017-18,0,0,4.75,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26098,Stephen Curry,GSW,San Antonio Spurs,2016-10-25,L,34,26,9,18,3,...,0,0,4,1,-9,2016-17,0,0,35.25,0
26098,Kevin Durant,GSW,Oklahoma City Thunder,2018-10-16,W,38,27,9,21,0,...,1,1,3,4,14,2018-19,0,0,48.50,0
26099,Jordan Bell,GSW,Oklahoma City Thunder,2018-10-16,W,7,0,0,0,0,...,0,1,0,1,-6,2018-19,0,0,4.50,0
26099,Davis Bertans,SAS,Golden State Warriors,2016-10-25,W,4,5,2,2,1,...,0,0,0,0,6,2016-17,0,0,6.75,1


#### Read in Team Defense Statistics

In [50]:
sixteen_d = nba_defense('2016-17')
seventeen_d = nba_defense('2017-18')
eighteen_d = nba_defense('2018-19')
nineteen_d = nba_defense('2019-20')
twenty_d = nba_defense('2020-21')

In [51]:
sixteen_d.to_csv('data/16-17_defense.csv', index=False)
seventeen_d.to_csv('data/17-18_defense.csv', index=False)
eighteen_d.to_csv('data/18-19_defense.csv', index=False)
nineteen_d.to_csv('data/19-20_defense.csv', index=False)

#### Concat defenses together 

In [139]:
defenses = [sixteen_d,seventeen_d,eighteen_d,nineteen_d,twenty_d]

defenses = pd.concat(defenses)

defenses['Team'].replace('Portland Trail Blazers','Portland Trailblazers', inplace=True)

Unnamed: 0,Season,Team,GP,W,L,MIN,DEF_RTG,DREB,DREB%,STL,BLK,OPP_PTS_off_TOV,OPP_PTS_2nd_CHANCE,OPP_PTS_FB,OPP_PTS_PAINT
0,2016-17,Golden State Warriors,82,67,15,48.2,103.4,35.0,70.9,9.6,6.8,16.7,14.0,14.4,45.4
1,2016-17,San Antonio Spurs,82,61,21,48.3,102.9,33.9,73.0,8.0,5.9,14.7,12.4,12.6,40.8
2,2016-17,Houston Rockets,82,55,27,48.2,108.4,33.5,71.7,8.2,4.3,17.5,13.8,14.3,49.0
3,2016-17,Boston Celtics,82,53,29,48.2,108.0,32.9,70.5,7.5,4.1,15.2,13.9,12.2,43.2
4,2016-17,Cleveland Cavaliers,82,51,31,48.5,109.7,34.4,72.2,6.6,4.0,16.6,13.7,13.3,44.5
5,2016-17,LA Clippers,82,51,31,48.2,107.5,34.0,73.3,7.5,4.2,15.2,12.7,14.3,42.7
6,2016-17,Toronto Raptors,82,51,31,48.2,107.1,32.6,71.7,8.3,4.9,14.9,13.1,11.5,41.7
7,2016-17,Utah Jazz,82,51,31,48.2,104.7,33.8,74.3,6.7,5.0,15.2,10.7,10.5,41.0
8,2016-17,Washington Wizards,82,49,33,48.4,108.8,32.6,71.6,8.5,4.1,16.4,13.8,12.1,43.9
9,2016-17,Oklahoma City Thunder,82,47,35,48.3,107.1,34.4,74.2,7.9,5.0,16.9,12.2,11.1,47.5


## Merge game-logs and defenses together 

In [140]:
total = pd.merge(game_logs_all,defenses,left_on=['Match-up','Season'], right_on=['Team','Season'],how='inner')

#### Feature engineering on Merged game-logs and defenses playing against

In [141]:
# Drop columns that are not relevant
total = total[['Date', 'Player', 'Team_x', 'Match-up', 'W/L', 'Min', 'Pts', 'FGM',
       'FGA', '3PM', '3PA', 'FTM', 'FTA', 'OREB', 'DREB_x', 'REB', 'AST',
       'STL_x', 'BLK_x', 'TOV', 'PF', '+/-', 'Season', 'dubdub', 'tripdub',
       'DraftKings', 'Home/Away', 'DEF_RTG','DREB%', 'STL_y', 'BLK_y', 'OPP_PTS_off_TOV',
       'OPP_PTS_2nd_CHANCE', 'OPP_PTS_FB', 'OPP_PTS_PAINT']]

# Rename columns to more appealing titles
total.rename(columns={'DEF_RTG':'OPP_DEF_RTG',
          'DREB%':'OPP_DREB%',
          'STL_y':'OPP_STL',
          'BLK_y':'OPP_BLK',
                     'DREB_x':'DREB',
                     'STL_x':'STL',
                     'BLK_x':'BLK',
                     'Team_x':'Team',
                     'DraftKings':'DK_SCORE',
                     'dubdub':'Dub/Dub',
                     'tripdub':'Trip/Dub'}, inplace=True)

# Binarize 'W/L' column
total['W/L'] = total['W/L'].map({'W': 1, 'L': 0})

# Reorder columns 
total = total[['Date','Season', 'Player', 'Team', 'Match-up','Home/Away', 'W/L', 'Min', 'Pts', 'FGM', 'FGA',
       '3PM', '3PA', 'FTM', 'FTA', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK',
       'TOV', 'PF', '+/-', 'Dub/Dub', 'Trip/Dub', 'OPP_DEF_RTG', 'OPP_DREB%', 'OPP_STL', 'OPP_BLK',
       'OPP_PTS_off_TOV', 'OPP_PTS_2nd_CHANCE', 'OPP_PTS_FB', 'OPP_PTS_PAINT','DK_SCORE']]

# Save to csv for modeling
total.to_csv('data/g_logs_defense.csv',index=False)

(104165, 35)

## VAR Modeling

In [291]:
data = pd.read_csv('data/g_logs_defense.csv')

In [408]:
mega = pd.DataFrame(columns=['Player','DK_Projection','Pts_Projection','Reb_Projection','Ast_Projection'])

def projection_model(player_list): 
    
    empty_list = []
    for player in player_list:
        if ((data[data['Player']== player]['Min'].mean()) > 20)&((data[data['Player'] == player]['Date'].count()) > 20):
            df = data[data['Player'] == player]
            df.index = pd.DatetimeIndex(df['Date'])
            df.drop(columns='Date',inplace=True)
            df.sort_index(inplace=True)

            player = df['Player'][0]

            last_game = df[['DK_SCORE', 'Pts','AST','REB']].iloc[[-1]]

            last_dk = 0
            last_points = 0
            last_reb = 0
            last_ast = 0 


            # Confirm Stationarity of the data 
            def interpret_dftest(dftest):
                dfoutput = pd.Series(dftest[0:3], index=['Test Statistic','p-value', 'Lag Used'])
                return dfoutput


            # Check if draftkings is stationary
            dk_test = interpret_dftest(adfuller(df['DK_SCORE']))

            if dk_test[1] > .01:
                df['DK_SCORE'] = df['DK_SCORE'].diff(1)
                last_dk = int(last_game['DK_SCORE'])


            # Check if points is stationary
            pts_test = interpret_dftest(adfuller(df['Pts']))

            if pts_test[1] > .01:
                df['Pts'] = df['Pts'].diff(1)
                last_points = int(last_game['Pts'])


            # Check if rebounds is stationary
            reb_test = interpret_dftest(adfuller(df['REB']))

            if reb_test[1] > .01:
                df['REB'] = df['REB'].diff(1)
                last_reb = int(last_game['REB'])


            # Check if assists is stationary 
            ast_test = interpret_dftest(adfuller(df['AST']))

            if ast_test[1] > .01:
                df['AST'] = df['AST'].diff(1)
                last_ast = int(last_game['AST'])



            df = df[['DK_SCORE','Pts','REB','AST']]

            df.dropna(inplace=True)

            train, test = train_test_split(df,shuffle=False, test_size = .25)

            model = VAR(train)

            ts_model = model.fit()

            forecast = ts_model.forecast(train.values, len(test))

            next_game = ts_model.forecast(train.values, 1)
            
    
            
            empty_list.append([player,last_dk + round(next_game[0][0],2),last_points + round(next_game[0][1],2),
                             last_reb + round(next_game[0][2],2),last_ast + round(next_game[0][3],2)])
               

    return pd.DataFrame(empty_list, columns=['Player','DK_proj','Pts_proj','Reb_proj','Ast_proj'])

projections = projection_model(players_playing_tonight)

In [409]:
projections.sort_values(by='DK_proj',ascending=False).head(20)

Unnamed: 0,Player,DK_proj,Pts_proj,Reb_proj,Ast_proj
1,Giannis Antetokounmpo,54.6,26.41,6.42,5.54
2,LeBron James,54.34,27.15,8.62,8.31
5,Anthony Davis,53.21,27.58,10.98,2.44
7,Brandon Ingram,53.15,25.55,4.62,10.01
6,Karl-Anthony Towns,48.48,28.48,12.58,2.25
0,Luka Doncic,47.99,21.54,9.93,6.78
4,Kevin Durant,46.87,26.33,6.29,5.72
18,Blake Griffin,46.11,26.06,5.03,5.45
3,Damian Lillard,43.91,24.24,4.71,7.18
9,Kawhi Leonard,43.28,23.91,6.66,2.36
