# 3 Pointers Made against game_details.csv

### Import packages

In [25]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import pearsonr
import itertools

pd.set_option("display.max_columns", None)

### Set working directory

In [26]:
# Report the directory the kernel is currently running in
print(f'Directory: {os.getcwd()}')

# Switch to the project folder; the data files are read with paths relative to it
os.chdir('/Users/tyler/OneDrive/Documents/Python/NBA')

# Confirm the switch (cwd keeps the new location)
cwd = os.getcwd()
print(f'Directory: {cwd}')

Directory: C:\Users\tyler\OneDrive\Documents\Python\NBA
Directory: C:\Users\tyler\OneDrive\Documents\Python\NBA


## Exploratory Data Analysis

### Import data

In [27]:
# Game details: drop the index column ('Unnamed: 0') that was saved into the CSV
df = pd.read_csv('backend/data/details/game_details.csv').drop(columns=['Unnamed: 0'])

# Game totals: same clean-up, then keep only the game keys and made threes ('3p')
shooting_cols = ['date', 'visitor', 'home', 'team', '3p']
shooting_df = (
    pd.read_csv('backend/data/totals/game_totals.csv')
    .drop(columns=['Unnamed: 0'])
)[shooting_cols]

### Basic exploration

In [28]:
# Summarise column dtypes and non-null counts of the raw frame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 504319 entries, 0 to 504318
Data columns (total 26 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   date        504319 non-null  object 
 1   visitor     504319 non-null  object 
 2   home        504319 non-null  object 
 3   team        504319 non-null  int64  
 4   starter     504277 non-null  float64
 5   player      503917 non-null  object 
 6   mp          503917 non-null  object 
 7   fg          503917 non-null  float64
 8   fga         503917 non-null  float64
 9   fg_perc     482544 non-null  float64
 10  3p          503917 non-null  float64
 11  3pa         503917 non-null  float64
 12  3p_perc     353182 non-null  float64
 13  ft          503917 non-null  float64
 14  fta         503917 non-null  float64
 15  ft_perc     327033 non-null  float64
 16  orb         503917 non-null  float64
 17  drb         503917 non-null  float64
 18  trb         503917 non-null  float64
 19  as

In [29]:
# Peek at five random rows (no random_state is set, so the rows differ on every run)
df.sample(5)

Unnamed: 0,date,visitor,home,team,starter,player,mp,fg,fga,fg_perc,3p,3pa,3p_perc,ft,fta,ft_perc,orb,drb,trb,ast,stl,blk,tov,pf,pts,plus_minus
89816,"Thu, Apr 2, 2009",Milwaukee Bucks,Philadelphia 76ers,1,0.0,Kareem Rush,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
119640,"Mon, Mar 22, 2010",Memphis Grizzlies,Sacramento Kings,0,1.0,Mike Conley,37:38,5.0,13.0,0.385,2.0,4.0,0.5,0.0,0.0,,0.0,3.0,3.0,7.0,2.0,0.0,1.0,1.0,12.0,31.0
433304,"Fri, Jan 10, 2020",Orlando Magic,Phoenix Suns,0,1.0,Wes Iwundu,27:58,1.0,7.0,0.143,0.0,1.0,0.0,2.0,2.0,1.0,2.0,1.0,3.0,1.0,0.0,1.0,1.0,5.0,4.0,-4.0
95909,"Wed, Nov 4, 2009",Miami Heat,Washington Wizards,0,1.0,Jermaine O'Neal,32:52,2.0,7.0,0.286,0.0,0.0,,0.0,0.0,,2.0,5.0,7.0,2.0,0.0,1.0,2.0,3.0,4.0,17.0
150540,"Sat, Mar 19, 2011",Cleveland Cavaliers,Los Angeles Clippers,0,0.0,Daniel Gibson,27:34,4.0,12.0,0.333,2.0,9.0,0.222,0.0,0.0,,0.0,6.0,6.0,3.0,2.0,0.0,1.0,2.0,10.0,-10.0


In [30]:
def convert_mp(mp):
    """Convert a 'minutes played' value to decimal minutes.

    Parameters
    ----------
    mp : str, number or None
        A 'MM:SS' string (e.g. '37:38'), a plain number of minutes given as a
        string or number ('12', 12, 12.5), or a missing value (None, NaN, '').

    Returns
    -------
    int or float
        0 for missing or zero input, otherwise minutes + seconds / 60.

    Raises
    ------
    ValueError
        If a string is neither numeric nor in 'MM:SS' form.
    """
    # Missing values count as no minutes played
    if mp is None or pd.isna(mp):
        return 0

    if isinstance(mp, str):
        text = mp.strip()
        if text in ('', '0'):
            return 0
        # Split once and reuse the parts
        parts = text.split(':')
        if len(parts) == 1:
            # No seconds component, e.g. '12'
            return float(text)
        return int(parts[0]) + int(parts[1]) / 60

    # Already numeric (e.g. the 0 written by fillna(0))
    return 0 if mp == 0 else float(mp)

In [31]:
# Clean-up in one pass:
#   * NaN -> 0 everywhere
#   * 'date' parsed into datetimes
#   * 'team' flag replaced by a team name (truthy -> home, falsy -> visitor)
#   * 'mp' converted to decimal minutes
df = (
    df.fillna(0)
    .assign(
        date=lambda frame: pd.to_datetime(frame['date']),
        team=lambda frame: np.where(frame['team'], frame['home'], frame['visitor']),
        mp=lambda frame: frame['mp'].apply(convert_mp),
    )
)

# Box-score columns that get aggregated per team
stats = [
    'fg', 'fga', '3p', '3pa', 'ft', 'fta',
    'orb', 'drb', 'trb', 'ast', 'stl', 'blk',
    'tov', 'pf', 'pts', 'plus_minus', 'mp',
]

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 504319 entries, 0 to 504318
Data columns (total 26 columns):
 #   Column      Non-Null Count   Dtype         
---  ------      --------------   -----         
 0   date        504319 non-null  datetime64[ns]
 1   visitor     504319 non-null  object        
 2   home        504319 non-null  object        
 3   team        504319 non-null  object        
 4   starter     504319 non-null  float64       
 5   player      504319 non-null  object        
 6   mp          504319 non-null  float64       
 7   fg          504319 non-null  float64       
 8   fga         504319 non-null  float64       
 9   fg_perc     504319 non-null  float64       
 10  3p          504319 non-null  float64       
 11  3pa         504319 non-null  float64       
 12  3p_perc     504319 non-null  float64       
 13  ft          504319 non-null  float64       
 14  fta         504319 non-null  float64       
 15  ft_perc     504319 non-null  float64       
 16  or

In [32]:
# Team total stats: sum the numeric columns within each (date, visitor, home, team).
# numeric_only=True keeps text columns such as 'player' out of the sum; pandas >= 2.0
# no longer silently drops columns it cannot add up.
teams_df = df.groupby(['date', 'visitor', 'home', 'team']).sum(numeric_only=True).reset_index()

In [33]:
# Prepare the totals table:
#   * '3p' becomes the target column
#   * 'date' is parsed into datetimes
#   * the 'team' flag is replaced by a team name (truthy -> home, falsy -> visitor)
shooting_df = (
    shooting_df
    .rename(columns={'3p': 'target'})
    .assign(
        date=lambda totals: pd.to_datetime(totals['date']),
        team=lambda totals: np.where(totals['team'], totals['home'], totals['visitor']),
    )
)

In [34]:
# Starters total stats: sum the tracked stats over rows flagged starter == 1.
# numeric_only=True avoids summing text columns, which pandas >= 2.0 no longer
# drops silently.
starters_df = (
    df[df['starter'] == 1]
    .groupby(['date', 'visitor', 'home', 'team'])
    .sum(numeric_only=True)[stats]
    .reset_index()
)

# Merge dataframes to have target variable (left join keeps every starters row)
starters_df = pd.merge(starters_df, shooting_df,
                       on=['date', 'visitor', 'home', 'team'],
                       how='left')

In [35]:
# Bench total stats: sum the tracked stats over rows flagged starter == 0.
# numeric_only=True avoids summing text columns, which pandas >= 2.0 no longer
# drops silently.
bench_df = (
    df[df['starter'] == 0]
    .groupby(['date', 'visitor', 'home', 'team'])
    .sum(numeric_only=True)[stats]
    .reset_index()
)

# Merge dataframes to have target variable (left join keeps every bench row)
bench_df = pd.merge(bench_df, shooting_df,
                    on=['date', 'visitor', 'home', 'team'],
                    how='left')

# Dataframe of team's last 15 performances

In [36]:
# Return the dates of the games a team played immediately before a given game
def last_15_date(team, date, n_games=15, games=None):
    """Look up the dates of a team's previous games.

    Parameters
    ----------
    team : str
        Team name as stored in the 'team' column.
    date : datetime-like
        Date of the reference game; the team must have a game on this date.
    n_games : int, default 15
        How many previous games to return.
    games : pandas.DataFrame, optional
        Table with 'team' and 'date' columns. Defaults to the notebook-level
        ``teams_df``.

    Returns
    -------
    tuple
        ``n_games`` dates, most recent first. If the team has fewer than
        ``n_games`` earlier games, a tuple of ``n_games`` ``None`` values.

    Raises
    ------
    IndexError
        If the team has no game on ``date``.
    """
    if games is None:
        games = teams_df

    # The team's game dates in chronological order, indexed 0..k-1
    schedule = (
        games.loc[games['team'] == team, 'date']
        .sort_values()
        .reset_index(drop=True)
    )

    # Position of the reference game within that schedule
    date_index = schedule[schedule == date].index[0]

    # Not enough history yet
    if date_index < n_games:
        return (None,) * n_games

    return tuple(schedule.iloc[date_index - back] for back in range(1, n_games + 1))

# One tuple of the 15 previous game dates per row
teams_df['dates'] = teams_df.apply(lambda row: last_15_date(row.team, row.date), axis=1)

# Spread the tuple into columns date_1 ... date_15 (date_1 is the first tuple element)
for position in range(15):
    teams_df['date_' + str(position + 1)] = teams_df['dates'].apply(
        lambda previous, i=position: previous[i]
    )

In [37]:
def _with_opponent(team_games):
    """Attach the other team's columns (suffix '_opp') to every team row of a game."""
    # Self-join on the game keys pairs each row with all rows of the same game
    paired = team_games.merge(
        team_games,
        how='left',
        on=['date', 'visitor', 'home'],
        suffixes=('', '_opp'),
    )
    # Discard the pairing of a team with itself
    return paired[paired['team'] != paired['team_opp']]


# Merge in opponents (see team defensive stats)
starters_df = _with_opponent(starters_df)
bench_df = _with_opponent(bench_df)

In [38]:
# Reduce teams_df to the game keys plus the date_N history columns
key_cols = ['date', 'visitor', 'home', 'team']
cols = [col for col in teams_df.columns if 'date_' in col or col in key_cols]
teams_df = teams_df[cols]

# Attach the history dates to the starters and the bench tables
starters_df = starters_df.merge(teams_df, how='left', on=key_cols)
bench_df = bench_df.merge(teams_df, how='left', on=key_cols)

In [39]:
# Preview the first rows of the starters table
starters_df.head()

Unnamed: 0,date,visitor,home,team,fg,fga,3p,3pa,ft,fta,orb,drb,trb,ast,stl,blk,tov,pf,pts,plus_minus,mp,target,team_opp,fg_opp,fga_opp,3p_opp,3pa_opp,ft_opp,fta_opp,orb_opp,drb_opp,trb_opp,ast_opp,stl_opp,blk_opp,tov_opp,pf_opp,pts_opp,plus_minus_opp,mp_opp,target_opp,date_1,date_2,date_3,date_4,date_5,date_6,date_7,date_8,date_9,date_10,date_11,date_12,date_13,date_14,date_15
0,2006-10-31,Chicago Bulls,Miami Heat,Chicago Bulls,18.0,43.0,3.0,7.0,14.0,18.0,7.0,14.0,21.0,8.0,5.0,2.0,7.0,12.0,53.0,66.0,128.55,7.0,Miami Heat,20.0,48.0,3.0,13.0,11.0,19.0,4.0,18.0,22.0,9.0,4.0,3.0,14.0,14.0,54.0,-95.0,156.583333,3.0,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
1,2006-10-31,Chicago Bulls,Miami Heat,Miami Heat,20.0,48.0,3.0,13.0,11.0,19.0,4.0,18.0,22.0,9.0,4.0,3.0,14.0,14.0,54.0,-95.0,156.583333,3.0,Chicago Bulls,18.0,43.0,3.0,7.0,14.0,18.0,7.0,14.0,21.0,8.0,5.0,2.0,7.0,12.0,53.0,66.0,128.55,7.0,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
2,2006-10-31,Phoenix Suns,Los Angeles Lakers,Los Angeles Lakers,26.0,51.0,5.0,10.0,15.0,20.0,7.0,26.0,33.0,24.0,7.0,0.0,13.0,10.0,72.0,-5.0,157.6,6.0,Phoenix Suns,24.0,49.0,6.0,18.0,5.0,5.0,4.0,20.0,24.0,22.0,1.0,4.0,11.0,13.0,59.0,-11.0,159.483333,13.0,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
3,2006-10-31,Phoenix Suns,Los Angeles Lakers,Phoenix Suns,24.0,49.0,6.0,18.0,5.0,5.0,4.0,20.0,24.0,22.0,1.0,4.0,11.0,13.0,59.0,-11.0,159.483333,13.0,Los Angeles Lakers,26.0,51.0,5.0,10.0,15.0,20.0,7.0,26.0,33.0,24.0,7.0,0.0,13.0,10.0,72.0,-5.0,157.6,6.0,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
4,2006-11-01,Atlanta Hawks,Philadelphia 76ers,Atlanta Hawks,15.0,49.0,3.0,12.0,15.0,18.0,8.0,19.0,27.0,7.0,7.0,4.0,15.0,16.0,48.0,-83.0,164.683333,4.0,Philadelphia 76ers,24.0,56.0,3.0,5.0,21.0,23.0,13.0,21.0,34.0,17.0,7.0,5.0,14.0,12.0,72.0,78.0,168.8,3.0,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT


In [40]:
# Standard score of a value
def z_score(value, mean, std):
    """Return how many `std` units `value` lies away from `mean`."""
    deviation = value - mean
    return deviation / std

# Starters Analysis

In [41]:
# Tracked box-score columns, followed by their opponent ('_opp') counterparts
stats = [
    'fg', 'fga', '3p', '3pa', 'ft', 'fta',
    'orb', 'drb', 'trb', 'ast', 'stl', 'blk',
    'tov', 'pf', 'pts', 'plus_minus', 'mp',
]
opp_stats = [f'{stat}_opp' for stat in stats]
stats = [*stats, *opp_stats]

# Columns taken from each earlier game
x_cols = ['date', 'team', *stats]

last_15_games = starters_df.copy()
X = starters_df[x_cols]

# For each of the 15 previous games, join that game's stats onto the current row;
# the joined stat columns carry the suffix '_1' ... '_15'
dates = [f'_{offset}' for offset in range(1, 16)]
for suffix in dates:
    last_15_games = last_15_games.merge(
        X,
        how='left',
        left_on=['date' + suffix, 'team'],
        right_on=['date', 'team'],
        suffixes=('', suffix),
    )

### Last 15 Performances (Unweighted)

In [42]:
# Set stats: tracked box-score columns plus their opponent ('_opp') counterparts
stats = ['fg', 'fga', '3p', '3pa', 'ft', 'fta',
         'orb', 'drb', 'trb', 'ast', 'stl', 'blk',
         'tov', 'pf', 'pts', 'plus_minus', 'mp']
opp_stats = [stat + '_opp' for stat in stats]
stats = stats + opp_stats

# Column suffixes of the 15 previous games: '_1' ... '_15'
dates = ['_' + str(offset) for offset in range(1, 16)]

last_15 = last_15_games.copy()

# Calculate mean for each stat over a team's last 15 performances
# (NaN whenever one of the 15 games is missing)
for stat in stats:
    last_15[stat] = sum(last_15[stat + date] for date in dates) / len(dates)

# Calculate (population) standard deviation for each stat over the same 15 games
for stat in stats:
    squared_dev = sum((last_15[stat + date] - last_15[stat]) ** 2 for date in dates)
    last_15[stat + '_std'] = (squared_dev / len(dates)) ** .5

# Feature engineer trends: mean z-score of the first 10 suffixes ('_1' ... '_10')
# against the 15-game mean / std. NaN z-scores (missing games or zero spread)
# count as 0.
trend_dates = dates[:10]
for stat in stats:
    z_total = sum(
        z_score(last_15[stat + date], last_15[stat], last_15[stat + '_std']).fillna(0)
        for date in trend_dates
    )
    last_15[stat + '_trend'] = z_total / len(trend_dates)

# Collapse the two team rows of each game into one row by summing, so 'target'
# and every feature become the combined value of both teams.
# numeric_only=True is required on pandas >= 2.0, where summing the datetime /
# text columns still present in this frame is no longer skipped silently.
# NOTE(review): sum() skips NaN, so a game where only one team has a full
# 15-game history keeps that team's values alone, and a missing target becomes
# 0 - confirm this is intended.
last_15 = last_15.groupby(['date', 'visitor', 'home']).sum(numeric_only=True)

# Standard deviation and trending cols
std_cols = [stat + '_std' for stat in stats]
trend_cols = [stat + '_trend' for stat in stats]

# Keep columns
last_15 = last_15[['target'] + stats + std_cols + trend_cols].dropna(axis=0)

# Calculate percentages
last_15['fg_perc'] = last_15['fg'] / last_15['fga']
last_15['fg_perc_opp'] = last_15['fg_opp'] / last_15['fga_opp']

last_15['3p_perc'] = last_15['3p'] / last_15['3pa']
last_15['3p_perc_opp'] = last_15['3p_opp'] / last_15['3pa_opp']

last_15['ft_perc'] = last_15['ft'] / last_15['fta']
last_15['ft_perc_opp'] = last_15['ft_opp'] / last_15['fta_opp']

# Calculate advanced stats
last_15['ts_perc'] = last_15['pts'] / (2 * (last_15['fga'] + .44 * last_15['fta']))
last_15['ts_perc_opp'] = last_15['pts_opp'] / (2 * (last_15['fga_opp'] + .44 * last_15['fta_opp']))

last_15['efg_perc'] = (last_15['fg'] + (.5 * last_15['3p'])) / last_15['fga']
last_15['efg_perc_opp'] = (last_15['fg_opp'] + (.5 * last_15['3p_opp'])) / last_15['fga_opp']

last_15['3par'] = last_15['3pa'] / last_15['fga']
last_15['3par_opp'] = last_15['3pa_opp'] / last_15['fga_opp']

last_15['ftr'] = last_15['fta'] / last_15['fga']
last_15['ftr_opp'] = last_15['fta_opp'] / last_15['fga_opp']

last_15['orb_perc'] = last_15['orb'] / (last_15['orb'] + last_15['drb_opp'])
last_15['orb_perc_opp'] = last_15['orb_opp'] / (last_15['orb_opp'] + last_15['drb'])

last_15['drb_perc'] = last_15['drb'] / (last_15['drb'] + last_15['orb_opp'])
last_15['drb_perc_opp'] = last_15['drb_opp'] / (last_15['drb_opp'] + last_15['orb'])

last_15['trb_perc'] = last_15['trb'] / (last_15['trb'] + last_15['trb_opp'])
last_15['trb_perc_opp'] = last_15['trb_opp'] / (last_15['trb_opp'] + last_15['trb'])

last_15['ast_perc'] = last_15['ast'] / last_15['fg']
last_15['ast_perc_opp'] = last_15['ast_opp'] / last_15['fg_opp']

# Rows where a ratio is undefined (e.g. 0 / 0) are dropped here
starters_15_games = last_15.dropna(axis=0).copy()
starters_15_games.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,target,fg,fga,3p,3pa,ft,fta,orb,drb,trb,ast,stl,blk,tov,pf,pts,plus_minus,mp,fg_opp,fga_opp,3p_opp,3pa_opp,ft_opp,fta_opp,orb_opp,drb_opp,trb_opp,ast_opp,stl_opp,blk_opp,tov_opp,pf_opp,pts_opp,plus_minus_opp,mp_opp,fg_std,fga_std,3p_std,3pa_std,ft_std,fta_std,orb_std,drb_std,trb_std,ast_std,stl_std,blk_std,tov_std,pf_std,pts_std,plus_minus_std,mp_std,fg_opp_std,fga_opp_std,3p_opp_std,3pa_opp_std,ft_opp_std,fta_opp_std,orb_opp_std,drb_opp_std,trb_opp_std,ast_opp_std,stl_opp_std,blk_opp_std,tov_opp_std,pf_opp_std,pts_opp_std,plus_minus_opp_std,mp_opp_std,fg_trend,fga_trend,3p_trend,3pa_trend,ft_trend,fta_trend,orb_trend,drb_trend,trb_trend,ast_trend,stl_trend,blk_trend,tov_trend,pf_trend,pts_trend,plus_minus_trend,mp_trend,fg_opp_trend,fga_opp_trend,3p_opp_trend,3pa_opp_trend,ft_opp_trend,fta_opp_trend,orb_opp_trend,drb_opp_trend,trb_opp_trend,ast_opp_trend,stl_opp_trend,blk_opp_trend,tov_opp_trend,pf_opp_trend,pts_opp_trend,plus_minus_opp_trend,mp_opp_trend,fg_perc,fg_perc_opp,3p_perc,3p_perc_opp,ft_perc,ft_perc_opp,ts_perc,ts_perc_opp,efg_perc,efg_perc_opp,3par,3par_opp,ftr,ftr_opp,orb_perc,orb_perc_opp,drb_perc,drb_perc_opp,trb_perc,trb_perc_opp,ast_perc,ast_perc_opp
date,visitor,home,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 
101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1
2022-03-07,Houston Rockets,Miami Heat,0.0,51.266667,109.2,17.4,45.933333,24.6,33.8,11.0,41.533333,52.533333,33.733333,9.733333,4.6,18.333333,26.933333,144.533333,-8.4,307.092222,55.066667,113.333333,15.133333,42.4,27.333333,34.933333,12.733333,43.666667,56.4,33.466667,10.4,5.333333,20.133333,27.533333,152.6,7.933333,301.411111,8.55411,17.420693,5.466165,12.38989,10.091997,12.157061,5.040698,10.00748,13.065568,6.952753,4.603379,3.065568,6.084998,4.995363,24.899034,99.843659,40.544381,11.28496,16.669609,6.174612,11.417634,9.880233,12.56643,5.282787,12.459864,13.556986,8.81882,3.877254,3.606668,6.380487,8.251517,27.897222,97.020518,33.939629,0.434759,0.394054,0.130845,0.187675,0.174791,0.351317,0.125349,0.268619,0.252619,-0.020657,-0.275027,0.177302,-0.249469,0.193729,0.399529,0.241866,0.272114,0.028984,0.250486,-0.067628,0.113981,0.258421,0.111339,0.217155,0.151946,0.232481,-0.127288,-0.15436,-0.088861,0.114753,0.083107,0.074841,-0.123543,-0.0106,0.469475,0.485882,0.37881,0.356918,0.727811,0.782443,0.582457,0.592833,0.549145,0.552647,0.420635,0.374118,0.309524,0.308235,0.20122,0.234644,0.765356,0.79878,0.482252,0.517748,0.657997,0.607748
2022-03-07,Los Angeles Lakers,San Antonio Spurs,0.0,60.133333,123.133333,14.666667,40.0,21.4,29.266667,12.466667,44.733333,57.2,36.933333,9.4,6.2,18.0,22.333333,156.333333,-12.333333,299.634444,61.733333,123.4,15.8,42.266667,23.8,31.533333,13.0,48.6,61.6,35.733333,10.0,5.333333,17.4,24.133333,163.066667,14.6,310.71,12.360815,16.792714,6.139304,8.343932,8.249824,10.565059,6.311252,7.644123,10.724304,10.496922,3.675978,3.469816,6.714545,5.239169,31.04894,98.177976,30.67418,12.060678,20.575023,6.317101,14.025769,10.106477,12.67854,5.74738,9.310373,11.94415,9.558476,4.305924,2.501545,4.902467,7.475751,31.290749,110.084056,36.665267,0.169691,0.303801,-0.240634,-0.11555,0.214506,0.159359,0.165904,0.171343,0.220921,0.049343,0.140488,0.486117,0.131467,-0.279943,0.158768,-0.013751,0.20003,0.102085,0.088349,-0.063555,-0.125883,0.096141,-0.019202,0.03232,-0.124595,-0.076481,-0.174201,-0.021138,0.23476,0.293375,0.052229,0.098298,0.015824,-0.063287,0.48836,0.50027,0.366667,0.373817,0.731207,0.754757,0.57471,0.593943,0.547916,0.56429,0.324851,0.342518,0.237683,0.255538,0.204148,0.225173,0.774827,0.795852,0.481481,0.518519,0.614191,0.578834
2022-03-07,New York Knicks,Sacramento Kings,0.0,58.133333,122.8,15.666667,43.933333,28.2,38.333333,14.866667,44.533333,59.4,32.933333,8.8,5.733333,18.866667,25.2,160.133333,-16.6,316.347778,57.866667,119.266667,16.8,45.066667,25.933333,33.4,11.066667,46.666667,57.733333,37.266667,8.933333,5.733333,16.933333,25.733333,158.466667,24.4,306.603333,9.816364,15.831393,4.40097,8.796547,9.992128,12.560006,6.138323,9.101087,12.423039,7.081217,5.030327,3.659931,4.282504,6.006105,24.950814,81.488919,30.505984,12.858237,20.339036,5.640834,11.382378,12.521721,13.139534,6.032986,10.45866,11.579637,7.71954,4.647444,4.227145,4.423395,5.41243,35.69873,97.662268,33.154791,0.025786,0.135334,-0.11526,-0.205022,0.324198,0.229372,-0.092666,0.17387,0.065716,-0.217475,-0.37054,-0.034837,-0.391085,0.145583,0.143582,-0.182074,-0.030922,0.083128,-0.151606,0.069017,-0.254669,0.434994,0.252246,-0.813636,0.445053,-0.014273,0.353405,-0.026203,-0.153904,-0.329076,0.306754,0.231612,0.124363,0.201512,0.473398,0.485187,0.356601,0.372781,0.735652,0.776447,0.57327,0.591458,0.537188,0.555618,0.357763,0.377865,0.312161,0.280045,0.241603,0.199041,0.800959,0.758397,0.507114,0.492886,0.566514,0.644009
2022-03-07,Portland Trail Blazers,Minnesota Timberwolves,0.0,55.733333,120.6,17.666667,50.666667,24.8,32.8,14.066667,42.866667,56.933333,35.333333,9.466667,5.866667,18.8,25.8,153.933333,-19.8,306.437778,54.333333,113.133333,16.2,45.4,27.866667,34.466667,11.8,46.533333,58.333333,36.533333,9.2,6.333333,18.4,23.733333,152.733333,23.466667,297.636667,12.335681,20.587997,6.805905,10.667493,8.719726,12.060507,6.085006,10.646105,14.344025,7.617294,4.580045,2.978404,4.625797,7.221311,32.401402,103.49502,38.512804,10.973535,15.132369,5.239847,9.801796,12.310044,14.320618,6.261529,10.343973,12.857954,10.295686,4.529027,3.711034,5.867057,6.894129,28.155987,99.539775,37.88376,-0.242972,-0.349734,-0.029746,-0.069382,0.350808,0.392415,0.030579,-0.333113,-0.231244,0.122908,0.060792,-0.180641,0.293769,0.031607,-0.087891,-0.189291,-0.298702,0.280092,0.046428,0.339389,0.226997,-0.021038,0.002762,-0.250615,-0.351495,-0.406919,0.306161,0.260026,-0.041425,0.060386,0.213647,0.271466,0.208207,-0.035208,0.462134,0.480259,0.348684,0.356828,0.756098,0.808511,0.569988,0.595226,0.535379,0.551856,0.420122,0.401296,0.271973,0.304655,0.232123,0.215854,0.784146,0.767877,0.493927,0.506073,0.633971,0.672393
2022-03-07,Utah Jazz,Dallas Mavericks,0.0,55.533333,114.266667,21.0,51.666667,23.0,29.866667,12.333333,42.8,55.133333,33.2,9.266667,4.933333,18.533333,24.333333,155.066667,33.266667,314.898889,55.333333,117.666667,13.666667,39.6,21.2,29.066667,13.133333,40.0,53.133333,32.933333,9.066667,4.733333,16.666667,23.933333,145.533333,-24.666667,306.881111,10.243285,16.373952,5.991803,10.938459,7.781718,9.366983,4.900382,8.519199,8.842474,8.440779,4.560266,2.917752,6.41657,7.203277,27.421722,98.954189,32.570754,11.988691,17.89119,6.841746,10.690384,8.843303,11.370742,6.624172,8.980585,11.927637,6.742258,4.203284,3.192477,5.753577,5.200756,29.966607,89.760714,35.151154,0.125189,0.292453,0.506777,0.537028,0.503102,0.557636,0.033912,-0.020629,-0.029448,-0.330285,-0.170694,0.015833,0.262495,0.091709,0.377216,-0.138665,0.205079,0.502461,0.454102,0.282261,0.363006,-0.041422,0.100886,0.111363,0.103775,0.14119,0.335191,0.536604,-0.328932,0.060675,0.406705,0.440032,0.19614,0.317181,0.485998,0.470255,0.406452,0.345118,0.770089,0.729358,0.608544,0.557787,0.577888,0.528329,0.452159,0.336544,0.261377,0.247025,0.235669,0.234803,0.765197,0.764331,0.509236,0.490764,0.597839,0.595181


## Correlations

In [43]:
# Correlations for last 15 game stats vs 3pt made (unweighted).
# Rows are collected in a list and turned into a frame once: DataFrame.append
# was removed in pandas 2.0 and copies the whole frame on every call.
corr_rows = []
for col in starters_15_games:
    corr, p_value = pearsonr(starters_15_games['target'], starters_15_games[col])
    corr_rows.append({'corr': corr, 'p-value': p_value, 'stat': col})

corr_df = pd.DataFrame(corr_rows, columns=['corr', 'p-value', 'stat'])

# Decide significance on the exact p-value; filtering after rounding to two
# decimals would misclassify p-values between 0.045 and 0.05
is_significant = corr_df['p-value'] < .05

# Round for display only
corr_df = corr_df.round({'corr': 2, 'p-value': 2})

# Print statistically significant correlations
starters_corr = corr_df[is_significant].sort_values(['corr'], axis=0, ascending=False)
starters_corr

Unnamed: 0,corr,p-value,stat
0,1.00,0.0,target
4,0.71,0.0,3pa
113,0.71,0.0,3par
3,0.69,0.0,3p
21,0.68,0.0,3pa_opp
...,...,...,...
24,-0.29,0.0,orb_opp
115,-0.30,0.0,ftr
116,-0.37,0.0,ftr_opp
117,-0.38,0.0,orb_perc


## Bench Analysis

In [44]:
# Tracked box-score columns, followed by their opponent ('_opp') counterparts
stats = [
    'fg', 'fga', '3p', '3pa', 'ft', 'fta',
    'orb', 'drb', 'trb', 'ast', 'stl', 'blk',
    'tov', 'pf', 'pts', 'plus_minus', 'mp',
]
opp_stats = [f'{stat}_opp' for stat in stats]
stats = [*stats, *opp_stats]

# Columns taken from each earlier game
x_cols = ['date', 'team', *stats]

last_15_games = bench_df.copy()
X = bench_df[x_cols]

# For each of the 15 previous games, join that game's stats onto the current row;
# the joined stat columns carry the suffix '_1' ... '_15'
dates = [f'_{offset}' for offset in range(1, 16)]
for suffix in dates:
    last_15_games = last_15_games.merge(
        X,
        how='left',
        left_on=['date' + suffix, 'team'],
        right_on=['date', 'team'],
        suffixes=('', suffix),
    )

### Last 15 Performances (Unweighted)

In [45]:
# Set stats: tracked box-score columns plus their opponent ('_opp') counterparts
stats = ['fg', 'fga', '3p', '3pa', 'ft', 'fta',
         'orb', 'drb', 'trb', 'ast', 'stl', 'blk',
         'tov', 'pf', 'pts', 'plus_minus', 'mp']
opp_stats = [stat + '_opp' for stat in stats]
stats = stats + opp_stats

# Column suffixes of the 15 previous games: '_1' ... '_15'
dates = ['_' + str(offset) for offset in range(1, 16)]

last_15 = last_15_games.copy()

# Calculate mean for each stat over a team's last 15 performances
# (NaN whenever one of the 15 games is missing)
for stat in stats:
    last_15[stat] = sum(last_15[stat + date] for date in dates) / len(dates)

# Calculate (population) standard deviation for each stat over the same 15 games
for stat in stats:
    squared_dev = sum((last_15[stat + date] - last_15[stat]) ** 2 for date in dates)
    last_15[stat + '_std'] = (squared_dev / len(dates)) ** .5

# Feature engineer trends: mean z-score of the first 10 suffixes ('_1' ... '_10')
# against the 15-game mean / std. NaN z-scores (missing games or zero spread)
# count as 0.
trend_dates = dates[:10]
for stat in stats:
    z_total = sum(
        z_score(last_15[stat + date], last_15[stat], last_15[stat + '_std']).fillna(0)
        for date in trend_dates
    )
    last_15[stat + '_trend'] = z_total / len(trend_dates)

# Collapse the two team rows of each game into one row by summing, so 'target'
# and every feature become the combined value of both teams.
# numeric_only=True is required on pandas >= 2.0, where summing the datetime /
# text columns still present in this frame is no longer skipped silently.
# NOTE(review): sum() skips NaN, so a game where only one team has a full
# 15-game history keeps that team's values alone, and a missing target becomes
# 0 - confirm this is intended.
last_15 = last_15.groupby(['date', 'visitor', 'home']).sum(numeric_only=True)

# Standard deviation and trending cols
std_cols = [stat + '_std' for stat in stats]
trend_cols = [stat + '_trend' for stat in stats]

# Keep columns
last_15 = last_15[['target'] + stats + std_cols + trend_cols].dropna(axis=0)

# Calculate percentages
last_15['fg_perc'] = last_15['fg'] / last_15['fga']
last_15['fg_perc_opp'] = last_15['fg_opp'] / last_15['fga_opp']

last_15['3p_perc'] = last_15['3p'] / last_15['3pa']
last_15['3p_perc_opp'] = last_15['3p_opp'] / last_15['3pa_opp']

last_15['ft_perc'] = last_15['ft'] / last_15['fta']
last_15['ft_perc_opp'] = last_15['ft_opp'] / last_15['fta_opp']

# Calculate advanced stats
last_15['ts_perc'] = last_15['pts'] / (2 * (last_15['fga'] + .44 * last_15['fta']))
last_15['ts_perc_opp'] = last_15['pts_opp'] / (2 * (last_15['fga_opp'] + .44 * last_15['fta_opp']))

last_15['efg_perc'] = (last_15['fg'] + (.5 * last_15['3p'])) / last_15['fga']
last_15['efg_perc_opp'] = (last_15['fg_opp'] + (.5 * last_15['3p_opp'])) / last_15['fga_opp']

last_15['3par'] = last_15['3pa'] / last_15['fga']
last_15['3par_opp'] = last_15['3pa_opp'] / last_15['fga_opp']

last_15['ftr'] = last_15['fta'] / last_15['fga']
last_15['ftr_opp'] = last_15['fta_opp'] / last_15['fga_opp']

last_15['orb_perc'] = last_15['orb'] / (last_15['orb'] + last_15['drb_opp'])
last_15['orb_perc_opp'] = last_15['orb_opp'] / (last_15['orb_opp'] + last_15['drb'])

last_15['drb_perc'] = last_15['drb'] / (last_15['drb'] + last_15['orb_opp'])
last_15['drb_perc_opp'] = last_15['drb_opp'] / (last_15['drb_opp'] + last_15['orb'])

last_15['trb_perc'] = last_15['trb'] / (last_15['trb'] + last_15['trb_opp'])
last_15['trb_perc_opp'] = last_15['trb_opp'] / (last_15['trb_opp'] + last_15['trb'])

last_15['ast_perc'] = last_15['ast'] / last_15['fg']
last_15['ast_perc_opp'] = last_15['ast_opp'] / last_15['fg_opp']

# Rows where a ratio is undefined (e.g. 0 / 0) are dropped here
bench_15_games = last_15.dropna(axis=0).copy()
bench_15_games.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,target,fg,fga,3p,3pa,ft,fta,orb,drb,trb,ast,stl,blk,tov,pf,pts,plus_minus,mp,fg_opp,fga_opp,3p_opp,3pa_opp,ft_opp,fta_opp,orb_opp,drb_opp,trb_opp,ast_opp,stl_opp,blk_opp,tov_opp,pf_opp,pts_opp,plus_minus_opp,mp_opp,fg_std,fga_std,3p_std,3pa_std,ft_std,fta_std,orb_std,drb_std,trb_std,ast_std,stl_std,blk_std,tov_std,pf_std,pts_std,plus_minus_std,mp_std,fg_opp_std,fga_opp_std,3p_opp_std,3pa_opp_std,ft_opp_std,fta_opp_std,orb_opp_std,drb_opp_std,trb_opp_std,ast_opp_std,stl_opp_std,blk_opp_std,tov_opp_std,pf_opp_std,pts_opp_std,plus_minus_opp_std,mp_opp_std,fg_trend,fga_trend,3p_trend,3pa_trend,ft_trend,fta_trend,orb_trend,drb_trend,trb_trend,ast_trend,stl_trend,blk_trend,tov_trend,pf_trend,pts_trend,plus_minus_trend,mp_trend,fg_opp_trend,fga_opp_trend,3p_opp_trend,3pa_opp_trend,ft_opp_trend,fta_opp_trend,orb_opp_trend,drb_opp_trend,trb_opp_trend,ast_opp_trend,stl_opp_trend,blk_opp_trend,tov_opp_trend,pf_opp_trend,pts_opp_trend,plus_minus_opp_trend,mp_opp_trend,fg_perc,fg_perc_opp,3p_perc,3p_perc_opp,ft_perc,ft_perc_opp,ts_perc,ts_perc_opp,efg_perc,efg_perc_opp,3par,3par_opp,ftr,ftr_opp,orb_perc,orb_perc_opp,drb_perc,drb_perc_opp,trb_perc,trb_perc_opp,ast_perc,ast_perc_opp
date,visitor,home,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 
101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1
2022-03-07,Houston Rockets,Miami Heat,0.0,27.533333,61.6,9.133333,27.733333,11.866667,15.733333,7.666667,22.0,29.666667,15.266667,5.733333,2.666667,10.2,16.733333,76.066667,-18.6,177.898889,26.733333,57.733333,8.666667,26.266667,11.266667,16.0,8.4,23.866667,32.266667,15.8,6.6,4.333333,8.333333,18.466667,73.4,19.066667,183.581111,7.729486,13.200371,4.721984,6.841525,6.829264,7.740118,3.759349,5.277467,6.524479,5.256899,3.931852,2.242382,4.153503,4.492409,21.097028,77.146276,28.739791,8.464988,15.231095,4.046004,8.021689,5.884123,7.649427,4.697034,6.728628,8.328001,5.437297,3.341881,3.29554,4.791953,5.790169,23.99665,65.389448,29.0541,-0.085455,-0.204218,-0.260049,-0.340658,-0.011906,-0.001136,-0.105783,0.121345,0.043972,0.056384,-0.135383,-0.167071,-0.30609,-0.251164,-0.170851,-0.247409,-0.24353,0.184451,0.284661,0.542453,0.51774,-0.270688,-0.255091,0.070875,0.068736,0.136086,0.140403,-0.306136,0.043445,-0.443338,0.006061,0.141272,0.153734,0.225237,0.44697,0.463048,0.329327,0.329949,0.754237,0.704167,0.555047,0.566591,0.521104,0.538106,0.450216,0.454965,0.255411,0.277136,0.243129,0.276316,0.723684,0.756871,0.47901,0.52099,0.554479,0.591022
2022-03-07,Los Angeles Lakers,San Antonio Spurs,0.0,25.8,57.066667,8.733333,25.933333,10.866667,15.0,7.6,21.666667,29.266667,16.333333,5.933333,3.8,7.866667,16.2,71.2,-16.333333,185.355556,26.0,56.4,8.666667,23.866667,9.533333,13.866667,7.666667,22.333333,30.0,15.933333,6.333333,2.933333,7.333333,15.466667,70.2,14.066667,174.278889,7.234691,10.970274,3.66355,7.007932,6.631727,8.501572,3.817168,6.687466,7.768525,4.960523,3.072757,2.96558,4.67926,5.530844,18.168373,57.945804,22.740856,10.220376,15.313196,4.832955,7.544268,6.15311,8.01066,3.688259,6.646055,7.372634,6.765928,3.986988,2.549897,3.700749,5.895181,26.154971,57.757737,33.920713,0.057474,-0.046927,-0.023264,0.067934,-0.246833,-0.262949,-0.176388,-0.148634,-0.24131,0.125177,0.167881,-0.344124,0.131353,0.482577,-0.033796,-0.146925,-0.066847,0.05876,0.096681,0.041841,-0.145432,0.267156,0.456635,0.505469,0.436831,0.588305,0.137896,0.151397,0.095705,0.202682,-0.094301,0.11748,0.164971,0.211115,0.452103,0.460993,0.336761,0.363128,0.724444,0.6875,0.559162,0.561588,0.528621,0.537825,0.454439,0.423168,0.26285,0.245863,0.253898,0.261364,0.738636,0.746102,0.493813,0.506187,0.633075,0.612821
2022-03-07,New York Knicks,Sacramento Kings,0.0,21.666667,52.733333,7.4,24.4,11.933333,14.933333,6.6,23.733333,30.333333,14.4,5.2,3.4,8.666667,16.466667,62.666667,-32.066667,166.973333,27.066667,57.866667,9.8,27.466667,10.133333,13.733333,6.333333,23.466667,29.8,15.133333,6.6,2.466667,7.266667,18.133333,74.066667,24.266667,176.721111,6.632939,12.070996,3.679426,6.383112,5.70899,6.37538,3.629096,6.582265,8.328198,6.649546,3.548189,2.480397,3.802728,4.605915,17.935686,79.341887,26.912647,10.468067,16.969601,5.830195,9.173488,8.098813,9.692207,3.768783,7.779851,8.681319,6.073326,4.384706,2.260635,3.474173,5.029553,29.562562,80.092047,33.036763,-0.094829,-0.007343,0.052863,-0.122951,-0.22894,-0.312609,0.283953,-0.133096,0.057101,0.017198,-0.187453,-0.166523,-0.32669,0.174766,-0.145849,0.171808,-0.030169,-0.106588,-0.111129,-0.017818,0.092965,-0.070569,0.011429,-0.081437,-0.422221,-0.409544,-0.335114,-0.426513,-0.433305,-0.132949,0.046146,-0.135004,-0.209623,-0.264582,0.410872,0.467742,0.303279,0.356796,0.799107,0.737864,0.528351,0.579467,0.481037,0.552419,0.462705,0.474654,0.283186,0.237327,0.219512,0.210643,0.789357,0.780488,0.504435,0.495565,0.664615,0.559113
2022-03-07,Portland Trail Blazers,Minnesota Timberwolves,0.0,26.666667,57.866667,10.6,28.266667,10.2,12.933333,5.933333,22.866667,28.8,16.466667,7.0,3.933333,7.8,17.266667,74.133333,-4.2,175.224444,27.8,61.2,10.333333,28.466667,14.2,18.066667,8.6,24.666667,33.266667,18.666667,6.066667,3.733333,9.533333,16.6,80.133333,0.533333,184.018889,8.674109,14.457046,5.85247,9.066005,6.796236,8.192906,2.950069,9.336394,9.944422,7.221337,4.013765,3.183135,4.537912,5.15924,26.039751,99.751444,37.454603,8.392138,15.830607,5.332917,10.599708,9.540821,10.584236,4.09021,9.043644,11.386847,8.222154,4.176697,2.636064,4.234266,5.501069,24.768144,98.048683,34.75592,0.196892,0.199622,0.095283,0.119062,0.45511,0.338627,-0.50349,0.249708,0.127233,0.040243,0.210026,0.212,0.293848,0.501703,0.270415,-0.302227,0.342666,-0.065918,-0.07622,0.028445,-0.080973,0.219607,0.271488,0.185009,0.203713,0.31199,-0.379538,0.528959,0.018023,0.165983,0.182451,0.046352,0.257853,0.041761,0.460829,0.454248,0.375,0.362998,0.78866,0.785978,0.5832,0.579422,0.552419,0.538671,0.488479,0.465142,0.223502,0.295207,0.1939,0.273305,0.726695,0.8061,0.464017,0.535983,0.6175,0.671463
2022-03-07,Utah Jazz,Dallas Mavericks,0.0,25.866667,56.266667,8.8,26.333333,9.133333,12.4,8.4,22.6,31.0,13.4,5.933333,3.866667,7.066667,14.933333,69.666667,21.4,168.428889,25.466667,54.333333,8.8,24.666667,8.533333,11.066667,7.333333,23.466667,30.8,13.8,6.4,3.6,9.0,15.733333,68.266667,-30.0,176.441111,9.421005,15.268686,4.567778,8.735449,5.186085,6.957285,5.12221,6.669852,9.246823,6.101978,3.856422,3.133086,4.023994,5.201587,25.013011,72.844693,29.299465,11.290252,18.607809,4.411882,7.430244,5.001406,6.18296,4.445688,7.287577,9.626383,7.230131,3.465464,3.214076,4.226961,4.797285,27.567372,84.602946,34.909521,-0.295471,-0.469227,-0.331248,-0.386527,-0.224666,-0.090466,0.119268,0.373089,0.30868,0.010555,-0.20214,0.015502,-0.559222,-0.19051,-0.326711,0.47275,-0.291879,-0.695636,-0.426809,-0.028609,0.124602,-0.011854,-0.056025,-0.303777,-0.379415,-0.402018,-0.601563,-0.11208,-0.129556,-0.260844,0.359574,-0.565878,-0.482152,-0.415969,0.459716,0.468712,0.334177,0.356757,0.736559,0.771084,0.564352,0.576551,0.537915,0.549693,0.468009,0.453988,0.220379,0.203681,0.263598,0.244989,0.755011,0.736402,0.501618,0.498382,0.518041,0.541885


## Correlations of Bench

In [46]:
# Correlations for last 15 game stats vs 3pt made (unweighted).
# One record per column is collected in a plain list and the DataFrame is built
# once afterwards: DataFrame.append was removed in pandas 2.0 and returned a
# fresh copy of the frame on every iteration.
corr_rows = []
for col in bench_15_games:
    r_value, p_value = pearsonr(bench_15_games['target'], bench_15_games[col])
    corr_rows.append({'stat': col, 'corr': round(r_value, 2), 'p-value': round(p_value, 2)})

# Explicit column list keeps the frame well-formed (and the later filter valid)
# even when there are no columns to correlate.
corr_df = pd.DataFrame(corr_rows, columns=['corr', 'p-value', 'stat'])

# Print statistically significant correlations, strongest positive first
# NOTE(review): the filter runs on the p-value rounded to 2 decimals, so a raw
# p-value such as 0.046 is rounded to 0.05 and excluded - confirm this is intended.
bench_corr = corr_df[corr_df['p-value'] < .05].sort_values(['corr'], axis=0, ascending=False)
bench_corr

Unnamed: 0,corr,p-value,stat
0,1.00,0.0,target
21,0.65,0.0,3pa_opp
114,0.64,0.0,3par_opp
4,0.63,0.0,3pa
113,0.62,0.0,3par
...,...,...,...
51,-0.11,0.0,mp_std
118,-0.29,0.0,orb_perc_opp
115,-0.34,0.0,ftr
117,-0.35,0.0,orb_perc


## Comparison of Starters to Bench

In [47]:
# Put the starter and bench correlations side by side, one row per stat.
# The p-value columns are dropped so only the correlation values remain.
starter_corr_only = starters_corr.drop(columns=['p-value'])
bench_corr_only = bench_corr.drop(columns=['p-value'])

# Outer join keeps stats that appear in only one of the two tables
corr_df = starter_corr_only.merge(
    bench_corr_only,
    on='stat',
    how='outer',
    suffixes=('_starter', '_bench'),
)
corr_df.sort_values('stat')

Unnamed: 0,corr_starter,stat,corr_bench
3,0.69,3p,0.60
6,0.66,3p_opp,0.62
10,0.44,3p_opp_std,0.38
38,0.12,3p_perc,0.04
47,0.04,3p_perc_opp,-0.02
...,...,...,...
58,-0.02,trb_perc,-0.05
55,0.02,trb_perc_opp,0.05
59,-0.02,trb_std,0.06
11,0.41,ts_perc,0.36


## Save dataframe with significantly correlated stats

In [48]:
# Minimum absolute correlation with the target for a stat to be kept
correlation = .6

# Names of the stats whose |corr| reaches the threshold, per player group
starter_stats = starters_corr.loc[starters_corr['corr'].abs() >= correlation, 'stat']
bench_stats = bench_corr.loc[bench_corr['corr'].abs() >= correlation, 'stat']

# Keep only those columns of the 15-game frames
starters_df = starters_15_games[starter_stats]
bench_df = bench_15_games[bench_stats]

# Join starters and bench on the game identifiers. Column names selected for
# both groups receive a suffix; names present in only one frame stay as they are.
game_keys = ['date', 'visitor', 'home']
df = starters_df.merge(bench_df, on=game_keys, how='outer', suffixes=('_starters', '_bench'))

# Remove both copies of the target column before saving
df = df.drop(columns=['target_bench', 'target_starters'])

df.to_csv('backend/data/inputs/3p/game_details.csv')