# 3 Pointers Made against advanced_totals.csv

### Import packages

In [1]:
import os
import numpy as np
import pandas as pd
%matplotlib inline
from scipy.stats import pearsonr, zscore
import itertools

pd.set_option("display.max_columns", None)

### Set working directory

In [2]:
# Print working directory
cwd = os.getcwd()
print(f'Directory: {cwd}')

# Change working directory
os.chdir('/Users/tyler/OneDrive/Documents/Python/NBA')

# Print working directory
cwd = os.getcwd()
print(f'Directory: {cwd}')

Directory: C:\Users\tyler\OneDrive\Documents\Python\NBA\backend\analysis
Directory: C:\Users\tyler\OneDrive\Documents\Python\NBA


## Exploratory Data Analysis

### Import data

In [3]:
advanced_df = pd.read_csv('backend/data/totals/advanced_totals.csv').drop(['Unnamed: 0'], axis=1)
shooting_df = pd.read_csv('backend/data/totals/game_totals.csv').drop(['Unnamed: 0'], axis=1)
shooting_df = shooting_df[['date', 'visitor', 'home', 'team', '3p']]

In [4]:
# Merge dataframes to have target variable
df = pd.merge(shooting_df, advanced_df, 
              left_on=['date', 'visitor', 'home', 'team'], right_on=['date', 'visitor', 'home', 'team'],
              how='left')

### Basic exploration

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 38288 entries, 0 to 38287
Data columns (total 26 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   date                      38288 non-null  object 
 1   visitor                   38288 non-null  object 
 2   home                      38288 non-null  object 
 3   team                      38288 non-null  int64  
 4   3p                        38288 non-null  int64  
 5   Unnamed: 0.1              35946 non-null  float64
 6   Unnamed: 0.1.1            33660 non-null  float64
 7   Unnamed: 0.1.1.1          31036 non-null  float64
 8   Unnamed: 0.1.1.1.1        28412 non-null  float64
 9   Unnamed: 0.1.1.1.1.1      25794 non-null  float64
 10  Unnamed: 0.1.1.1.1.1.1    23162 non-null  float64
 11  Unnamed: 0.1.1.1.1.1.1.1  20540 non-null  float64
 12  ts_perc                   38288 non-null  float64
 13  efg_perc                  38288 non-null  float64
 14  3par  

In [6]:
# Convert 'date' column to Date object
df['date'] = pd.to_datetime(df['date'])

# Convert 'team' column to Team Name
df['team'] = np.where(df['team'], df['home'], df['visitor'])

# Rename target variable
df = df.rename({'3p': 'target'}, axis=1)

df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 38288 entries, 0 to 38287
Data columns (total 26 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   date                      38288 non-null  datetime64[ns]
 1   visitor                   38288 non-null  object        
 2   home                      38288 non-null  object        
 3   team                      38288 non-null  object        
 4   target                    38288 non-null  int64         
 5   Unnamed: 0.1              35946 non-null  float64       
 6   Unnamed: 0.1.1            33660 non-null  float64       
 7   Unnamed: 0.1.1.1          31036 non-null  float64       
 8   Unnamed: 0.1.1.1.1        28412 non-null  float64       
 9   Unnamed: 0.1.1.1.1.1      25794 non-null  float64       
 10  Unnamed: 0.1.1.1.1.1.1    23162 non-null  float64       
 11  Unnamed: 0.1.1.1.1.1.1.1  20540 non-null  float64       
 12  ts_perc           

In [7]:
# Return ten lastest dates team played
def last_10_date(team, date):
    schedule = df[df['team'] == team].sort_values(by='date').reset_index()
    date_index = schedule[schedule['date'] == date].index[0]
    if date_index - 10 < 0:
        return None, None, None, None, None, None, None, None, None, None
    else:
        date_1, date_2 = schedule.iloc[date_index - 1]['date'], schedule.iloc[date_index - 2]['date']
        date_3, date_4 = schedule.iloc[date_index - 3]['date'], schedule.iloc[date_index - 4]['date']
        date_5, date_6 = schedule.iloc[date_index - 5]['date'], schedule.iloc[date_index - 6]['date']
        date_7, date_8 = schedule.iloc[date_index - 7]['date'], schedule.iloc[date_index - 8]['date']
        date_9, date_10 = schedule.iloc[date_index - 9]['date'], schedule.iloc[date_index - 10]['date']
        return date_1, date_2, date_3, date_4, date_5, date_6, date_7, date_8, date_9, date_10

df['dates'] = df.apply(lambda x: last_10_date(x.team, x.date), axis=1)
df['date_1'], df['date_2'] = df['dates'].apply(lambda x: x[0]), df['dates'].apply(lambda x: x[1])
df['date_3'], df['date_4'] = df['dates'].apply(lambda x: x[2]), df['dates'].apply(lambda x: x[3])
df['date_5'], df['date_6'] = df['dates'].apply(lambda x: x[4]), df['dates'].apply(lambda x: x[5])
df['date_7'], df['date_8'] = df['dates'].apply(lambda x: x[6]), df['dates'].apply(lambda x: x[7])
df['date_9'], df['date_10'] = df['dates'].apply(lambda x: x[8]), df['dates'].apply(lambda x: x[9])

# X and y column names to merge on
y_cols = df.columns
x_cols = ['date', 'team', 'ts_perc', 'efg_perc', '3par', 'ftr', 
          'orb_perc', 'drb_perc', 'trb_perc', 'ast_perc', 'stl_perc', 'blk_perc',
          'tov_perc', 'usg_perc', 'ortg', 'drtg']

last_10_games = df[y_cols]
X = df[x_cols]

# Dataframe of target (3pt made by each team) and of variables (last 5 games stats for each team)
weeks = ['_1', '_2', '_3', '_4', '_5', '_6', '_7', '_8', '_9', '_10']
for week in weeks:
    last_10_games = pd.merge(last_10_games, X, left_on=['date' + week, 'team'], right_on=['date', 'team'], how='left', suffixes=('', week))

drop_cols = ['ts_perc', 'efg_perc', '3par', 'ftr', 'orb_perc', 'drb_perc', 'trb_perc', 'ast_perc', 'stl_perc', 'blk_perc',
             'tov_perc', 'usg_perc', 'ortg', 'drtg', 'dates', 'date_1', 'date_2', 'date_3', 'date_4', 'date_5', 
             'date_6', 'date_7', 'date_8', 'date_9', 'date_10']
last_10_games = last_10_games.drop(drop_cols, axis=1).dropna(axis=0)

# Take average of last 10 games
stats = ['ts_perc', 'efg_perc', '3par', 'ftr', 'orb_perc', 'drb_perc', 'trb_perc', 
         'ast_perc', 'stl_perc', 'blk_perc', 'tov_perc', 'usg_perc', 'ortg', 'drtg']
for stat in stats:
    last_10_games[stat] = 0
    for week in weeks:
        last_10_games[stat] = last_10_games[stat] + last_10_games[stat + week]
    
    last_10_games[stat] = last_10_games[stat] / len(weeks)

drop_cols = [tup[0] + tup[1] for tup in list(itertools.product(stats, weeks))]
last_10_games = last_10_games.drop(drop_cols, axis=1)

# Totals for target and variables
last_10_games = last_10_games.groupby(['date', 'visitor', 'home']).aggregate(
    {'target': 'sum', 'ts_perc': 'mean', 'efg_perc': 'mean', '3par': 'mean', 'ftr': 'mean', 'orb_perc': 'mean', 
     'drb_perc': 'mean', 'trb_perc': 'mean', 'ast_perc': 'mean', 'stl_perc': 'mean', 'blk_perc': 'mean', 
     'tov_perc': 'mean', 'usg_perc': 'mean', 'ortg': 'sum', 'drtg': 'sum'})

# Dataframe of team's last 15 performances

In [8]:
# Return ten lastest dates team played
def last_15_date(team, date):
    schedule = df[df['team'] == team].sort_values(by='date').reset_index()
    date_index = schedule[schedule['date'] == date].index[0]
    if date_index - 15 < 0:
        return None, None, None, None, None, None, None, None, None, None, None, None, None, None, None
    else:
        date_1, date_2 = schedule.iloc[date_index - 1]['date'], schedule.iloc[date_index - 2]['date']
        date_3, date_4 = schedule.iloc[date_index - 3]['date'], schedule.iloc[date_index - 4]['date']
        date_5, date_6 = schedule.iloc[date_index - 5]['date'], schedule.iloc[date_index - 6]['date']
        date_7, date_8 = schedule.iloc[date_index - 7]['date'], schedule.iloc[date_index - 8]['date']
        date_9, date_10 = schedule.iloc[date_index - 9]['date'], schedule.iloc[date_index - 10]['date']
        date_11, date_12 = schedule.iloc[date_index - 11]['date'], schedule.iloc[date_index - 12]['date']
        date_13, date_14 = schedule.iloc[date_index - 13]['date'], schedule.iloc[date_index - 14]['date']
        date_15 = schedule.iloc[date_index - 15]['date']
        return date_1, date_2, date_3, date_4, date_5, date_6, date_7, date_8, date_9, date_10, date_11, date_12, date_13, date_14, date_15

df['dates'] = df.apply(lambda x: last_15_date(x.team, x.date), axis=1)
df['date_1'], df['date_2'] = df['dates'].apply(lambda x: x[0]), df['dates'].apply(lambda x: x[1])
df['date_3'], df['date_4'] = df['dates'].apply(lambda x: x[2]), df['dates'].apply(lambda x: x[3])
df['date_5'], df['date_6'] = df['dates'].apply(lambda x: x[4]), df['dates'].apply(lambda x: x[5])
df['date_7'], df['date_8'] = df['dates'].apply(lambda x: x[6]), df['dates'].apply(lambda x: x[7])
df['date_9'], df['date_10'] = df['dates'].apply(lambda x: x[8]), df['dates'].apply(lambda x: x[9])
df['date_11'], df['date_12'] = df['dates'].apply(lambda x: x[10]), df['dates'].apply(lambda x: x[11])
df['date_13'], df['date_14'] = df['dates'].apply(lambda x: x[12]), df['dates'].apply(lambda x: x[13])
df['date_15'] = df['dates'].apply(lambda x: x[14])

In [9]:
# X and y column names to merge on
y_cols = df.columns
x_cols = ['date', 'team', 'ts_perc', 'efg_perc', '3par', 'ftr', 
          'orb_perc', 'drb_perc', 'trb_perc', 'ast_perc', 'stl_perc', 'blk_perc',
          'tov_perc', 'usg_perc', 'ortg', 'drtg']

last_15_games = df[y_cols]
X = df[x_cols]

# Dataframe of target (3pt made by each team) and of variables (last 15 games stats for each team)
dates_15 = ['_1', '_2', '_3', '_4', '_5', '_6', '_7', '_8', '_9', '_10', '_11', '_12', '_13', '_14', '_15']
for date in dates_15:
    last_15_games = pd.merge(last_15_games, X, left_on=['date' + date, 'team'], right_on=['date', 'team'], how='left', suffixes=('', date))

## Feature Engineering

In [10]:
# Create season variable
def season(month, year):
    if (month in list(range(10, 13)) and year == 2006) or (month in list(range(1, 7)) and year == 2007):
        return 2006
    elif (month in list(range(10, 13)) and year == 2007) or (month in list(range(1, 7)) and year == 2008):
        return 2007
    elif (month in list(range(10, 13)) and year == 2008) or (month in list(range(1, 7)) and year == 2009):
        return 2008
    elif (month in list(range(10, 13)) and year == 2009) or (month in list(range(1, 7)) and year == 2010):
        return 2009
    elif (month in list(range(10, 13)) and year == 2010) or (month in list(range(1, 7)) and year == 2011):
        return 2010
    elif (month in list(range(10, 11)) and year == 2011) or (month in list(range(1, 7)) and year == 2012):
        return 2011
    elif (month in list(range(10, 13)) and year == 2012) or (month in list(range(1, 7)) and year == 2013):
        return 2012
    elif (month in list(range(10, 13)) and year == 2013) or (month in list(range(1, 7)) and year == 2014):
        return 2013
    elif (month in list(range(10, 13)) and year == 2014) or (month in list(range(1, 7)) and year == 2015):
        return 2014
    elif (month in list(range(10, 13)) and year == 2015) or (month in list(range(1, 7)) and year == 2016):
        return 2015
    elif (month in list(range(10, 13)) and year == 2016) or (month in list(range(1, 7)) and year == 2017):
        return 2016
    elif (month in list(range(10, 13)) and year == 2017) or (month in list(range(1, 7)) and year == 2018):
        return 2017
    elif (month in list(range(10, 13)) and year == 2018) or (month in list(range(1, 7)) and year == 2019):
        return 2018
    elif (month in list(range(10, 13)) and year == 2019) or (month in list(range(1, 11)) and year == 2020):
        return 2019
    elif (month in list(range(12, 13)) and year == 2020) or (month in list(range(1, 8)) and year == 2021):
        return 2020

df['season'] = df['date'].apply(lambda x: season(x.month, x.year))
last_15_games['season'] = last_15_games['date'].apply(lambda x: season(x.month, x.year))
last_15_games.sample()

Unnamed: 0,date,visitor,home,team,target,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,Unnamed: 0.1.1.1.1,Unnamed: 0.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1,ts_perc,efg_perc,3par,ftr,orb_perc,drb_perc,trb_perc,ast_perc,stl_perc,blk_perc,tov_perc,usg_perc,ortg,drtg,dates,date_1,date_2,date_3,date_4,date_5,date_6,date_7,date_8,date_9,date_10,date_11,date_12,date_13,date_14,date_15,date_1.1,ts_perc_1,efg_perc_1,3par_1,ftr_1,orb_perc_1,drb_perc_1,trb_perc_1,ast_perc_1,stl_perc_1,blk_perc_1,tov_perc_1,usg_perc_1,ortg_1,drtg_1,date_2.1,ts_perc_2,efg_perc_2,3par_2,ftr_2,orb_perc_2,drb_perc_2,trb_perc_2,ast_perc_2,stl_perc_2,blk_perc_2,tov_perc_2,usg_perc_2,ortg_2,drtg_2,date_3.1,ts_perc_3,efg_perc_3,3par_3,ftr_3,orb_perc_3,drb_perc_3,trb_perc_3,ast_perc_3,stl_perc_3,blk_perc_3,tov_perc_3,usg_perc_3,ortg_3,drtg_3,date_4.1,ts_perc_4,efg_perc_4,3par_4,ftr_4,orb_perc_4,drb_perc_4,trb_perc_4,ast_perc_4,stl_perc_4,blk_perc_4,tov_perc_4,usg_perc_4,ortg_4,drtg_4,date_5.1,ts_perc_5,efg_perc_5,3par_5,ftr_5,orb_perc_5,drb_perc_5,trb_perc_5,ast_perc_5,stl_perc_5,blk_perc_5,tov_perc_5,usg_perc_5,ortg_5,drtg_5,date_6.1,ts_perc_6,efg_perc_6,3par_6,ftr_6,orb_perc_6,drb_perc_6,trb_perc_6,ast_perc_6,stl_perc_6,blk_perc_6,tov_perc_6,usg_perc_6,ortg_6,drtg_6,date_7.1,ts_perc_7,efg_perc_7,3par_7,ftr_7,orb_perc_7,drb_perc_7,trb_perc_7,ast_perc_7,stl_perc_7,blk_perc_7,tov_perc_7,usg_perc_7,ortg_7,drtg_7,date_8.1,ts_perc_8,efg_perc_8,3par_8,ftr_8,orb_perc_8,drb_perc_8,trb_perc_8,ast_perc_8,stl_perc_8,blk_perc_8,tov_perc_8,usg_perc_8,ortg_8,drtg_8,date_9.1,ts_perc_9,efg_perc_9,3par_9,ftr_9,orb_perc_9,drb_perc_9,trb_perc_9,ast_perc_9,stl_perc_9,blk_perc_9,tov_perc_9,usg_perc_9,ortg_9,drtg_9,date_10.1,ts_perc_10,efg_perc_10,3par_10,ftr_10,orb_perc_10,drb_perc_10,trb_perc_10,ast_perc_10,stl_perc_10,blk_perc_10,tov_perc_10,usg_perc_10,ortg_10,drtg_10,date_11.1,ts_perc_11,efg_perc_11,3par_11,ftr_11,orb_perc_11,drb_perc_11,trb_perc_11,ast_perc_11,stl_perc_11,blk_perc_11,tov_perc_11,usg_perc_11,ortg_11,drtg_11,date_12.1,ts_perc_12,efg_perc_12,3par_12,ftr_12,orb_perc_12,drb_perc_12,trb_perc_12,ast_perc_12,stl_perc_12,blk_perc_12,tov_perc_12,usg_perc_12,ortg_12,drtg_12,date_13.1,ts_perc_13,efg_perc_13,3par_13,ftr_13,orb_perc_13,drb_perc_13,trb_perc_13,ast_perc_13,stl_perc_13,blk_perc_13,tov_perc_13,usg_perc_13,ortg_13,drtg_13,date_14.1,ts_perc_14,efg_perc_14,3par_14,ftr_14,orb_perc_14,drb_perc_14,trb_perc_14,ast_perc_14,stl_perc_14,blk_perc_14,tov_perc_14,usg_perc_14,ortg_14,drtg_14,date_15.1,ts_perc_15,efg_perc_15,3par_15,ftr_15,orb_perc_15,drb_perc_15,trb_perc_15,ast_perc_15,stl_perc_15,blk_perc_15,tov_perc_15,usg_perc_15,ortg_15,drtg_15,season
16913,2013-02-22,Boston Celtics,Phoenix Suns,Phoenix Suns,4,16913.0,16913.0,16913.0,16913.0,16913.0,16913.0,16913.0,0.467,0.44,0.226,0.274,25.0,75.7,47.1,74.3,8.7,5.3,12.1,100.0,95.6,122.7,"(2013-02-20 00:00:00, 2013-02-19 00:00:00, 201...",2013-02-20,2013-02-19,2013-02-12,2013-02-10,2013-02-08,2013-02-06,2013-02-05,2013-02-02,2013-02-01,2013-01-30,2013-01-27,2013-01-26,2013-01-24,2013-01-23,2013-01-17,2013-02-20,0.543,0.53,0.202,0.167,26.2,69.8,48.2,70.0,7.8,11.9,13.4,100.0,108.6,119.7,2013-02-19,0.562,0.518,0.159,0.244,35.9,75.5,58.0,73.2,8.6,3.3,15.8,100.0,109.8,105.5,2013-02-12,0.46,0.429,0.153,0.2,25.0,75.0,50.0,77.1,10.3,12.9,17.0,100.0,87.6,93.8,2013-02-10,0.37,0.352,0.154,0.055,32.7,87.8,57.0,60.0,6.4,13.6,19.1,100.0,73.4,103.2,2013-02-08,0.536,0.512,0.217,0.181,26.8,73.7,49.4,46.2,4.3,6.1,15.2,100.0,104.3,137.9,2013-02-06,0.496,0.455,0.192,0.192,31.4,76.3,54.8,54.3,4.7,6.6,15.9,100.0,99.3,109.9,2013-02-05,0.581,0.534,0.137,0.301,35.5,67.4,54.1,51.4,10.1,2.7,20.3,100.0,107.9,101.2,2013-02-02,0.482,0.436,0.174,0.279,31.3,70.2,50.5,62.9,7.5,1.4,11.1,100.0,99.7,121.2,2013-02-01,0.495,0.449,0.169,0.281,23.5,73.9,47.4,52.6,9.1,10.4,11.5,100.0,99.6,109.7,2013-01-30,0.513,0.476,0.145,0.181,16.7,72.5,43.9,55.3,12.0,3.9,10.9,100.0,100.4,93.8,2013-01-27,0.532,0.468,0.221,0.364,20.0,71.4,46.3,64.7,7.3,7.8,16.0,100.0,99.4,115.2,2013-01-26,0.568,0.543,0.148,0.173,22.5,83.3,53.7,73.2,4.3,0.0,14.7,100.0,106.6,116.3,2013-01-24,0.533,0.458,0.225,0.521,20.5,71.4,48.9,60.0,7.6,6.2,13.8,100.0,101.1,95.7,2013-01-23,0.546,0.511,0.211,0.178,24.4,52.6,38.0,69.8,15.0,5.9,10.2,100.0,113.4,102.7,2013-01-17,0.462,0.404,0.157,0.326,20.8,72.3,46.3,58.8,12.5,5.3,14.3,100.0,90.5,94.4,2012.0


In [11]:
# Calculate z-score
def z_score(value, mean, std):
    return (value - mean) / std

In [12]:
# Season average for each stat
season_avgs = df.groupby(['team', 'season']).aggregate(['mean', 'std']).reset_index()
season_avgs.columns = ["_".join([tup[0], tup[1]]) if len(tup[1]) > 0 else tup[0] for tup in season_avgs.columns]
last_15_games = pd.merge(last_15_games, season_avgs, left_on=['team', 'season'], right_on=['team', 'season'])

In [13]:
stats = ['ts_perc', 'efg_perc', '3par', 'ftr', 'orb_perc', 'drb_perc', 'trb_perc', 
         'ast_perc', 'stl_perc', 'blk_perc', 'tov_perc', 'usg_perc', 'ortg', 'drtg']

for stat in stats:
    # 3 day trend
    last_15_games[stat + '_trend_3'] = \
        (z_score(last_15_games[stat + '_1'], last_15_games[stat + '_mean'], last_15_games[stat + '_std']) + \
         z_score(last_15_games[stat + '_2'], last_15_games[stat + '_mean'], last_15_games[stat + '_std']) + \
         z_score(last_15_games[stat + '_3'], last_15_games[stat + '_mean'], last_15_games[stat + '_std'])) / 3
    
    # 5 day trend
    last_15_games[stat + '_trend_5'] = \
        (z_score(last_15_games[stat + '_1'], last_15_games[stat + '_mean'], last_15_games[stat + '_std']) + \
         z_score(last_15_games[stat + '_2'], last_15_games[stat + '_mean'], last_15_games[stat + '_std']) + \
         z_score(last_15_games[stat + '_3'], last_15_games[stat + '_mean'], last_15_games[stat + '_std']) + \
         z_score(last_15_games[stat + '_4'], last_15_games[stat + '_mean'], last_15_games[stat + '_std']) + \
         z_score(last_15_games[stat + '_5'], last_15_games[stat + '_mean'], last_15_games[stat + '_std'])) / 5
    
    # 10 day trend
    last_15_games[stat + '_trend_10'] = \
        (z_score(last_15_games[stat + '_1'], last_15_games[stat + '_mean'], last_15_games[stat + '_std']) + \
         z_score(last_15_games[stat + '_2'], last_15_games[stat + '_mean'], last_15_games[stat + '_std']) + \
         z_score(last_15_games[stat + '_3'], last_15_games[stat + '_mean'], last_15_games[stat + '_std']) + \
         z_score(last_15_games[stat + '_4'], last_15_games[stat + '_mean'], last_15_games[stat + '_std']) + \
         z_score(last_15_games[stat + '_5'], last_15_games[stat + '_mean'], last_15_games[stat + '_std']) + \
         z_score(last_15_games[stat + '_6'], last_15_games[stat + '_mean'], last_15_games[stat + '_std']) + \
         z_score(last_15_games[stat + '_7'], last_15_games[stat + '_mean'], last_15_games[stat + '_std']) + \
         z_score(last_15_games[stat + '_8'], last_15_games[stat + '_mean'], last_15_games[stat + '_std']) + \
         z_score(last_15_games[stat + '_9'], last_15_games[stat + '_mean'], last_15_games[stat + '_std']) + \
         z_score(last_15_games[stat + '_10'], last_15_games[stat + '_mean'], last_15_games[stat + '_std'])) / 10

### Last Performance

In [14]:
stats = ['ts_perc', 'efg_perc', '3par', 'ftr', 'orb_perc', 'drb_perc', 'trb_perc', 
         'ast_perc', 'stl_perc', 'blk_perc', 'tov_perc', 'usg_perc', 'ortg', 'drtg']
dates = ['_1']
trends = ['_trend_3', '_trend_5', '_trend_10']
cols = ['date', 'visitor', 'home', 'team', 'target'] + \
    [tup[0] + tup[1] for tup in list(itertools.product(stats, dates))] + \
    [tup[0] + tup[1] for tup in list(itertools.product(stats, trends))]

last_game = last_15_games[cols].copy()

for stat in stats:
    last_game[stat] = 0
    for date in dates:
        last_game[stat] = last_game[stat] + last_game[stat + date]
    
    last_game[stat] = last_game[stat] / len(dates)

last_game = last_game.groupby(['date', 'visitor', 'home']).aggregate(['mean', 'sum'])

last_game_cols = [col 
                  for col in last_game.columns
                  if ('_perc' in col[0] and 'mean' == col[1] and 'usg' not in col[0]) or ('_perc' not in col[0] and 'sum' == col[1])]

last_game = last_game[last_game_cols].dropna(axis=0)
last_game.columns = [col[0] for col in last_game.columns]
last_game.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,target,ts_perc_1,efg_perc_1,3par_1,ftr_1,orb_perc_1,drb_perc_1,trb_perc_1,ast_perc_1,stl_perc_1,blk_perc_1,tov_perc_1,ortg_1,drtg_1,ts_perc_trend_3,ts_perc_trend_5,ts_perc_trend_10,efg_perc_trend_3,efg_perc_trend_5,efg_perc_trend_10,3par_trend_3,3par_trend_5,3par_trend_10,ftr_trend_3,ftr_trend_5,ftr_trend_10,orb_perc_trend_3,orb_perc_trend_5,orb_perc_trend_10,drb_perc_trend_3,drb_perc_trend_5,drb_perc_trend_10,trb_perc_trend_3,trb_perc_trend_5,trb_perc_trend_10,ast_perc_trend_3,ast_perc_trend_5,ast_perc_trend_10,stl_perc_trend_3,stl_perc_trend_5,stl_perc_trend_10,blk_perc_trend_3,blk_perc_trend_5,blk_perc_trend_10,tov_perc_trend_3,tov_perc_trend_5,tov_perc_trend_10,ortg_trend_3,ortg_trend_5,ortg_trend_10,drtg_trend_3,drtg_trend_5,drtg_trend_10,ts_perc,efg_perc,3par,ftr,orb_perc,drb_perc,trb_perc,ast_perc,stl_perc,blk_perc,tov_perc,ortg,drtg
date,visitor,home,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1
2015-10-30,Miami Heat,Cleveland Cavaliers,12,0.6055,0.57,0.619,0.49,17.6,78.6,53.1,67.3,6.35,7.35,15.4,224.0,181.8,-0.194526,-0.678687,-0.6026,-0.231819,-0.645878,-0.605179,-0.049265,0.047477,-0.117295,0.035181,0.255365,0.364162,-0.039325,0.149719,0.11001,0.134285,-0.186125,-0.327993,0.217404,-0.103317,-0.258531,0.386568,-0.08552,-0.259915,-0.619277,-0.726849,-0.276619,0.210421,0.227784,0.285786,0.329637,-0.114218,-0.011848,-0.564605,-1.038701,-0.923444,-1.108934,-0.180903,0.166057,0.6055,0.57,0.619,0.49,17.6,78.6,53.1,67.3,6.35,7.35,15.4,224.0,181.8
2019-01-05,New Orleans Pelicans,Cleveland Cavaliers,26,0.56,0.5215,0.52,0.517,19.8,76.05,47.05,42.55,6.75,5.1,11.75,220.4,253.3,0.25574,-0.120993,-0.239697,0.205833,-0.200814,-0.196454,-0.904663,-0.635001,-0.536698,-0.118001,0.049556,-0.341979,-0.567423,-0.065216,-0.17525,-0.616836,-0.197933,-0.324849,-0.523853,-0.145958,-0.295543,-0.185325,-0.130391,-0.017071,0.002055,-0.004751,0.184855,-0.212714,-0.386638,0.002955,0.065488,-0.039164,-0.241815,0.12629,-0.052584,-0.33115,1.004094,0.789619,0.429579,0.56,0.5215,0.52,0.517,19.8,76.05,47.05,42.55,6.75,5.1,11.75,220.4,253.3
2017-11-22,Orlando Magic,Minnesota Timberwolves,20,0.509,0.469,0.752,0.563,20.6,77.4,46.6,50.4,6.9,10.15,13.95,195.7,219.5,-0.375325,-0.1697,-0.101961,-0.313083,-0.130053,-0.068788,1.917927,1.148825,0.781168,-0.406516,-0.291173,0.066751,-0.098792,-0.32295,-0.134317,-0.044819,-0.246993,0.178749,0.046321,-0.163326,0.125571,-0.43531,0.016962,0.117371,0.035584,0.176252,0.238405,-0.229612,-0.141845,-0.087841,0.808106,0.334414,0.594392,-1.603436,-0.900014,-0.876207,-0.443031,-0.309658,-0.69369,0.509,0.469,0.752,0.563,20.6,77.4,46.6,50.4,6.9,10.15,13.95,195.7,219.5
2018-10-29,New Orleans Pelicans,Denver Nuggets,22,0.54,0.5005,0.608,0.522,23.1,81.0,49.7,51.4,5.65,8.4,14.4,210.0,235.9,-0.413371,0.032286,-0.167406,-0.535304,-0.082372,-0.199389,-1.2639,-1.259215,-0.723197,1.31993,1.680795,0.424878,0.184525,0.245326,-0.1791,0.196985,0.425767,0.289147,-0.113129,0.403246,0.070205,-0.997751,-0.53189,-0.224682,0.660023,0.444126,0.096635,0.398787,0.441792,0.099892,-0.405522,-0.235161,-0.141661,-0.394668,0.381203,-0.36416,-0.134398,-0.661012,-0.707592,0.54,0.5005,0.608,0.522,23.1,81.0,49.7,51.4,5.65,8.4,14.4,210.0,235.9
2012-11-16,Golden State Warriors,Minnesota Timberwolves,9,0.504,0.457,0.489,0.809,32.6,83.15,57.75,67.1,7.55,9.0,18.4,193.0,190.8,-0.929082,-0.405026,-0.351711,-0.857509,-0.433927,-0.336698,0.168704,-0.035594,-0.296479,0.674592,0.843432,0.347931,0.050405,-0.267266,-0.021298,-0.018187,-0.022714,-0.196497,0.146831,0.043951,-0.034217,0.186467,0.378293,0.039208,-0.092026,0.080575,-0.18291,-0.056334,0.009405,0.235221,0.592721,0.501969,0.302209,-2.340887,-1.498714,-0.939458,-1.532595,-1.551599,-0.652053,0.504,0.457,0.489,0.809,32.6,83.15,57.75,67.1,7.55,9.0,18.4,193.0,190.8


### Last 5 Performances

In [15]:
stats = ['ts_perc', 'efg_perc', '3par', 'ftr', 'orb_perc', 'drb_perc', 'trb_perc', 
         'ast_perc', 'stl_perc', 'blk_perc', 'tov_perc', 'usg_perc', 'ortg', 'drtg']
dates = ['_1', '_2', '_3', '_4', '_5']
trends = ['_trend_3', '_trend_5', '_trend_10']
cols = ['date', 'visitor', 'home', 'team', 'target'] + \
    [tup[0] + tup[1] for tup in list(itertools.product(stats, dates))] + \
    [tup[0] + tup[1] for tup in list(itertools.product(stats, trends))]

last_5_game = last_15_games[cols].copy()

for stat in stats:
    last_5_game[stat] = 0
    for date in dates:
        last_5_game[stat] = last_5_game[stat] + last_5_game[stat + date]
    
    last_5_game[stat] = last_5_game[stat] / len(dates)

last_5_game = last_5_game.groupby(['date', 'visitor', 'home']).aggregate(['mean', 'sum'])

last_5_game_cols = [col 
                    for col in last_5_games.columns
                    if ('_perc' in col[0] and 'mean' == col[1] and 'usg' not in col[0]) or ('_perc' not in col[0] and 'sum' == col[1])]

last_5_games = last_5_games[last_5_game_cols].dropna(axis=0)
last_5_games.columns = [col[0] for col in last_5_games.columns]
last_5_games.head()

NameError: name 'last_5_games' is not defined

### Last 10 Perfomances

In [None]:
stats = ['ts_perc', 'efg_perc', '3par', 'ftr', 'orb_perc', 'drb_perc', 'trb_perc', 
         'ast_perc', 'stl_perc', 'blk_perc', 'tov_perc', 'usg_perc', 'ortg', 'drtg']
dates = ['_1', '_2', '_3', '_4', '_5', '_6', '_7', '_8', '_9', '_10']
trends = ['_trend_3', '_trend_5', '_trend_10']
cols = ['date', 'visitor', 'home', 'team', 'target'] + \
    [tup[0] + tup[1] for tup in list(itertools.product(stats, dates))] + \
    [tup[0] + tup[1] for tup in list(itertools.product(stats, trends))]

last_10_game = last_15_games[cols].copy()

for stat in stats:
    last_10_game[stat] = 0
    for date in dates:
        last_10_game[stat] = last_10_game[stat] + last_10_game[stat + date]
    
    last_10_game[stat] = last_10_game[stat] / len(dates)

last_10_game = last_10_game.groupby(['date', 'visitor', 'home']).aggregate(['mean', 'sum'])

last_10_game_cols = [col 
                    for col in last_10_games.columns
                    if ('_perc' in col[0] and 'mean' == col[1] and 'usg' not in col[0]) or ('_perc' not in col[0] and 'sum' == col[1])]

last_10_games = last_10_games[last_10_game_cols].dropna(axis=0)
last_10_games.columns = [col[0] for col in last_10_games.columns]
last_10_games.head()

### Last 15 Performances (Unweighted)

In [None]:
stats = ['ts_perc', 'efg_perc', '3par', 'ftr', 'orb_perc', 'drb_perc', 'trb_perc', 
         'ast_perc', 'stl_perc', 'blk_perc', 'tov_perc', 'usg_perc', 'ortg', 'drtg']
dates = ['_1', '_2', '_3', '_4', '_5', '_6', '_7', '_8', '_9', '_10', '_11', '_12', '_13', '_14', '_15']
trends = ['_trend_3', '_trend_5', '_trend_10']
cols = ['date', 'visitor', 'home', 'team', 'target'] + \
    [tup[0] + tup[1] for tup in list(itertools.product(stats, dates))] + \
    [tup[0] + tup[1] for tup in list(itertools.product(stats, trends))]

last_15_games_unweighted = last_15_games[cols].copy()

for stat in stats:
    last_15_games_unweighted[stat] = 0
    for date in dates:
        last_15_games_unweighted[stat] = last_15_games_unweighted[stat] + last_15_games_unweighted[stat + date]
    
    last_15_games_unweighted[stat] = last_15_games_unweighted[stat] / len(dates)

last_15_games_unweighted = last_15_games_unweighted.groupby(['date', 'visitor', 'home']).aggregate(['mean', 'sum'])

last_15_game_cols = [col 
                     for col in last_15_games_unweighted.columns
                     if ('_perc' in col[0] and 'mean' == col[1] and 'usg' not in col[0]) or ('_perc' not in col[0] and 'sum' == col[1])]

last_15_games_unweighted = last_15_games_unweighted[last_15_game_cols].dropna(axis=0)
last_15_games_unweighted.columns = [col[0] for col in last_15_games_unweighted.columns]
last_15_games_unweighted.head()

### Last 15 Performances (Weighted)

In [None]:
stats = ['ts_perc', 'efg_perc', '3par', 'ftr', 'orb_perc', 'drb_perc', 'trb_perc', 
         'ast_perc', 'stl_perc', 'blk_perc', 'tov_perc', 'usg_perc', 'ortg', 'drtg']
dates = ['_1', '_2', '_3', '_4', '_5', '_6', '_7', '_8', '_9', '_10', '_11', '_12', '_13', '_14', '_15']
trends = ['_trend_3', '_trend_5', '_trend_10']
cols = ['date', 'visitor', 'home', 'team', 'target'] + \
    [tup[0] + tup[1] for tup in list(itertools.product(stats, dates))] + \
    [tup[0] + tup[1] for tup in list(itertools.product(stats, trends))]

last_15_games_weighted = last_15_games[cols].copy()

for stat in stats:
    last_15_games_weighted[stat] = 0
    for date in dates:
        weight = ((int(date.strip('_')) - 1) // 5) + 1
        last_15_games_weighted[stat] = (last_15_games_weighted[stat] + last_15_games_weighted[stat + date]) * weight
    
    last_15_games_weighted[stat] = last_15_games_weighted[stat] / len(dates)

last_15_games_weighted = last_15_games_weighted.groupby(['date', 'visitor', 'home']).aggregate(['mean', 'sum'])

last_15_game_cols = [col 
                     for col in last_15_games_weighted.columns
                     if ('_perc' in col[0] and 'mean' == col[1] and 'usg' not in col[0]) or ('_perc' not in col[0] and 'sum' == col[1])]

last_15_games_weighted = last_15_games_weighted[last_15_game_cols].dropna(axis=0)
last_15_games_weighted.columns = [col[0] for col in last_15_games_weighted.columns]
last_15_games_weighted.sample(5)

## Correlations of performances (last 1, last 5, last 10)

In [None]:
corr_df = pd.DataFrame()

# Correlations for last 15 game stats vs 3pt made (unweighted)
for col in last_15_games_weighted:
    corr_p = pearsonr(last_15_games_weighted['target'], last_15_games_weighted[col])
    row = {'last': '15_weighted', 'stat': col, 'corr': round(corr_p[0], 2), 'p-value': round(corr_p[1], 2)}
    corr_df = corr_df.append(row, ignore_index=True)

# Correlations for last 15 game stats vs 3pt made (unweighted)
for col in last_15_games_unweighted:
    corr_p = pearsonr(last_15_games_unweighted['target'], last_15_games_unweighted[col])
    row = {'last': '15_unweighted', 'stat': col, 'corr': round(corr_p[0], 2), 'p-value': round(corr_p[1], 2)}
    corr_df = corr_df.append(row, ignore_index=True)

# Correlations for last 10 game stats vs 3pt made 
for col in last_10_games:
    corr_p = pearsonr(last_10_games['target'], last_10_games[col])
    row = {'last': 10, 'stat': col, 'corr': round(corr_p[0], 2), 'p-value': round(corr_p[1], 2)}
    corr_df = corr_df.append(row, ignore_index=True)
    
# Correlations for last 10 game stats vs 3pt made 
for col in last_5_games:
    corr_p = pearsonr(last_5_games['target'], last_5_games[col])
    row = {'last': 5, 'stat': col, 'corr': round(corr_p[0], 2), 'p-value': round(corr_p[1], 2)}
    corr_df = corr_df.append(row, ignore_index=True)
    
# Correlations for last game stats vs 3pt made 
for col in last_game:
    corr_p = pearsonr(last_game['target'], last_game[col])
    row = {'last': 1, 'stat': col, 'corr': round(corr_p[0], 2), 'p-value': round(corr_p[1], 2)}
    corr_df = corr_df.append(row, ignore_index=True)
    
# Print each correlation
for stat in stats + [tup[0] + tup[1] for tup in list(itertools.product(stats, trends))]:
    print(f'Stat: {stat}')
    print(corr_df[(corr_df['stat'] == stat) & (corr_df['p-value'] < .05)].set_index(['last']).drop(['stat'], axis=1))
    print('\n')