# 3 Pointers Made against game_totals.csv

### Import packages

In [49]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
%matplotlib inline
from scipy.stats import pearsonr
import itertools

pd.set_option("display.max_columns", None)

### Set working directory

In [50]:
# Print working directory
cwd = os.getcwd()
print(f'Directory: {cwd}')

# Change working directory
os.chdir('/Users/tyler/OneDrive/Documents/Python/NBA')

# Print working directory
cwd = os.getcwd()
print(f'Directory: {cwd}')

Directory: C:\Users\tyler\OneDrive\Documents\Python\NBA
Directory: C:\Users\tyler\OneDrive\Documents\Python\NBA


## Exploratory Data Analysis

### Import data

In [51]:
df = pd.read_csv('backend/data/totals/game_totals.csv').drop(['Unnamed: 0'], axis=1)

### Basic exploration

In [52]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38288 entries, 0 to 38287
Data columns (total 22 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   date     38288 non-null  object 
 1   visitor  38288 non-null  object 
 2   home     38288 non-null  object 
 3   team     38288 non-null  int64  
 4   fg       38288 non-null  int64  
 5   fga      38288 non-null  int64  
 6   fg_perc  38288 non-null  float64
 7   3p       38288 non-null  int64  
 8   3pa      38288 non-null  int64  
 9   3p_perc  38288 non-null  float64
 10  ft       38288 non-null  int64  
 11  fta      38288 non-null  int64  
 12  ft_perc  38288 non-null  float64
 13  orb      38288 non-null  int64  
 14  drb      38288 non-null  int64  
 15  trb      38288 non-null  int64  
 16  ast      38288 non-null  int64  
 17  stl      38288 non-null  int64  
 18  blk      38288 non-null  int64  
 19  tov      38288 non-null  int64  
 20  pf       38288 non-null  int64  
 21  pts      382

In [53]:
df.sample(5)

Unnamed: 0,date,visitor,home,team,fg,fga,fg_perc,3p,3pa,3p_perc,ft,fta,ft_perc,orb,drb,trb,ast,stl,blk,tov,pf,pts
17232,"Sat, Mar 16, 2013",Charlotte Bobcats,Boston Celtics,0,34,76,0.447,7,20,0.35,13,21,0.619,6,29,35,23,7,7,12,17,88
6622,"Fri, Jan 30, 2009",Los Angeles Lakers,Minnesota Timberwolves,0,48,95,0.505,10,27,0.37,26,33,0.788,19,34,53,23,5,7,12,25,132
37926,"Thu, May 6, 2021",Memphis Grizzlies,Detroit Pistons,0,38,89,0.427,10,37,0.27,11,17,0.647,12,30,42,25,7,5,15,14,97
31024,"Sun, May 27, 2018",Cleveland Cavaliers,Boston Celtics,0,30,66,0.455,9,35,0.257,18,25,0.72,2,39,41,15,2,4,12,21,87
27187,"Fri, Jan 27, 2017",Milwaukee Bucks,Toronto Raptors,1,37,84,0.44,10,25,0.4,18,23,0.783,12,35,47,20,9,10,14,19,102


In [54]:
# Convert 'date' column to Date object
df['date'] = pd.to_datetime(df['date'])

# Conver 'team' column to Team Name
df['team'] = np.where(df['team'], df['home'], df['visitor'])

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38288 entries, 0 to 38287
Data columns (total 22 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   date     38288 non-null  datetime64[ns]
 1   visitor  38288 non-null  object        
 2   home     38288 non-null  object        
 3   team     38288 non-null  object        
 4   fg       38288 non-null  int64         
 5   fga      38288 non-null  int64         
 6   fg_perc  38288 non-null  float64       
 7   3p       38288 non-null  int64         
 8   3pa      38288 non-null  int64         
 9   3p_perc  38288 non-null  float64       
 10  ft       38288 non-null  int64         
 11  fta      38288 non-null  int64         
 12  ft_perc  38288 non-null  float64       
 13  orb      38288 non-null  int64         
 14  drb      38288 non-null  int64         
 15  trb      38288 non-null  int64         
 16  ast      38288 non-null  int64         
 17  stl      38288 non-null  int64 

# Dataframe of team's last 15 performances

In [55]:
# Return ten lastest dates team played
def last_15_date(team, date):
    schedule = df[df['team'] == team].sort_values(by='date').reset_index()
    date_index = schedule[schedule['date'] == date].index[0]
    if date_index - 15 < 0:
        return None, None, None, None, None, None, None, None, None, None, None, None, None, None, None
    else:
        date_1, date_2 = schedule.iloc[date_index - 1]['date'], schedule.iloc[date_index - 2]['date']
        date_3, date_4 = schedule.iloc[date_index - 3]['date'], schedule.iloc[date_index - 4]['date']
        date_5, date_6 = schedule.iloc[date_index - 5]['date'], schedule.iloc[date_index - 6]['date']
        date_7, date_8 = schedule.iloc[date_index - 7]['date'], schedule.iloc[date_index - 8]['date']
        date_9, date_10 = schedule.iloc[date_index - 9]['date'], schedule.iloc[date_index - 10]['date']
        date_11, date_12 = schedule.iloc[date_index - 11]['date'], schedule.iloc[date_index - 12]['date']
        date_13, date_14 = schedule.iloc[date_index - 13]['date'], schedule.iloc[date_index - 14]['date']
        date_15 = schedule.iloc[date_index - 15]['date']
        return date_1, date_2, date_3, date_4, date_5, date_6, date_7, date_8, date_9, date_10, date_11, date_12, date_13, date_14, date_15

df['dates'] = df.apply(lambda x: last_15_date(x.team, x.date), axis=1)
df['date_1'], df['date_2'] = df['dates'].apply(lambda x: x[0]), df['dates'].apply(lambda x: x[1])
df['date_3'], df['date_4'] = df['dates'].apply(lambda x: x[2]), df['dates'].apply(lambda x: x[3])
df['date_5'], df['date_6'] = df['dates'].apply(lambda x: x[4]), df['dates'].apply(lambda x: x[5])
df['date_7'], df['date_8'] = df['dates'].apply(lambda x: x[6]), df['dates'].apply(lambda x: x[7])
df['date_9'], df['date_10'] = df['dates'].apply(lambda x: x[8]), df['dates'].apply(lambda x: x[9])
df['date_11'], df['date_12'] = df['dates'].apply(lambda x: x[10]), df['dates'].apply(lambda x: x[11])
df['date_13'], df['date_14'] = df['dates'].apply(lambda x: x[12]), df['dates'].apply(lambda x: x[13])
df['date_15'] = df['dates'].apply(lambda x: x[14])

In [56]:
# X and y column names to merge on
y_cols = df.columns
x_cols = ['date', 'team', 'fg', 'fga', 'fg_perc', '3p', '3pa', '3p_perc', 'ft', 'fta', 'ft_perc', 
          'orb', 'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts']

last_15_games = df[y_cols]
last_15_games['target'] = last_15_games['3p']
X = df[x_cols]

# Dataframe of target (3pt made by each team) and of variables (last 5 games stats for each team)
dates = ['_1', '_2', '_3', '_4', '_5', '_6', '_7', '_8', '_9', '_10', '_11', '_12', '_13', '_14', '_15']
for date in dates:
    last_15_games = pd.merge(last_15_games, X, left_on=['date' + date, 'team'], right_on=['date', 'team'], how='left', suffixes=('', date))

## Feature Engineering

In [57]:
# Create season variable
def season(month, year):
    if (month in list(range(10, 13)) and year == 2006) or (month in list(range(1, 7)) and year == 2007):
        return 2006
    elif (month in list(range(10, 13)) and year == 2007) or (month in list(range(1, 7)) and year == 2008):
        return 2007
    elif (month in list(range(10, 13)) and year == 2008) or (month in list(range(1, 7)) and year == 2009):
        return 2008
    elif (month in list(range(10, 13)) and year == 2009) or (month in list(range(1, 7)) and year == 2010):
        return 2009
    elif (month in list(range(10, 13)) and year == 2010) or (month in list(range(1, 7)) and year == 2011):
        return 2010
    elif (month in list(range(10, 11)) and year == 2011) or (month in list(range(1, 7)) and year == 2012):
        return 2011
    elif (month in list(range(10, 13)) and year == 2012) or (month in list(range(1, 7)) and year == 2013):
        return 2012
    elif (month in list(range(10, 13)) and year == 2013) or (month in list(range(1, 7)) and year == 2014):
        return 2013
    elif (month in list(range(10, 13)) and year == 2014) or (month in list(range(1, 7)) and year == 2015):
        return 2014
    elif (month in list(range(10, 13)) and year == 2015) or (month in list(range(1, 7)) and year == 2016):
        return 2015
    elif (month in list(range(10, 13)) and year == 2016) or (month in list(range(1, 7)) and year == 2017):
        return 2016
    elif (month in list(range(10, 13)) and year == 2017) or (month in list(range(1, 7)) and year == 2018):
        return 2017
    elif (month in list(range(10, 13)) and year == 2018) or (month in list(range(1, 7)) and year == 2019):
        return 2018
    elif (month in list(range(10, 13)) and year == 2019) or (month in list(range(1, 11)) and year == 2020):
        return 2019
    elif (month in list(range(12, 13)) and year == 2020) or (month in list(range(1, 8)) and year == 2021):
        return 2020

df['season'] = df['date'].apply(lambda x: season(x.month, x.year))
last_15_games['season'] = last_15_games['date'].apply(lambda x: season(x.month, x.year))
last_15_games.sample()

Unnamed: 0,date,visitor,home,team,fg,fga,fg_perc,3p,3pa,3p_perc,ft,fta,ft_perc,orb,drb,trb,ast,stl,blk,tov,pf,pts,dates,date_1,date_2,date_3,date_4,date_5,date_6,date_7,date_8,date_9,date_10,date_11,date_12,date_13,date_14,date_15,target,date_1.1,fg_1,fga_1,fg_perc_1,3p_1,3pa_1,3p_perc_1,ft_1,fta_1,ft_perc_1,orb_1,drb_1,trb_1,ast_1,stl_1,blk_1,tov_1,pf_1,pts_1,date_2.1,fg_2,fga_2,fg_perc_2,3p_2,3pa_2,3p_perc_2,ft_2,fta_2,ft_perc_2,orb_2,drb_2,trb_2,ast_2,stl_2,blk_2,tov_2,pf_2,pts_2,date_3.1,fg_3,fga_3,fg_perc_3,3p_3,3pa_3,3p_perc_3,ft_3,fta_3,ft_perc_3,orb_3,drb_3,trb_3,ast_3,stl_3,blk_3,tov_3,pf_3,pts_3,date_4.1,fg_4,fga_4,fg_perc_4,3p_4,3pa_4,3p_perc_4,ft_4,fta_4,ft_perc_4,orb_4,drb_4,trb_4,ast_4,stl_4,blk_4,tov_4,pf_4,pts_4,date_5.1,fg_5,fga_5,fg_perc_5,3p_5,3pa_5,3p_perc_5,ft_5,fta_5,ft_perc_5,orb_5,drb_5,trb_5,ast_5,stl_5,blk_5,tov_5,pf_5,pts_5,date_6.1,fg_6,fga_6,fg_perc_6,3p_6,3pa_6,3p_perc_6,ft_6,fta_6,ft_perc_6,orb_6,drb_6,trb_6,ast_6,stl_6,blk_6,tov_6,pf_6,pts_6,date_7.1,fg_7,fga_7,fg_perc_7,3p_7,3pa_7,3p_perc_7,ft_7,fta_7,ft_perc_7,orb_7,drb_7,trb_7,ast_7,stl_7,blk_7,tov_7,pf_7,pts_7,date_8.1,fg_8,fga_8,fg_perc_8,3p_8,3pa_8,3p_perc_8,ft_8,fta_8,ft_perc_8,orb_8,drb_8,trb_8,ast_8,stl_8,blk_8,tov_8,pf_8,pts_8,date_9.1,fg_9,fga_9,fg_perc_9,3p_9,3pa_9,3p_perc_9,ft_9,fta_9,ft_perc_9,orb_9,drb_9,trb_9,ast_9,stl_9,blk_9,tov_9,pf_9,pts_9,date_10.1,fg_10,fga_10,fg_perc_10,3p_10,3pa_10,3p_perc_10,ft_10,fta_10,ft_perc_10,orb_10,drb_10,trb_10,ast_10,stl_10,blk_10,tov_10,pf_10,pts_10,date_11.1,fg_11,fga_11,fg_perc_11,3p_11,3pa_11,3p_perc_11,ft_11,fta_11,ft_perc_11,orb_11,drb_11,trb_11,ast_11,stl_11,blk_11,tov_11,pf_11,pts_11,date_12.1,fg_12,fga_12,fg_perc_12,3p_12,3pa_12,3p_perc_12,ft_12,fta_12,ft_perc_12,orb_12,drb_12,trb_12,ast_12,stl_12,blk_12,tov_12,pf_12,pts_12,date_13.1,fg_13,fga_13,fg_perc_13,3p_13,3pa_13,3p_perc_13,ft_13,fta_13,ft_perc_13,orb_13,drb_13,trb_13,ast_13,stl_13,blk_13,tov_13,pf_13,pts_13,date_14.1,fg_14,fga_14,fg_perc_14,3p_14,3pa_14,3p_perc_14,ft_14,fta_14,ft_perc_14,orb_14,drb_14,trb_14,ast_14,stl_14,blk_14,tov_14,pf_14,pts_14,date_15.1,fg_15,fga_15,fg_perc_15,3p_15,3pa_15,3p_perc_15,ft_15,fta_15,ft_perc_15,orb_15,drb_15,trb_15,ast_15,stl_15,blk_15,tov_15,pf_15,pts_15,season
1327,2007-01-29,Charlotte Bobcats,Denver Nuggets,Denver Nuggets,37,80,0.463,4,17,0.235,23,29,0.793,9,27,36,26,5,9,11,22,101,"(2007-01-27 00:00:00, 2007-01-26 00:00:00, 200...",2007-01-27,2007-01-26,2007-01-23,2007-01-22,2007-01-20,2007-01-19,2007-01-14,2007-01-12,2007-01-10,2007-01-08,2007-01-06,2007-01-05,2007-01-02,2006-12-31,2006-12-29,4,2007-01-27,34.0,77.0,0.442,6.0,23.0,0.261,28.0,35.0,0.8,4.0,34.0,38.0,21.0,3.0,5.0,14.0,23.0,102.0,2007-01-26,38.0,76.0,0.5,4.0,11.0,0.364,31.0,40.0,0.775,12.0,25.0,37.0,17.0,9.0,3.0,18.0,31.0,111.0,2007-01-23,41.0,87.0,0.471,10.0,22.0,0.455,25.0,32.0,0.781,14.0,27.0,41.0,23.0,10.0,6.0,14.0,17.0,117.0,2007-01-22,43.0,96.0,0.448,7.0,24.0,0.292,22.0,36.0,0.611,19.0,30.0,49.0,32.0,13.0,6.0,14.0,27.0,115.0,2007-01-20,41.0,89.0,0.461,15.0,29.0,0.517,24.0,37.0,0.649,12.0,35.0,47.0,21.0,8.0,9.0,15.0,16.0,121.0,2007-01-19,41.0,81.0,0.506,6.0,15.0,0.4,22.0,25.0,0.88,7.0,38.0,45.0,23.0,7.0,5.0,14.0,21.0,110.0,2007-01-14,38.0,77.0,0.494,10.0,20.0,0.5,23.0,26.0,0.885,12.0,29.0,41.0,22.0,11.0,2.0,20.0,21.0,109.0,2007-01-12,32.0,92.0,0.348,9.0,28.0,0.321,13.0,17.0,0.765,13.0,38.0,51.0,17.0,5.0,5.0,13.0,20.0,86.0,2007-01-10,35.0,77.0,0.455,2.0,9.0,0.222,11.0,15.0,0.733,7.0,34.0,41.0,15.0,5.0,2.0,17.0,19.0,83.0,2007-01-08,35.0,79.0,0.443,7.0,17.0,0.412,27.0,36.0,0.75,15.0,37.0,52.0,23.0,8.0,8.0,16.0,19.0,104.0,2007-01-06,29.0,75.0,0.387,5.0,26.0,0.192,21.0,31.0,0.677,13.0,31.0,44.0,16.0,7.0,4.0,18.0,19.0,84.0,2007-01-05,39.0,92.0,0.424,7.0,16.0,0.438,19.0,24.0,0.792,17.0,32.0,49.0,24.0,12.0,2.0,14.0,17.0,104.0,2007-01-02,33.0,85.0,0.388,10.0,29.0,0.345,21.0,30.0,0.7,16.0,32.0,48.0,20.0,3.0,7.0,22.0,21.0,97.0,2006-12-31,30.0,80.0,0.375,1.0,10.0,0.1,24.0,30.0,0.8,7.0,30.0,37.0,17.0,12.0,6.0,16.0,21.0,85.0,2006-12-29,33.0,89.0,0.371,7.0,26.0,0.269,16.0,19.0,0.842,13.0,33.0,46.0,13.0,9.0,3.0,14.0,23.0,89.0,2006.0


In [68]:
# Calculate z-score
def z_score(value, mean, std):
    return (value - mean) / std

In [67]:
# Calculate perc difference
def perc_diff(value, mean):
    return (value - mean) / mean

### Last Performance

In [69]:
stats = ['fg', 'fga', 'fg_perc', '3p', '3pa', '3p_perc', 'ft', 'fta', 'ft_perc', 
         'orb', 'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts']
dates = ['_1']
cols = ['date', 'visitor', 'home', 'team', 'target'] + \
    [tup[0] + tup[1] for tup in list(itertools.product(stats, dates))]

last_game = last_15_games[cols].copy()

# Calculate mean for each stat over a team's last performance
for stat in stats:
    last_game[stat] = 0
    for date in dates:
        last_game[stat] = last_game[stat] + last_game[stat + date]
    
    last_game[stat] = last_game[stat] / len(dates)
    
# Calculate standard deviation for each stat over a team's performance
for stat in stats:
    last_game[stat + '_std'] = 0
    for date in dates:
        last_game[stat + '_std'] = last_game[stat + '_std'] + ((last_game[stat + date] - last_game[stat]) ** 2)
    
    last_game[stat + '_std'] = last_game[stat + '_std'] / len(dates)
    last_game[stat + '_std'] = last_game[stat + '_std'] ** .5

# Feature engineer trends
for stat in stats:
    last_game[stat + '_trend'] = 0
    for date in dates:
        last_game[stat + '_trend'] = last_game[stat + '_trend'] + \
                                        z_score(last_game[stat + date], last_game[stat], last_game[stat + '_std']).fillna(0)
    
    last_game[stat + '_trend'] = last_game[stat + '_trend'] / 1


# Aggregate stats for the entire game
last_game = last_game.groupby(['date', 'visitor', 'home']).aggregate(['mean', 'sum'])

last_game_cols = [col 
                  for col in last_game.columns
                  if (col[0] == 'target' and col[1] == 'sum') or \
                     (col[0] in stats and col[1] == 'sum' and '_perc' not in col[0]) or \
                     (col[0] in stats and col[1] == 'mean' and '_perc' in col[0]) or \
                     ('_trend' in col[0] and col[1] == 'sum')]

last_game = last_game[last_game_cols].dropna(axis=0)
last_game.columns = [col[0] for col in last_game.columns]
last_game.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,target,fg,fga,fg_perc,3p,3pa,3p_perc,ft,fta,ft_perc,orb,drb,trb,ast,stl,blk,tov,pf,pts,fg_trend,fga_trend,fg_perc_trend,3p_trend,3pa_trend,3p_perc_trend,ft_trend,fta_trend,ft_perc_trend,orb_trend,drb_trend,trb_trend,ast_trend,stl_trend,blk_trend,tov_trend,pf_trend,pts_trend
date,visitor,home,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1
2006-11-28,Indiana Pacers,Portland Trail Blazers,13,40.0,79.0,0.506,8.0,12.0,0.667,13.0,19.0,0.684,8.0,27.0,35.0,22.0,6.0,4.0,15.0,21.0,101.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2006-11-28,New York Knicks,Chicago Bulls,9,34.0,73.0,0.466,3.0,9.0,0.333,24.0,38.0,0.632,15.0,34.0,49.0,19.0,4.0,4.0,25.0,32.0,95.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2006-11-29,Indiana Pacers,Golden State Warriors,18,79.0,156.0,0.5075,18.0,41.0,0.452,40.0,48.0,0.833,16.0,54.0,70.0,55.0,14.0,19.0,25.0,41.0,216.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2006-11-29,New York Knicks,Cleveland Cavaliers,13,33.0,69.0,0.478,2.0,9.0,0.222,17.0,30.0,0.567,7.0,29.0,36.0,19.0,11.0,6.0,22.0,27.0,85.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2006-11-29,Orlando Magic,Seattle SuperSonics,8,63.0,162.0,0.3895,3.0,21.0,0.141,37.0,50.0,0.7265,31.0,48.0,79.0,28.0,21.0,7.0,24.0,34.0,166.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Last 5 Performances

In [70]:
stats = ['fg', 'fga', 'fg_perc', '3p', '3pa', '3p_perc', 'ft', 'fta', 'ft_perc', 
         'orb', 'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts']
dates = ['_1', '_2', '_3', '_4', '_5']
cols = ['date', 'visitor', 'home', 'team', 'target'] + \
    [tup[0] + tup[1] for tup in list(itertools.product(stats, dates))]

last_5_games = last_15_games[cols].copy()

# Calculate mean for each stat over a team's last performance
for stat in stats:
    last_5_games[stat] = 0
    for date in dates:
        last_5_games[stat] = last_5_games[stat] + last_5_games[stat + date]
    
    last_5_games[stat] = last_5_games[stat] / len(dates)
    
# Calculate standard deviation for each stat over a team's performance
for stat in stats:
    last_5_games[stat + '_std'] = 0
    for date in dates:
        last_5_games[stat + '_std'] = last_5_games[stat + '_std'] + ((last_5_games[stat + date] - last_5_games[stat]) ** 2)
    
    last_5_games[stat + '_std'] = last_5_games[stat + '_std'] / len(dates)
    last_5_games[stat + '_std'] = last_5_games[stat + '_std'] ** .5

# Feature engineer trends
for stat in stats:
    last_5_games[stat + '_trend'] = 0
    for date in dates[:3]:
        last_5_games[stat + '_trend'] = last_5_games[stat + '_trend'] + \
                                        z_score(last_5_games[stat + date], last_5_games[stat], last_5_games[stat + '_std']).fillna(0)
    
    last_5_games[stat + '_trend'] = last_5_games[stat + '_trend'] / len(dates[:3])

last_5_games = last_5_games.groupby(['date', 'visitor', 'home']).aggregate(['mean', 'sum'])

last_5_game_cols = [col 
                    for col in last_5_games.columns
                    if (col[0] == 'target' and col[1] == 'sum') or \
                       (col[0] in stats and col[1] == 'sum' and '_perc' not in col[0]) or \
                       (col[0] in stats and col[1] == 'mean' and '_perc' in col[0]) or \
                       ('_trend' in col[0] and col[1] == 'sum')]

last_5_games = last_5_games[last_5_game_cols].dropna(axis=0)
last_5_games.columns = [col[0] for col in last_5_games.columns]
last_5_games.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,target,fg,fga,fg_perc,3p,3pa,3p_perc,ft,fta,ft_perc,orb,drb,trb,ast,stl,blk,tov,pf,pts,fg_trend,fga_trend,fg_perc_trend,3p_trend,3pa_trend,3p_perc_trend,ft_trend,fta_trend,ft_perc_trend,orb_trend,drb_trend,trb_trend,ast_trend,stl_trend,blk_trend,tov_trend,pf_trend,pts_trend
date,visitor,home,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1
2006-11-28,Indiana Pacers,Portland Trail Blazers,13,35.6,73.8,0.4814,5.2,15.0,0.3644,17.6,22.4,0.7862,8.6,25.6,34.2,19.0,5.4,3.8,12.0,22.6,94.0,0.209609,0.373632,-0.12898,0.544331,0.248452,0.340323,-0.325142,0.191273,-0.784652,0.133038,-0.50425,-0.594476,-0.248452,-0.619048,-0.333333,-0.219265,-0.50322,0.190089
2006-11-28,New York Knicks,Chicago Bulls,9,33.2,77.2,0.4342,4.2,13.6,0.2994,28.0,36.0,0.7834,14.2,30.2,44.4,17.2,5.8,3.2,16.2,29.4,98.6,0.136083,-0.791349,0.728584,-0.588348,-0.740895,-0.164691,-0.360229,-0.112651,-0.58254,-0.550435,0.303455,-0.014265,-0.057354,-0.719295,-0.171499,0.579324,-0.458162,-0.339804
2006-11-29,Indiana Pacers,Golden State Warriors,18,76.4,166.6,0.4565,14.6,41.6,0.3573,37.0,50.2,0.7276,25.0,59.0,84.0,47.8,15.8,15.2,33.2,43.6,204.4,-0.159638,-0.880758,0.103304,-0.130744,-0.11341,-0.081415,0.549772,0.338355,0.778715,0.105004,-0.918926,-0.599463,0.224071,-0.355235,0.189804,-0.543079,-0.561249,0.144012
2006-11-29,New York Knicks,Cleveland Cavaliers,13,33.0,73.8,0.4508,3.0,11.2,0.2676,23.0,32.0,0.7288,12.8,29.6,42.4,17.2,6.4,3.8,18.4,27.4,92.0,0.745356,-0.590872,0.69092,0.0,-0.454859,0.509862,0.086646,0.660687,-0.70014,-0.364847,0.714563,0.344799,0.707365,0.067003,0.541667,0.508882,-0.203078,0.302282
2006-11-29,Orlando Magic,Seattle SuperSonics,8,69.4,158.2,0.4396,9.2,30.6,0.2888,40.4,56.0,0.7283,23.4,60.0,83.4,36.2,16.2,9.2,28.8,45.0,188.4,0.248282,0.987936,-0.473541,-0.291729,-0.596439,-0.051491,0.018035,-0.480608,0.688531,0.885986,-0.409356,-0.01078,0.111459,0.493922,0.15685,-0.738044,-0.662104,0.052509


### Last 10 Perfomances

In [71]:
stats = ['fg', 'fga', 'fg_perc', '3p', '3pa', '3p_perc', 'ft', 'fta', 'ft_perc', 
         'orb', 'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts']
dates = ['_1', '_2', '_3', '_4', '_5', '_6', '_7', '_8', '_9', '_10']
cols = ['date', 'visitor', 'home', 'team', 'target'] + \
    [tup[0] + tup[1] for tup in list(itertools.product(stats, dates))]

last_10_games = last_15_games[cols].copy()

# Calculate mean for each stat over a team's last performance
for stat in stats:
    last_10_games[stat] = 0
    for date in dates:
        last_10_games[stat] = last_10_games[stat] + last_10_games[stat + date]
    
    last_10_games[stat] = last_10_games[stat] / len(dates)
    
# Calculate standard deviation for each stat over a team's performance
for stat in stats:
    last_10_games[stat + '_std'] = 0
    for date in dates:
        last_10_games[stat + '_std'] = last_10_games[stat + '_std'] + ((last_10_games[stat + date] - last_10_games[stat]) ** 2)
    
    last_10_games[stat + '_std'] = last_10_games[stat + '_std'] / len(dates)
    last_10_games[stat + '_std'] = last_10_games[stat + '_std'] ** .5

# Feature engineer trends
for stat in stats:
    last_10_games[stat + '_trend'] = 0
    for date in dates[:5]:
        last_10_games[stat + '_trend'] = last_10_games[stat + '_trend'] + \
                                        z_score(last_10_games[stat + date], last_10_games[stat], last_10_games[stat + '_std']).fillna(0)
    
    last_10_games[stat + '_trend'] = last_10_games[stat + '_trend'] / len(dates[:5])

last_10_games = last_10_games.groupby(['date', 'visitor', 'home']).aggregate(['mean', 'sum'])

last_10_game_cols = [col 
                    for col in last_10_games.columns
                    if (col[0] == 'target' and col[1] == 'sum') or \
                       (col[0] in stats and col[1] == 'sum' and '_perc' not in col[0]) or \
                       (col[0] in stats and col[1] == 'mean' and '_perc' in col[0]) or \
                       ('_trend' in col[0] and col[1] == 'sum')]

last_10_games = last_10_games[last_10_game_cols].dropna(axis=0)
last_10_games.columns = [col[0] for col in last_10_games.columns]
last_10_games.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,target,fg,fga,fg_perc,3p,3pa,3p_perc,ft,fta,ft_perc,orb,drb,trb,ast,stl,blk,tov,pf,pts,fg_trend,fga_trend,fg_perc_trend,3p_trend,3pa_trend,3p_perc_trend,ft_trend,fta_trend,ft_perc_trend,orb_trend,drb_trend,trb_trend,ast_trend,stl_trend,blk_trend,tov_trend,pf_trend,pts_trend
date,visitor,home,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1
2006-11-28,Indiana Pacers,Portland Trail Blazers,13,33.5,73.6,0.4545,4.8,14.5,0.3465,20.6,26.3,0.7923,8.7,25.5,34.2,17.8,6.2,4.1,14.0,24.4,92.4,0.572609,0.046324,0.822568,0.342997,0.193892,0.139983,-0.6333,-0.58788,-0.063232,-0.033315,0.019745,-7.105427e-16,0.365826,-0.287183,-0.135665,-0.482243,-0.536895,0.283552
2006-11-28,New York Knicks,Chicago Bulls,9,35.3,78.6,0.4512,4.9,15.5,0.3035,22.2,29.2,0.7609,12.2,30.7,42.9,17.4,6.8,3.1,15.1,27.6,97.7,-0.816805,-0.249682,-0.394437,-0.298753,-0.379621,-0.057973,0.725227,0.699287,0.318955,0.551318,-0.102468,0.2503828,-0.051232,-0.289157,0.043478,0.260068,0.316033,0.112667
2006-11-29,Indiana Pacers,Golden State Warriors,18,75.7,163.8,0.46185,15.5,42.0,0.36925,37.3,51.3,0.72055,22.6,58.8,81.4,49.3,16.2,13.8,31.6,44.7,204.2,0.160212,0.33705,-0.155317,-0.308402,-0.029612,-0.256323,-0.107525,-0.274154,0.177002,0.657133,0.160269,0.4127687,-0.433355,-0.192901,0.520137,0.27326,-0.200144,0.065182
2006-11-29,New York Knicks,Cleveland Cavaliers,13,35.1,77.4,0.4558,4.5,14.5,0.2941,22.1,29.7,0.7456,11.9,31.3,43.2,17.8,6.7,3.2,16.5,28.4,96.8,-0.788672,-0.579141,-0.115582,-0.609837,-0.634498,-0.355332,0.111726,0.239001,-0.18402,0.230165,-0.402826,-0.1442437,-0.156174,-0.090412,0.25,0.480282,-0.202278,-0.544331
2006-11-29,Orlando Magic,Seattle SuperSonics,8,72.4,157.7,0.4605,9.7,29.7,0.3257,41.6,56.6,0.738,23.6,59.7,83.3,37.0,14.3,10.2,30.1,46.3,196.1,-0.87367,0.138988,-1.073659,-0.194635,0.237211,-0.55319,-0.23043,-0.12821,-0.130473,-0.030963,0.075737,0.06413992,-0.27391,0.817296,-0.398555,-0.278922,-0.370274,-0.722243


### Last 15 Performances (Unweighted)

In [72]:
stats = ['fg', 'fga', 'fg_perc', '3p', '3pa', '3p_perc', 'ft', 'fta', 'ft_perc', 
         'orb', 'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts']
dates = ['_1', '_2', '_3', '_4', '_5', '_6', '_7', '_8', '_9', '_10', '_11', '_12', '_13', '_14', '_15']
cols = ['date', 'visitor', 'home', 'team', 'target'] + \
    [tup[0] + tup[1] for tup in list(itertools.product(stats, dates))]

last_15_games_unweighted = last_15_games[cols].copy()

# Calculate mean for each stat over a team's last performance
for stat in stats:
    last_15_games_unweighted[stat] = 0
    for date in dates:
        last_15_games_unweighted[stat] = last_15_games_unweighted[stat] + last_15_games_unweighted[stat + date]
    
    last_15_games_unweighted[stat] = last_15_games_unweighted[stat] / len(dates)
    
# Calculate standard deviation for each stat over a team's performance
for stat in stats:
    last_15_games_unweighted[stat + '_std'] = 0
    for date in dates:
        last_15_games_unweighted[stat + '_std'] = last_15_games_unweighted[stat + '_std'] + \
                                                    ((last_15_games_unweighted[stat + date] - last_15_games_unweighted[stat]) ** 2)
    
    last_15_games_unweighted[stat + '_std'] = last_15_games_unweighted[stat + '_std'] / len(dates)
    last_15_games_unweighted[stat + '_std'] = last_15_games_unweighted[stat + '_std'] ** .5

# Feature engineer trends
for stat in stats:
    last_15_games_unweighted[stat + '_trend'] = 0
    for date in dates[:10]:
        last_15_games_unweighted[stat + '_trend'] = last_15_games_unweighted[stat + '_trend'] + \
                                        z_score(last_15_games_unweighted[stat + date], last_15_games_unweighted[stat], last_15_games_unweighted[stat + '_std']).fillna(0)
    
    last_15_games_unweighted[stat + '_trend'] = last_15_games_unweighted[stat + '_trend'] / len(dates[:10])

last_15_games_unweighted = last_10_games.groupby(['date', 'visitor', 'home']).aggregate(['mean', 'sum'])

last_15_game_cols = [col 
                    for col in last_15_games_unweighted.columns
                    if (col[0] == 'target' and col[1] == 'sum') or \
                       (col[0] in stats and col[1] == 'sum' and '_perc' not in col[0]) or \
                       (col[0] in stats and col[1] == 'mean' and '_perc' in col[0]) or \
                       ('_trend' in col[0] and col[1] == 'sum')]

last_15_games_unweighted = last_15_games_unweighted[last_15_game_cols].dropna(axis=0)
last_15_games_unweighted.columns = [col[0] for col in last_15_games_unweighted.columns]
last_15_games_unweighted.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,target,fg,fga,fg_perc,3p,3pa,3p_perc,ft,fta,ft_perc,orb,drb,trb,ast,stl,blk,tov,pf,pts,fg_trend,fga_trend,fg_perc_trend,3p_trend,3pa_trend,3p_perc_trend,ft_trend,fta_trend,ft_perc_trend,orb_trend,drb_trend,trb_trend,ast_trend,stl_trend,blk_trend,tov_trend,pf_trend,pts_trend
date,visitor,home,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1
2006-11-28,Indiana Pacers,Portland Trail Blazers,13,33.5,73.6,0.4545,4.8,14.5,0.3465,20.6,26.3,0.7923,8.7,25.5,34.2,17.8,6.2,4.1,14.0,24.4,92.4,0.572609,0.046324,0.822568,0.342997,0.193892,0.139983,-0.6333,-0.58788,-0.063232,-0.033315,0.019745,-7.105427e-16,0.365826,-0.287183,-0.135665,-0.482243,-0.536895,0.283552
2006-11-28,New York Knicks,Chicago Bulls,9,35.3,78.6,0.4512,4.9,15.5,0.3035,22.2,29.2,0.7609,12.2,30.7,42.9,17.4,6.8,3.1,15.1,27.6,97.7,-0.816805,-0.249682,-0.394437,-0.298753,-0.379621,-0.057973,0.725227,0.699287,0.318955,0.551318,-0.102468,0.2503828,-0.051232,-0.289157,0.043478,0.260068,0.316033,0.112667
2006-11-29,Indiana Pacers,Golden State Warriors,18,75.7,163.8,0.46185,15.5,42.0,0.36925,37.3,51.3,0.72055,22.6,58.8,81.4,49.3,16.2,13.8,31.6,44.7,204.2,0.160212,0.33705,-0.155317,-0.308402,-0.029612,-0.256323,-0.107525,-0.274154,0.177002,0.657133,0.160269,0.4127687,-0.433355,-0.192901,0.520137,0.27326,-0.200144,0.065182
2006-11-29,New York Knicks,Cleveland Cavaliers,13,35.1,77.4,0.4558,4.5,14.5,0.2941,22.1,29.7,0.7456,11.9,31.3,43.2,17.8,6.7,3.2,16.5,28.4,96.8,-0.788672,-0.579141,-0.115582,-0.609837,-0.634498,-0.355332,0.111726,0.239001,-0.18402,0.230165,-0.402826,-0.1442437,-0.156174,-0.090412,0.25,0.480282,-0.202278,-0.544331
2006-11-29,Orlando Magic,Seattle SuperSonics,8,72.4,157.7,0.4605,9.7,29.7,0.3257,41.6,56.6,0.738,23.6,59.7,83.3,37.0,14.3,10.2,30.1,46.3,196.1,-0.87367,0.138988,-1.073659,-0.194635,0.237211,-0.55319,-0.23043,-0.12821,-0.130473,-0.030963,0.075737,0.06413992,-0.27391,0.817296,-0.398555,-0.278922,-0.370274,-0.722243


### Last 15 Performances (Weighted)

In [73]:
stats = ['fg', 'fga', 'fg_perc', '3p', '3pa', '3p_perc', 'ft', 'fta', 'ft_perc', 
         'orb', 'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts']
dates = ['_1', '_2', '_3', '_4', '_5', '_6', '_7', '_8', '_9', '_10', '_11', '_12', '_13', '_14', '_15']
cols = ['date', 'visitor', 'home', 'team', 'target'] + \
    [tup[0] + tup[1] for tup in list(itertools.product(stats, dates))]

last_15_games_weighted = last_15_games[cols].copy()

# Calculate mean for each stat over a team's last performance
for stat in stats:
    last_15_games_weighted[stat] = 0
    for date in dates:
        last_15_games_weighted[stat] = last_15_games_weighted[stat] + last_15_games_weighted[stat + date]
    
    last_15_games_weighted[stat] = last_15_games_weighted[stat] / len(dates)
    
# Calculate standard deviation for each stat over a team's performance
for stat in stats:
    last_15_games_weighted[stat + '_std'] = 0
    for date in dates:
        last_15_games_weighted[stat + '_std'] = last_15_games_weighted[stat + '_std'] + \
                                                ((last_15_games_weighted[stat + date] - last_15_games_weighted[stat]) ** 2)
    
    last_15_games_weighted[stat + '_std'] = last_15_games_weighted[stat + '_std'] / len(dates)
    last_15_games_weighted[stat + '_std'] = last_15_games_weighted[stat + '_std'] ** .5

# Feature engineer trends
for stat in stats:
    last_15_games_weighted[stat + '_trend'] = 0
    for date in dates[:10]:
        last_15_games_weighted[stat + '_trend'] = last_15_games_weighted[stat + '_trend'] + \
                                        z_score(last_15_games_weighted[stat + date], last_15_games_weighted[stat], last_15_games_weighted[stat + '_std']).fillna(0)
    
    last_15_games_weighted[stat + '_trend'] = last_15_games_weighted[stat + '_trend'] / len(dates[:10])

last_15_games_weighted = last_10_games.groupby(['date', 'visitor', 'home']).aggregate(['mean', 'sum'])

last_15_game_cols = [col 
                    for col in last_15_games_weighted.columns
                    if (col[0] == 'target' and col[1] == 'sum') or \
                       (col[0] in stats and col[1] == 'sum' and '_perc' not in col[0]) or \
                       (col[0] in stats and col[1] == 'mean' and '_perc' in col[0]) or \
                       ('_trend' in col[0] and col[1] == 'sum')]

last_15_games_weighted = last_15_games_weighted[last_15_game_cols].dropna(axis=0)
last_15_games_weighted.columns = [col[0] for col in last_15_games_weighted.columns]
last_15_games_weighted.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,target,fg,fga,fg_perc,3p,3pa,3p_perc,ft,fta,ft_perc,orb,drb,trb,ast,stl,blk,tov,pf,pts,fg_trend,fga_trend,fg_perc_trend,3p_trend,3pa_trend,3p_perc_trend,ft_trend,fta_trend,ft_perc_trend,orb_trend,drb_trend,trb_trend,ast_trend,stl_trend,blk_trend,tov_trend,pf_trend,pts_trend
date,visitor,home,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1
2006-11-28,Indiana Pacers,Portland Trail Blazers,13,33.5,73.6,0.4545,4.8,14.5,0.3465,20.6,26.3,0.7923,8.7,25.5,34.2,17.8,6.2,4.1,14.0,24.4,92.4,0.572609,0.046324,0.822568,0.342997,0.193892,0.139983,-0.6333,-0.58788,-0.063232,-0.033315,0.019745,-7.105427e-16,0.365826,-0.287183,-0.135665,-0.482243,-0.536895,0.283552
2006-11-28,New York Knicks,Chicago Bulls,9,35.3,78.6,0.4512,4.9,15.5,0.3035,22.2,29.2,0.7609,12.2,30.7,42.9,17.4,6.8,3.1,15.1,27.6,97.7,-0.816805,-0.249682,-0.394437,-0.298753,-0.379621,-0.057973,0.725227,0.699287,0.318955,0.551318,-0.102468,0.2503828,-0.051232,-0.289157,0.043478,0.260068,0.316033,0.112667
2006-11-29,Indiana Pacers,Golden State Warriors,18,75.7,163.8,0.46185,15.5,42.0,0.36925,37.3,51.3,0.72055,22.6,58.8,81.4,49.3,16.2,13.8,31.6,44.7,204.2,0.160212,0.33705,-0.155317,-0.308402,-0.029612,-0.256323,-0.107525,-0.274154,0.177002,0.657133,0.160269,0.4127687,-0.433355,-0.192901,0.520137,0.27326,-0.200144,0.065182
2006-11-29,New York Knicks,Cleveland Cavaliers,13,35.1,77.4,0.4558,4.5,14.5,0.2941,22.1,29.7,0.7456,11.9,31.3,43.2,17.8,6.7,3.2,16.5,28.4,96.8,-0.788672,-0.579141,-0.115582,-0.609837,-0.634498,-0.355332,0.111726,0.239001,-0.18402,0.230165,-0.402826,-0.1442437,-0.156174,-0.090412,0.25,0.480282,-0.202278,-0.544331
2006-11-29,Orlando Magic,Seattle SuperSonics,8,72.4,157.7,0.4605,9.7,29.7,0.3257,41.6,56.6,0.738,23.6,59.7,83.3,37.0,14.3,10.2,30.1,46.3,196.1,-0.87367,0.138988,-1.073659,-0.194635,0.237211,-0.55319,-0.23043,-0.12821,-0.130473,-0.030963,0.075737,0.06413992,-0.27391,0.817296,-0.398555,-0.278922,-0.370274,-0.722243


## Correlations of performances (last 1, last 5, last 10)

In [78]:
corr_df = pd.DataFrame()

# Correlations for last 15 game stats vs 3pt made (unweighted)
for col in last_15_games_weighted:
    corr_p = pearsonr(last_15_games_weighted['target'], last_15_games_weighted[col])
    row = {'last': '15_weighted', 'stat': col, 'corr': round(corr_p[0], 2), 'p-value': round(corr_p[1], 2)}
    corr_df = corr_df.append(row, ignore_index=True)

# Correlations for last 15 game stats vs 3pt made (unweighted)
for col in last_15_games_unweighted:
    corr_p = pearsonr(last_15_games_unweighted['target'], last_15_games_unweighted[col])
    row = {'last': '15_unweighted', 'stat': col, 'corr': round(corr_p[0], 2), 'p-value': round(corr_p[1], 2)}
    corr_df = corr_df.append(row, ignore_index=True)

# Correlations for last 10 game stats vs 3pt made 
for col in last_10_games:
    corr_p = pearsonr(last_10_games['target'], last_10_games[col])
    row = {'last': 10, 'stat': col, 'corr': round(corr_p[0], 2), 'p-value': round(corr_p[1], 2)}
    corr_df = corr_df.append(row, ignore_index=True)
    
# Correlations for last 10 game stats vs 3pt made 
for col in last_5_games:
    corr_p = pearsonr(last_5_games['target'], last_5_games[col])
    row = {'last': 5, 'stat': col, 'corr': round(corr_p[0], 2), 'p-value': round(corr_p[1], 2)}
    corr_df = corr_df.append(row, ignore_index=True)
    
# Correlations for last game stats vs 3pt made 
for col in last_game:
    corr_p = pearsonr(last_game['target'], last_game[col])
    row = {'last': 1, 'stat': col, 'corr': round(corr_p[0], 2), 'p-value': round(corr_p[1], 2)}
    corr_df = corr_df.append(row, ignore_index=True)
    
# Print each correlation
for stat in stats:
    print(f'Stat: {stat}')
    print(corr_df[(corr_df['stat'] == stat) & (corr_df['p-value'] < .05)].set_index(['last']).drop(['stat'], axis=1))
    print('\n')



Stat: fg
               corr  p-value
last                        
15_weighted    0.45      0.0
15_unweighted  0.45      0.0
10             0.45      0.0
5              0.41      0.0
1              0.28      0.0


Stat: fga
               corr  p-value
last                        
15_weighted    0.46      0.0
15_unweighted  0.46      0.0
10             0.46      0.0
5              0.44      0.0
1              0.33      0.0


Stat: fg_perc
               corr  p-value
last                        
15_weighted    0.12      0.0
15_unweighted  0.12      0.0
10             0.12      0.0
5              0.09      0.0
1              0.05      0.0


Stat: 3p
               corr  p-value
last                        
15_weighted    0.72      0.0
15_unweighted  0.72      0.0
10             0.72      0.0
5              0.70      0.0
1              0.58      0.0


Stat: 3pa
               corr  p-value
last                        
15_weighted    0.73      0.0
15_unweighted  0.73      0.0
10          