# 3 Pointers Made against shooting.csv

### Import packages

In [1]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
%matplotlib inline
from scipy.stats import pearsonr
import itertools

pd.set_option("display.max_columns", None)

### Set working directory

In [2]:
# Print working directory
cwd = os.getcwd()
print(f'Directory: {cwd}')

# Change working directory
os.chdir('/Users/tyler/OneDrive/Documents/Python/NBA')

# Print working directory
cwd = os.getcwd()
print(f'Directory: {cwd}')

Directory: C:\Users\tyler\OneDrive\Documents\Python\NBA\backend\analysis\3p
Directory: C:\Users\tyler\OneDrive\Documents\Python\NBA


## Exploratory Data Analysis

### Import data

In [3]:
df = pd.read_csv('backend/data/shooting.csv').drop(['Unnamed: 0'], axis=1)

### Basic exploration

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205810 entries, 0 to 205809
Data columns (total 17 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   date      205810 non-null  object 
 1   visitor   205810 non-null  object 
 2   home      205810 non-null  object 
 3   team      205810 non-null  int64  
 4   quarter   205788 non-null  object 
 5   fg        205768 non-null  float64
 6   fga       205768 non-null  float64
 7   fg_perc   205768 non-null  float64
 8   2p        205768 non-null  float64
 9   2pa       205768 non-null  float64
 10  2p_perc   205765 non-null  float64
 11  3p        205768 non-null  float64
 12  3pa       205768 non-null  float64
 13  3p_perc   204304 non-null  float64
 14  efg_perc  205768 non-null  float64
 15  ast       205768 non-null  float64
 16  ast_perc  205745 non-null  float64
dtypes: float64(12), int64(1), object(4)
memory usage: 26.7+ MB


In [5]:
df.head()

Unnamed: 0,date,visitor,home,team,quarter,fg,fga,fg_perc,2p,2pa,2p_perc,3p,3pa,3p_perc,efg_perc,ast,ast_perc
0,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,q1,5.0,20.0,0.25,5.0,16.0,0.313,0.0,4.0,0.0,0.25,3.0,0.6
1,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,q2,15.0,19.0,0.789,12.0,16.0,0.75,3.0,3.0,1.0,0.868,10.0,0.667
2,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,q3,8.0,21.0,0.381,5.0,16.0,0.313,3.0,5.0,0.6,0.452,4.0,0.5
3,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,q4,11.0,19.0,0.579,10.0,18.0,0.556,1.0,1.0,1.0,0.605,5.0,0.455
4,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,total,39.0,79.0,0.494,32.0,66.0,0.485,7.0,13.0,0.538,0.538,22.0,0.564


In [6]:
# Convert 'date' column to Date object
df['date'] = pd.to_datetime(df['date'])


# Conver 'team' column to Team Name
df['team'] = np.where(df['team'], df['home'], df['visitor'])


# Merge in opponents (see team defensive stats)
df = pd.merge(
    df, 
    df, 
    left_on=['date', 'visitor', 'home', 'quarter'], 
    right_on=['date', 'visitor', 'home', 'quarter'],
    suffixes=('', '_opp'),
    how='left')

df = df[df['team'] != df['team_opp']]


df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 205810 entries, 1 to 411618
Data columns (total 30 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   date          205810 non-null  datetime64[ns]
 1   visitor       205810 non-null  object        
 2   home          205810 non-null  object        
 3   team          205810 non-null  object        
 4   quarter       205788 non-null  object        
 5   fg            205768 non-null  float64       
 6   fga           205768 non-null  float64       
 7   fg_perc       205768 non-null  float64       
 8   2p            205768 non-null  float64       
 9   2pa           205768 non-null  float64       
 10  2p_perc       205765 non-null  float64       
 11  3p            205768 non-null  float64       
 12  3pa           205768 non-null  float64       
 13  3p_perc       204304 non-null  float64       
 14  efg_perc      205768 non-null  float64       
 15  ast           205

In [7]:
total_df = df[df['quarter'] == 'total']
q1_df = df[df['quarter'] == 'q1']
q2_df = df[df['quarter'] == 'q2']
q3_df = df[df['quarter'] == 'q3']
q4_df = df[df['quarter'] == 'q4']

In [8]:
total_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 40608 entries, 9 to 411618
Data columns (total 30 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   date          40608 non-null  datetime64[ns]
 1   visitor       40608 non-null  object        
 2   home          40608 non-null  object        
 3   team          40608 non-null  object        
 4   quarter       40608 non-null  object        
 5   fg            40588 non-null  float64       
 6   fga           40588 non-null  float64       
 7   fg_perc       40588 non-null  float64       
 8   2p            40588 non-null  float64       
 9   2pa           40588 non-null  float64       
 10  2p_perc       40588 non-null  float64       
 11  3p            40588 non-null  float64       
 12  3pa           40588 non-null  float64       
 13  3p_perc       40588 non-null  float64       
 14  efg_perc      40588 non-null  float64       
 15  ast           40588 non-null  float

# Dataframe of team's last 15 performances

In [9]:
# Totals
total_df = df[df['quarter'] == 'total'].copy()

# Return ten lastest dates team played
def last_15_date(team, date):
    schedule = total_df[total_df['team'] == team].sort_values(by='date').reset_index()
    date_index = schedule[schedule['date'] == date].index[0]
    if date_index - 15 < 0:
        return None, None, None, None, None, None, None, None, None, None, None, None, None, None, None
    else:
        date_1, date_2 = schedule.iloc[date_index - 1]['date'], schedule.iloc[date_index - 2]['date']
        date_3, date_4 = schedule.iloc[date_index - 3]['date'], schedule.iloc[date_index - 4]['date']
        date_5, date_6 = schedule.iloc[date_index - 5]['date'], schedule.iloc[date_index - 6]['date']
        date_7, date_8 = schedule.iloc[date_index - 7]['date'], schedule.iloc[date_index - 8]['date']
        date_9, date_10 = schedule.iloc[date_index - 9]['date'], schedule.iloc[date_index - 10]['date']
        date_11, date_12 = schedule.iloc[date_index - 11]['date'], schedule.iloc[date_index - 12]['date']
        date_13, date_14 = schedule.iloc[date_index - 13]['date'], schedule.iloc[date_index - 14]['date']
        date_15 = schedule.iloc[date_index - 15]['date']
        return date_1, date_2, date_3, date_4, date_5, date_6, date_7, date_8, date_9, date_10, date_11, date_12, date_13, date_14, date_15

total_df['dates'] = total_df.apply(lambda x: last_15_date(x.team, x.date), axis=1)
total_df['date_1'], total_df['date_2'] = total_df['dates'].apply(lambda x: x[0]), total_df['dates'].apply(lambda x: x[1])
total_df['date_3'], total_df['date_4'] = total_df['dates'].apply(lambda x: x[2]), total_df['dates'].apply(lambda x: x[3])
total_df['date_5'], total_df['date_6'] = total_df['dates'].apply(lambda x: x[4]), total_df['dates'].apply(lambda x: x[5])
total_df['date_7'], total_df['date_8'] = total_df['dates'].apply(lambda x: x[6]), total_df['dates'].apply(lambda x: x[7])
total_df['date_9'], total_df['date_10'] = total_df['dates'].apply(lambda x: x[8]), total_df['dates'].apply(lambda x: x[9])
total_df['date_11'], total_df['date_12'] = total_df['dates'].apply(lambda x: x[10]), total_df['dates'].apply(lambda x: x[11])
total_df['date_13'], total_df['date_14'] = total_df['dates'].apply(lambda x: x[12]), total_df['dates'].apply(lambda x: x[13])
total_df['date_15'] = total_df['dates'].apply(lambda x: x[14])

In [10]:
# Define statistics
stats = ['fg', 'fga', '2p', '2pa', '3p', '3pa', 'ast',
         'fg_opp', 'fga_opp', '2p_opp', '2pa_opp', '3p_opp', '3pa_opp', 'ast_opp']

perc_stats = ['fg_perc', '2p_perc', '3p_perc', 'efg_perc', 'ast_perc',
              'fg_perc_opp', '2p_perc_opp', '3p_perc_opp', 'efg_perc_opp', 'ast_perc_opp']

# X and y column names to merge on
x_cols = ['date', 'team'] + stats

last_15_games = total_df.copy()
last_15_games.loc[:, 'target'] = last_15_games.loc[:, '3p']
X = total_df[x_cols].copy()

# Dataframe of target (3pt made by each team) and of variables (last 5 games stats for each team)
dates = ['_1', '_2', '_3', '_4', '_5', '_6', '_7', '_8', '_9', '_10', '_11', '_12', '_13', '_14', '_15']
for date in dates:
    last_15_games = pd.merge(last_15_games, X, left_on=['date' + date, 'team'], right_on=['date', 'team'], how='left', suffixes=('', date))

### Last 15 Performances (Unweighted)

In [11]:
# Define statistics
stats = ['fg', 'fga', '2p', '2pa', '3p', '3pa', 'ast',
         'fg_opp', 'fga_opp', '2p_opp', '2pa_opp', '3p_opp', '3pa_opp', 'ast_opp']

perc_stats = ['fg_perc', '2p_perc', '3p_perc', 'efg_perc', 'ast_perc',
              'fg_perc_opp', '2p_perc_opp', '3p_perc_opp', 'efg_perc_opp', 'ast_perc_opp']

dates = ['_1', '_2', '_3', '_4', '_5', '_6', '_7', '_8', '_9', '_10', '_11', '_12', '_13', '_14', '_15']

last_15 = last_15_games.copy()

# Calculate mean for each stat over a team's last performance
for stat in stats:
    last_15[stat + '_last_15'] = 0
    last_15[stat + '_last_3'] = 0
    last_15[stat + '_last_1'] = 0
    
    for date in dates:
        # Last 15 games
        last_15[stat + '_last_15'] = last_15[stat + '_last_15'] + last_15[stat + date]
        
        # Last 3 games
        if date in ['_1', '_2', '_3']:
            last_15[stat + '_last_3'] = last_15[stat + '_last_3'] + last_15[stat + date]
        
        # Last game
        if date in ['_1']:
            last_15[stat + '_last_1'] = last_15[stat + '_last_1'] + last_15[stat + date]
        
    
    last_15[stat + '_last_15'] = last_15[stat + '_last_15'] / 15
    last_15[stat + '_last_3'] = last_15[stat + '_last_3'] / 3
    
# Calculate difference between last 15 games, 3 games and last game
for stat in stats:
    last_15[stat + '_trend_3'] = last_15[stat + '_last_15'] - last_15[stat + '_last_3']
    last_15[stat + '_trend_1'] = last_15[stat + '_last_15'] - last_15[stat + '_last_1']

# Sum stats for opposing teams for each game
last_15 = last_15.groupby(['date', 'visitor', 'home']).sum()

# Percentages for matchup
for perc in perc_stats:
    stat = perc.split('_')[0]
    opp = perc.split('_')[-1]
    if opp == 'opp':
        if stat == 'ast':
            last_15[perc + '_last_15'] = last_15[stat + '_opp_last_15'] / last_15['fg_opp_last_15']
            last_15[perc + '_last_3'] = last_15[stat + '_opp_last_3'] / last_15['fg_opp_last_3']
            last_15[perc + '_last_1'] = last_15[stat + '_opp_last_1'] / last_15['fg_opp_last_1']
        elif stat == 'efg':
            last_15[perc + '_last_15'] = (last_15['fg_opp_last_15'] + (.5 * last_15['3p_opp_last_15'])) / last_15['fga_opp_last_15']
            last_15[perc + '_last_3'] = (last_15['fg_opp_last_3'] + (.5 * last_15['3p_opp_last_3'])) / last_15['fga_opp_last_3']
            last_15[perc + '_last_1'] = (last_15['fg_opp_last_1'] + (.5 * last_15['3p_opp_last_1'])) / last_15['fga_opp_last_1']
        else:
            last_15[perc + '_last_15'] = last_15[stat + '_opp_last_15'] / last_15[stat + 'a_opp_last_15']
            last_15[perc + '_last_3'] = last_15[stat + '_opp_last_3'] / last_15[stat + 'a_opp_last_3']
            last_15[perc + '_last_1'] = last_15[stat + '_opp_last_1'] / last_15[stat + 'a_opp_last_1']
    else:
        if stat == 'ast':
            last_15[perc + '_last_15'] = last_15[stat + '_last_15'] / last_15['fg' + '_last_15']
            last_15[perc + '_last_3'] = last_15[stat + '_last_3'] / last_15['fg' + '_last_3']
            last_15[perc + '_last_1'] = last_15[stat + '_last_1'] / last_15['fg' + '_last_1']
        elif stat == 'efg':
            last_15[perc + '_last_15'] = (last_15['fg' + '_last_15'] + (.5 * last_15['3p' + '_last_15'])) / last_15['fga' + '_last_15']
            last_15[perc + '_last_3'] = (last_15['fg' + '_last_3'] + (.5 * last_15['3p' + '_last_3'])) / last_15['fga' + '_last_3']
            last_15[perc + '_last_1'] = (last_15['fg' + '_last_1'] + (.5 * last_15['3p' + '_last_1'])) / last_15['fga' + '_last_1']
        else:
            last_15[perc + '_last_15'] = last_15[stat + '_last_15'] / last_15[stat + 'a_last_15']
            last_15[perc + '_last_3'] = last_15[stat + '_last_3'] / last_15[stat + 'a_last_3']
            last_15[perc + '_last_1'] = last_15[stat + '_last_1'] / last_15[stat + 'a_last_1']

# Keep columns
stats_15 = [stat + '_last_15' for stat in stats]
stats_3 = [stat + '_last_3' for stat in stats]
stats_1 = [stat + '_last_1' for stat in stats]
trend_3 = [stat + '_trend_3' for stat in stats]
trend_1 = [stat + '_trend_1' for stat in stats]
last_15 = last_15[['target'] + stats_1 + stats_3 + stats_15 + perc_stats + trend_1 + trend_3]
        
last_15 = last_15.dropna(axis=0)
last_15.tail(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,target,fg_last_1,fga_last_1,2p_last_1,2pa_last_1,3p_last_1,3pa_last_1,ast_last_1,fg_opp_last_1,fga_opp_last_1,2p_opp_last_1,2pa_opp_last_1,3p_opp_last_1,3pa_opp_last_1,ast_opp_last_1,fg_last_3,fga_last_3,2p_last_3,2pa_last_3,3p_last_3,3pa_last_3,ast_last_3,fg_opp_last_3,fga_opp_last_3,2p_opp_last_3,2pa_opp_last_3,3p_opp_last_3,3pa_opp_last_3,ast_opp_last_3,fg_last_15,fga_last_15,2p_last_15,2pa_last_15,3p_last_15,3pa_last_15,ast_last_15,fg_opp_last_15,fga_opp_last_15,2p_opp_last_15,2pa_opp_last_15,3p_opp_last_15,3pa_opp_last_15,ast_opp_last_15,fg_perc,2p_perc,3p_perc,efg_perc,ast_perc,fg_perc_opp,2p_perc_opp,3p_perc_opp,efg_perc_opp,ast_perc_opp,fg_trend_1,fga_trend_1,2p_trend_1,2pa_trend_1,3p_trend_1,3pa_trend_1,ast_trend_1,fg_opp_trend_1,fga_opp_trend_1,2p_opp_trend_1,2pa_opp_trend_1,3p_opp_trend_1,3pa_opp_trend_1,ast_opp_trend_1,fg_trend_3,fga_trend_3,2p_trend_3,2pa_trend_3,3p_trend_3,3pa_trend_3,ast_trend_3,fg_opp_trend_3,fga_opp_trend_3,2p_opp_trend_3,2pa_opp_trend_3,3p_opp_trend_3,3pa_opp_trend_3,ast_opp_trend_3
date,visitor,home,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1
2022-04-01,Dallas Mavericks,Washington Wizards,0.0,93.0,180.0,64.0,102.0,29.0,78.0,63.0,82.0,179.0,60.0,121.0,22.0,58.0,45.0,86.666667,169.0,57.333333,98.333333,29.333333,70.666667,56.333333,81.333333,177.0,61.333333,114.666667,20.0,62.333333,44.666667,80.2,169.466667,55.6,99.933333,24.6,69.533333,49.333333,82.133333,174.733333,58.466667,108.533333,23.666667,66.2,46.866667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-12.8,-10.533333,-8.4,-2.066667,-4.4,-8.466667,-13.666667,0.133333,-4.266667,-1.533333,-12.466667,1.666667,8.2,1.866667,-6.466667,0.466667,-1.733333,1.6,-4.733333,-1.133333,-7.0,0.8,-2.266667,-2.866667,-6.133333,3.666667,3.866667,2.2
2022-04-01,Detroit Pistons,Oklahoma City Thunder,0.0,82.0,169.0,55.0,93.0,27.0,76.0,57.0,80.0,160.0,55.0,95.0,25.0,65.0,50.0,85.333333,176.0,57.0,101.333333,28.333333,74.666667,56.333333,82.666667,172.666667,55.666667,101.0,27.0,71.666667,51.333333,81.066667,175.0,55.2,102.266667,25.866667,72.733333,49.733333,82.866667,178.266667,56.333333,104.8,26.533333,73.466667,51.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.933333,6.0,0.2,9.266667,-1.133333,-3.266667,-7.266667,2.866667,18.266667,1.333333,9.8,1.533333,8.466667,1.066667,-4.266667,-1.0,-1.8,0.933333,-2.466667,-1.933333,-6.6,0.2,5.6,0.666667,3.8,-0.466667,1.8,-0.266667
2022-04-01,Indiana Pacers,Boston Celtics,0.0,79.0,182.0,56.0,105.0,23.0,77.0,56.0,88.0,159.0,63.0,97.0,25.0,62.0,60.0,81.666667,176.333333,55.0,100.333333,26.666667,76.0,53.666667,92.333333,173.0,65.333333,106.333333,27.0,66.666667,56.0,84.733333,176.6,57.866667,102.933333,26.866667,73.666667,53.733333,84.6,176.533333,58.866667,106.266667,25.733333,70.266667,50.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.733333,-5.4,1.866667,-2.066667,3.866667,-3.333333,-2.266667,-3.4,17.533333,-4.133333,9.266667,0.733333,8.266667,-9.866667,3.066667,0.266667,2.866667,2.6,0.2,-2.333333,0.066667,-7.733333,3.533333,-6.466667,-0.066667,-1.266667,3.6,-5.866667
2022-04-01,Los Angeles Clippers,Milwaukee Bucks,0.0,85.0,179.0,57.0,102.0,28.0,77.0,43.0,95.0,199.0,67.0,133.0,28.0,66.0,55.0,80.333333,176.0,56.333333,106.666667,24.0,69.333333,42.666667,91.0,186.666667,63.0,116.666667,28.0,70.0,53.0,82.8,177.2,56.066667,107.6,26.733333,69.6,47.866667,85.533333,181.466667,57.266667,109.133333,28.266667,72.333333,52.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.2,-1.8,-0.933333,5.6,-1.266667,-7.4,4.866667,-9.466667,-17.533333,-9.733333,-23.866667,0.266667,6.333333,-2.933333,2.466667,1.2,-0.266667,0.933333,2.733333,0.266667,5.2,-5.466667,-5.2,-5.733333,-7.533333,0.266667,2.333333,-0.933333
2022-04-01,Minnesota Timberwolves,Denver Nuggets,0.0,87.0,172.0,64.0,107.0,23.0,65.0,59.0,92.0,193.0,62.0,117.0,30.0,76.0,61.0,85.333333,170.333333,62.666667,106.0,22.666667,64.333333,57.666667,86.333333,182.333333,58.333333,103.666667,28.0,78.666667,57.0,86.0,173.333333,58.6,100.133333,27.4,73.2,56.533333,83.4,178.533333,57.133333,103.266667,26.266667,75.266667,53.533333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,1.333333,-5.4,-6.866667,4.4,8.2,-2.466667,-8.6,-14.466667,-4.866667,-13.733333,-3.733333,-0.733333,-7.466667,0.666667,3.0,-4.066667,-5.866667,4.733333,8.866667,-1.133333,-2.933333,-3.8,-1.2,-0.4,-1.733333,-3.4,-3.466667
2022-04-01,New Orleans Pelicans,Los Angeles Lakers,0.0,81.0,161.0,68.0,112.0,13.0,49.0,44.0,85.0,175.0,56.0,89.0,29.0,86.0,55.0,82.333333,170.333333,63.0,108.0,19.333333,62.333333,46.0,85.0,175.0,58.666667,101.666667,26.333333,73.333333,51.666667,87.666667,180.2,64.0,112.866667,23.666667,67.333333,50.666667,86.733333,177.133333,60.533333,105.866667,26.2,71.266667,53.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.666667,19.2,-4.0,0.866667,10.666667,18.333333,6.666667,1.733333,2.133333,4.533333,16.866667,-2.8,-14.733333,-1.666667,5.333333,9.866667,1.0,4.866667,4.333333,5.0,4.666667,1.733333,2.133333,1.866667,4.2,-0.133333,-2.066667,1.666667
2022-04-01,Phoenix Suns,Memphis Grizzlies,0.0,82.0,183.0,59.0,116.0,23.0,67.0,46.0,77.0,166.0,50.0,93.0,27.0,73.0,52.0,90.333333,183.333333,65.666667,120.666667,24.666667,62.666667,52.666667,77.333333,170.0,54.666667,102.666667,22.666667,67.333333,46.0,92.0,186.6,66.0,120.933333,26.0,65.666667,55.733333,79.333333,176.133333,56.133333,109.466667,23.2,66.666667,48.533333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,3.6,7.0,4.933333,3.0,-1.333333,9.733333,2.333333,10.133333,6.133333,16.466667,-3.8,-6.333333,-3.466667,1.666667,3.266667,0.333333,0.266667,1.333333,3.0,3.066667,2.0,6.133333,1.466667,6.8,0.533333,-0.666667,2.533333
2022-04-01,Portland Trail Blazers,San Antonio Spurs,0.0,81.0,168.0,53.0,91.0,28.0,77.0,56.0,84.0,169.0,62.0,106.0,22.0,63.0,52.0,85.0,188.333333,60.333333,114.333333,24.666667,74.0,51.666667,84.666667,175.0,55.666667,100.333333,29.0,74.666667,55.666667,80.933333,182.466667,58.733333,115.066667,22.2,67.4,49.933333,84.933333,175.6,57.933333,103.4,27.0,72.2,52.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.066667,14.466667,5.733333,24.066667,-5.8,-9.6,-6.066667,0.933333,6.6,-4.066667,-2.6,5.0,9.2,0.666667,-4.066667,-5.866667,-1.6,0.733333,-2.466667,-6.6,-1.733333,0.266667,0.6,2.266667,3.066667,-2.0,-2.466667,-3.0
2022-04-01,Sacramento Kings,Houston Rockets,0.0,87.0,189.0,67.0,121.0,20.0,68.0,44.0,87.0,189.0,67.0,121.0,20.0,68.0,44.0,82.666667,181.333333,56.666667,107.0,26.0,74.333333,49.666667,88.333333,192.666667,64.666667,121.666667,23.666667,71.0,50.0,81.933333,173.533333,57.266667,104.0,24.666667,69.533333,49.933333,88.533333,182.333333,63.933333,113.533333,24.6,68.8,51.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5.066667,-15.466667,-9.733333,-17.0,4.666667,1.533333,5.933333,1.533333,-6.666667,-3.066667,-7.466667,4.6,0.8,7.8,-0.733333,-7.8,0.6,-3.0,-1.333333,-4.8,0.266667,0.2,-10.333333,-0.733333,-8.133333,0.933333,-2.2,1.8
2022-04-01,Toronto Raptors,Orlando Magic,0.0,88.0,192.0,59.0,123.0,29.0,69.0,53.0,84.0,184.0,60.0,111.0,24.0,73.0,61.0,90.666667,191.666667,65.0,120.666667,25.666667,71.0,53.666667,78.333333,179.333333,54.666667,104.0,23.666667,75.333333,52.333333,80.666667,183.0,57.466667,113.266667,23.2,69.733333,46.266667,78.733333,175.866667,53.933333,103.933333,24.8,71.933333,50.533333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-7.333333,-9.0,-1.533333,-9.733333,-5.8,0.733333,-6.733333,-5.266667,-8.133333,-6.066667,-7.066667,0.8,-1.066667,-10.466667,-10.0,-8.666667,-7.533333,-7.4,-2.466667,-1.266667,-7.4,0.4,-3.466667,-0.733333,-0.066667,1.133333,-3.4,-1.8


## Correlations

In [12]:
corr_df = pd.DataFrame()

# Correlations for last 15 game stats vs 3pt made (unweighted)
for col in last_15:
    corr_p = pearsonr(last_15['target'], last_15[col])
    row = {'stat': col, 'corr': round(corr_p[0], 2), 'p-value': round(corr_p[1], 2)}
    corr_df = corr_df.append(row, ignore_index=True)
    
# Print correlation
corr_df = corr_df[corr_df['p-value'] < .05].drop(['p-value'], axis=1).sort_values(['corr'], axis=0, ascending=False)
corr_df

Unnamed: 0,corr,stat
0,1.00,target
34,0.72,3pa_last_15
33,0.72,3p_last_15
20,0.71,3pa_last_3
6,0.68,3pa_last_1
...,...,...
25,-0.20,2pa_opp_last_3
39,-0.21,2pa_opp_last_15
4,-0.27,2pa_last_1
18,-0.29,2pa_last_3


## Save dataframe with significantly correlated stats

In [13]:
stats = corr_df[corr_df['corr'].abs() >= .6]['stat']
df = last_15[stats]

df.to_csv('backend/data/inputs/3p/shooting.csv')