# 3 Pointers Made against shooting.csv

### Import packages

In [1]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
%matplotlib inline
from scipy.stats import pearsonr
import itertools

pd.set_option("display.max_columns", None)

### Set working directory

In [2]:
# Print working directory
cwd = os.getcwd()
print(f'Directory: {cwd}')

# Change working directory
os.chdir('/Users/tyler/OneDrive/Documents/Python/NBA')

# Print working directory
cwd = os.getcwd()
print(f'Directory: {cwd}')

Directory: C:\Users\tyler\OneDrive\Documents\Python\NBA\backend\analysis\3p
Directory: C:\Users\tyler\OneDrive\Documents\Python\NBA


## Exploratory Data Analysis

### Import data

In [33]:
df = pd.read_csv('backend/data/shooting.csv').drop(['Unnamed: 0'], axis=1)
schedule = pd.read_csv('backend/data/schedules/2021.csv').drop(['Unnamed: 0'], axis=1)

### Basic exploration

In [34]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 206060 entries, 0 to 206059
Data columns (total 17 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   date      206060 non-null  object 
 1   visitor   206060 non-null  object 
 2   home      206060 non-null  object 
 3   team      206060 non-null  int64  
 4   quarter   206038 non-null  object 
 5   fg        206038 non-null  float64
 6   fga       206038 non-null  float64
 7   fg_perc   206038 non-null  float64
 8   2p        206038 non-null  float64
 9   2pa       206038 non-null  float64
 10  2p_perc   206035 non-null  float64
 11  3p        206038 non-null  float64
 12  3pa       206038 non-null  float64
 13  3p_perc   204574 non-null  float64
 14  efg_perc  206038 non-null  float64
 15  ast       206038 non-null  float64
 16  ast_perc  206015 non-null  float64
dtypes: float64(12), int64(1), object(4)
memory usage: 26.7+ MB


In [35]:
df.head()

Unnamed: 0,date,visitor,home,team,quarter,fg,fga,fg_perc,2p,2pa,2p_perc,3p,3pa,3p_perc,efg_perc,ast,ast_perc
0,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,q1,5.0,20.0,0.25,5.0,16.0,0.313,0.0,4.0,0.0,0.25,3.0,0.6
1,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,q2,15.0,19.0,0.789,12.0,16.0,0.75,3.0,3.0,1.0,0.868,10.0,0.667
2,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,q3,8.0,21.0,0.381,5.0,16.0,0.313,3.0,5.0,0.6,0.452,4.0,0.5
3,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,q4,11.0,19.0,0.579,10.0,18.0,0.556,1.0,1.0,1.0,0.605,5.0,0.455
4,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,total,39.0,79.0,0.494,32.0,66.0,0.485,7.0,13.0,0.538,0.538,22.0,0.564


In [36]:
# Convert 'date' column to Date object
df['date'] = pd.to_datetime(df['date'])
schedule['date'] = pd.to_datetime(schedule['date'])


# Conver 'team' column to Team Name
df['team'] = np.where(df['team'], df['home'], df['visitor'])


# Merge in opponents (see team defensive stats)
df = pd.merge(
    df, 
    df, 
    left_on=['date', 'visitor', 'home', 'quarter'], 
    right_on=['date', 'visitor', 'home', 'quarter'],
    suffixes=('', '_opp'),
    how='left')

df = df[df['team'] != df['team_opp']]


df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 206060 entries, 1 to 412118
Data columns (total 30 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   date          206060 non-null  datetime64[ns]
 1   visitor       206060 non-null  object        
 2   home          206060 non-null  object        
 3   team          206060 non-null  object        
 4   quarter       206038 non-null  object        
 5   fg            206038 non-null  float64       
 6   fga           206038 non-null  float64       
 7   fg_perc       206038 non-null  float64       
 8   2p            206038 non-null  float64       
 9   2pa           206038 non-null  float64       
 10  2p_perc       206035 non-null  float64       
 11  3p            206038 non-null  float64       
 12  3pa           206038 non-null  float64       
 13  3p_perc       204574 non-null  float64       
 14  efg_perc      206038 non-null  float64       
 15  ast           206

In [37]:
total_df = df[df['quarter'] == 'total'].copy()
q1_df = df[df['quarter'] == 'q1'].copy()
q2_df = df[df['quarter'] == 'q2'].copy()
q3_df = df[df['quarter'] == 'q3'].copy()
q4_df = df[df['quarter'] == 'q4'].copy()

In [38]:
total_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 40642 entries, 9 to 412118
Data columns (total 30 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   date          40642 non-null  datetime64[ns]
 1   visitor       40642 non-null  object        
 2   home          40642 non-null  object        
 3   team          40642 non-null  object        
 4   quarter       40642 non-null  object        
 5   fg            40642 non-null  float64       
 6   fga           40642 non-null  float64       
 7   fg_perc       40642 non-null  float64       
 8   2p            40642 non-null  float64       
 9   2pa           40642 non-null  float64       
 10  2p_perc       40642 non-null  float64       
 11  3p            40642 non-null  float64       
 12  3pa           40642 non-null  float64       
 13  3p_perc       40642 non-null  float64       
 14  efg_perc      40642 non-null  float64       
 15  ast           40642 non-null  float

## Next date with games

In [39]:
future_games = set(schedule['date'].unique()).difference(set(total_df['date'].unique()))
next_date = pd.Series(list(future_games)).sort_values(ascending=True).iloc[0]

## Add next day's games 

In [None]:
# Next slate of games
next_slate = schedule[schedule['date'] == next_date]

for index, game in next_slate:
    total_df.append({'date': game.date, 'visitor': game.visitor, 'home': game.home, 'team': game.home}, ignore_index=True)
    total_df.append({'date': game.date, 'visitor': game.visitor, 'home': game.home, 'team': game.visitor}, ignore_index=True)


# Dataframe of team's last 15 performances

In [9]:
# Return ten lastest dates team played
def last_15_date(team, date):
    schedule = total_df[total_df['team'] == team].sort_values(by='date').reset_index()
    date_index = schedule[schedule['date'] == date].index[0]
    if date_index - 15 < 0:
        return None, None, None, None, None, None, None, None, None, None, None, None, None, None, None
    else:
        date_1, date_2 = schedule.iloc[date_index - 1]['date'], schedule.iloc[date_index - 2]['date']
        date_3, date_4 = schedule.iloc[date_index - 3]['date'], schedule.iloc[date_index - 4]['date']
        date_5, date_6 = schedule.iloc[date_index - 5]['date'], schedule.iloc[date_index - 6]['date']
        date_7, date_8 = schedule.iloc[date_index - 7]['date'], schedule.iloc[date_index - 8]['date']
        date_9, date_10 = schedule.iloc[date_index - 9]['date'], schedule.iloc[date_index - 10]['date']
        date_11, date_12 = schedule.iloc[date_index - 11]['date'], schedule.iloc[date_index - 12]['date']
        date_13, date_14 = schedule.iloc[date_index - 13]['date'], schedule.iloc[date_index - 14]['date']
        date_15 = schedule.iloc[date_index - 15]['date']
        return date_1, date_2, date_3, date_4, date_5, date_6, date_7, date_8, date_9, date_10, date_11, date_12, date_13, date_14, date_15

total_df['dates'] = total_df.apply(lambda x: last_15_date(x.team, x.date), axis=1)
total_df['date_1'], total_df['date_2'] = total_df['dates'].apply(lambda x: x[0]), total_df['dates'].apply(lambda x: x[1])
total_df['date_3'], total_df['date_4'] = total_df['dates'].apply(lambda x: x[2]), total_df['dates'].apply(lambda x: x[3])
total_df['date_5'], total_df['date_6'] = total_df['dates'].apply(lambda x: x[4]), total_df['dates'].apply(lambda x: x[5])
total_df['date_7'], total_df['date_8'] = total_df['dates'].apply(lambda x: x[6]), total_df['dates'].apply(lambda x: x[7])
total_df['date_9'], total_df['date_10'] = total_df['dates'].apply(lambda x: x[8]), total_df['dates'].apply(lambda x: x[9])
total_df['date_11'], total_df['date_12'] = total_df['dates'].apply(lambda x: x[10]), total_df['dates'].apply(lambda x: x[11])
total_df['date_13'], total_df['date_14'] = total_df['dates'].apply(lambda x: x[12]), total_df['dates'].apply(lambda x: x[13])
total_df['date_15'] = total_df['dates'].apply(lambda x: x[14])

In [10]:
# Define statistics
stats = ['fg', 'fga', '2p', '2pa', '3p', '3pa', 'ast',
         'fg_opp', 'fga_opp', '2p_opp', '2pa_opp', '3p_opp', '3pa_opp', 'ast_opp']

perc_stats = ['fg_perc', '2p_perc', '3p_perc', 'efg_perc', 'ast_perc',
              'fg_perc_opp', '2p_perc_opp', '3p_perc_opp', 'efg_perc_opp', 'ast_perc_opp']

# X and y column names to merge on
x_cols = ['date', 'team'] + stats

last_15_games = total_df.copy()
last_15_games.loc[:, 'target'] = last_15_games.loc[:, '3p']
X = total_df[x_cols].copy()

# Dataframe of target (3pt made by each team) and of variables (last 5 games stats for each team)
dates = ['_1', '_2', '_3', '_4', '_5', '_6', '_7', '_8', '_9', '_10', '_11', '_12', '_13', '_14', '_15']
for date in dates:
    last_15_games = pd.merge(last_15_games, X, left_on=['date' + date, 'team'], right_on=['date', 'team'], how='left', suffixes=('', date))

### Last 15 Performances (Unweighted)

In [11]:
# Define statistics
stats = ['fg', 'fga', '2p', '2pa', '3p', '3pa', 'ast',
         'fg_opp', 'fga_opp', '2p_opp', '2pa_opp', '3p_opp', '3pa_opp', 'ast_opp']

perc_stats = ['fg_perc', '2p_perc', '3p_perc', 'efg_perc', 'ast_perc',
              'fg_perc_opp', '2p_perc_opp', '3p_perc_opp', 'efg_perc_opp', 'ast_perc_opp']

dates = ['_1', '_2', '_3', '_4', '_5', '_6', '_7', '_8', '_9', '_10', '_11', '_12', '_13', '_14', '_15']

last_15 = last_15_games.copy()

# Calculate mean for each stat over a team's last performance
for stat in stats:
    last_15[stat + '_last_15'] = 0
    last_15[stat + '_last_3'] = 0
    last_15[stat + '_last_1'] = 0
    
    for date in dates:
        # Last 15 games
        last_15[stat + '_last_15'] = last_15[stat + '_last_15'] + last_15[stat + date]
        
        # Last 3 games
        if date in ['_1', '_2', '_3']:
            last_15[stat + '_last_3'] = last_15[stat + '_last_3'] + last_15[stat + date]
        
        # Last game
        if date in ['_1']:
            last_15[stat + '_last_1'] = last_15[stat + '_last_1'] + last_15[stat + date]
        
    
    last_15[stat + '_last_15'] = last_15[stat + '_last_15'] / 15
    last_15[stat + '_last_3'] = last_15[stat + '_last_3'] / 3
    
# Calculate difference between last 15 games, 3 games and last game
for stat in stats:
    last_15[stat + '_trend_3'] = last_15[stat + '_last_15'] - last_15[stat + '_last_3']
    last_15[stat + '_trend_1'] = last_15[stat + '_last_15'] - last_15[stat + '_last_1']

# Sum stats for opposing teams for each game
last_15 = last_15.groupby(['date', 'visitor', 'home']).sum()

# Percentages for matchup
for perc in perc_stats:
    stat = perc.split('_')[0]
    opp = perc.split('_')[-1]
    if opp == 'opp':
        if stat == 'ast':
            last_15[perc + '_last_15'] = last_15[stat + '_opp_last_15'] / last_15['fg_opp_last_15']
            last_15[perc + '_last_3'] = last_15[stat + '_opp_last_3'] / last_15['fg_opp_last_3']
            last_15[perc + '_last_1'] = last_15[stat + '_opp_last_1'] / last_15['fg_opp_last_1']
        elif stat == 'efg':
            last_15[perc + '_last_15'] = (last_15['fg_opp_last_15'] + (.5 * last_15['3p_opp_last_15'])) / last_15['fga_opp_last_15']
            last_15[perc + '_last_3'] = (last_15['fg_opp_last_3'] + (.5 * last_15['3p_opp_last_3'])) / last_15['fga_opp_last_3']
            last_15[perc + '_last_1'] = (last_15['fg_opp_last_1'] + (.5 * last_15['3p_opp_last_1'])) / last_15['fga_opp_last_1']
        else:
            last_15[perc + '_last_15'] = last_15[stat + '_opp_last_15'] / last_15[stat + 'a_opp_last_15']
            last_15[perc + '_last_3'] = last_15[stat + '_opp_last_3'] / last_15[stat + 'a_opp_last_3']
            last_15[perc + '_last_1'] = last_15[stat + '_opp_last_1'] / last_15[stat + 'a_opp_last_1']
    else:
        if stat == 'ast':
            last_15[perc + '_last_15'] = last_15[stat + '_last_15'] / last_15['fg' + '_last_15']
            last_15[perc + '_last_3'] = last_15[stat + '_last_3'] / last_15['fg' + '_last_3']
            last_15[perc + '_last_1'] = last_15[stat + '_last_1'] / last_15['fg' + '_last_1']
        elif stat == 'efg':
            last_15[perc + '_last_15'] = (last_15['fg' + '_last_15'] + (.5 * last_15['3p' + '_last_15'])) / last_15['fga' + '_last_15']
            last_15[perc + '_last_3'] = (last_15['fg' + '_last_3'] + (.5 * last_15['3p' + '_last_3'])) / last_15['fga' + '_last_3']
            last_15[perc + '_last_1'] = (last_15['fg' + '_last_1'] + (.5 * last_15['3p' + '_last_1'])) / last_15['fga' + '_last_1']
        else:
            last_15[perc + '_last_15'] = last_15[stat + '_last_15'] / last_15[stat + 'a_last_15']
            last_15[perc + '_last_3'] = last_15[stat + '_last_3'] / last_15[stat + 'a_last_3']
            last_15[perc + '_last_1'] = last_15[stat + '_last_1'] / last_15[stat + 'a_last_1']

# Keep columns
stats_15 = [stat + '_last_15' for stat in stats]
stats_3 = [stat + '_last_3' for stat in stats]
stats_1 = [stat + '_last_1' for stat in stats]
trend_3 = [stat + '_trend_3' for stat in stats]
trend_1 = [stat + '_trend_1' for stat in stats]
last_15 = last_15[['target'] + stats_1 + stats_3 + stats_15 + perc_stats + trend_1 + trend_3]
        
last_15 = last_15.dropna(axis=0)
last_15.tail(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,target,fg_last_1,fga_last_1,2p_last_1,2pa_last_1,3p_last_1,3pa_last_1,ast_last_1,fg_opp_last_1,fga_opp_last_1,2p_opp_last_1,2pa_opp_last_1,3p_opp_last_1,3pa_opp_last_1,ast_opp_last_1,fg_last_3,fga_last_3,2p_last_3,2pa_last_3,3p_last_3,3pa_last_3,ast_last_3,fg_opp_last_3,fga_opp_last_3,2p_opp_last_3,2pa_opp_last_3,3p_opp_last_3,3pa_opp_last_3,ast_opp_last_3,fg_last_15,fga_last_15,2p_last_15,2pa_last_15,3p_last_15,3pa_last_15,ast_last_15,fg_opp_last_15,fga_opp_last_15,2p_opp_last_15,2pa_opp_last_15,3p_opp_last_15,3pa_opp_last_15,ast_opp_last_15,fg_perc,2p_perc,3p_perc,efg_perc,ast_perc,fg_perc_opp,2p_perc_opp,3p_perc_opp,efg_perc_opp,ast_perc_opp,fg_trend_1,fga_trend_1,2p_trend_1,2pa_trend_1,3p_trend_1,3pa_trend_1,ast_trend_1,fg_opp_trend_1,fga_opp_trend_1,2p_opp_trend_1,2pa_opp_trend_1,3p_opp_trend_1,3pa_opp_trend_1,ast_opp_trend_1,fg_trend_3,fga_trend_3,2p_trend_3,2pa_trend_3,3p_trend_3,3pa_trend_3,ast_trend_3,fg_opp_trend_3,fga_opp_trend_3,2p_opp_trend_3,2pa_opp_trend_3,3p_opp_trend_3,3pa_opp_trend_3,ast_opp_trend_3
date,visitor,home,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1
2022-04-03,Detroit Pistons,Indiana Pacers,33.0,82.0,175.0,50.0,91.0,32.0,84.0,54.0,81.0,167.0,58.0,100.0,23.0,67.0,53.0,86.666667,180.333333,57.666667,102.666667,29.0,77.666667,56.333333,83.666667,159.333333,59.0,98.0,24.666667,61.333333,54.666667,83.666667,178.733333,58.933333,108.733333,24.733333,70.0,52.666667,84.0,174.266667,60.4,108.266667,23.6,66.0,52.133333,0.913,0.968,0.828,1.083,1.454,0.913,0.968,0.828,1.083,1.454,1.666667,3.733333,8.933333,17.733333,-7.266667,-14.0,-1.333333,3.0,7.266667,2.4,8.266667,0.6,-1.0,-0.866667,-3.0,-1.6,1.266667,6.066667,-4.266667,-7.666667,-3.666667,0.333333,14.933333,1.4,10.266667,-1.066667,4.666667,-2.533333
2022-04-03,Golden State Warriors,Sacramento Kings,23.0,82.0,165.0,50.0,89.0,32.0,76.0,59.0,79.0,186.0,47.0,95.0,32.0,91.0,45.0,77.0,168.333333,52.666667,100.333333,24.333333,68.0,48.333333,84.0,184.666667,56.333333,107.666667,27.666667,77.0,48.0,80.8,174.066667,55.533333,102.8,25.266667,71.266667,49.733333,84.133333,177.8,56.8,105.466667,27.333333,72.333333,51.466667,0.902,1.056,0.683,1.046,1.32,0.902,1.056,0.683,1.046,1.32,-1.2,9.066667,5.533333,13.8,-6.733333,-4.733333,-9.266667,5.133333,-8.2,9.8,10.466667,-4.666667,-18.666667,6.466667,3.8,5.733333,2.866667,2.466667,0.933333,3.266667,1.4,0.133333,-6.866667,0.466667,-2.2,-0.333333,-4.666667,3.466667
2022-04-03,Miami Heat,Toronto Raptors,30.0,80.0,168.0,50.0,92.0,30.0,76.0,49.0,71.0,166.0,51.0,97.0,20.0,69.0,45.0,87.0,179.666667,57.0,104.666667,30.0,75.0,51.333333,73.0,172.666667,51.0,100.0,22.0,72.666667,47.333333,80.866667,174.6,56.133333,105.666667,24.733333,68.933333,46.666667,75.866667,169.066667,52.733333,98.066667,23.133333,71.0,48.0,0.941,1.084,0.782,1.127,1.275,0.941,1.084,0.782,1.127,1.275,0.866667,6.6,6.133333,13.666667,-5.266667,-7.066667,-2.333333,4.866667,3.066667,1.733333,1.066667,3.133333,2.0,3.0,-6.133333,-5.066667,-0.866667,1.0,-5.266667,-6.066667,-4.666667,2.866667,-3.6,1.733333,-1.933333,1.133333,-1.666667,0.666667
2022-04-03,Minnesota Timberwolves,Houston Rockets,33.0,84.0,183.0,48.0,98.0,36.0,85.0,52.0,88.0,167.0,64.0,104.0,24.0,63.0,65.0,82.333333,181.666667,51.0,100.0,31.333333,81.666667,51.0,92.0,181.666667,67.666667,117.333333,24.333333,64.333333,57.666667,83.8,175.466667,54.066667,97.666667,29.733333,77.8,52.333333,84.8,181.133333,60.933333,109.2,23.866667,71.933333,52.0,1.158,1.467,0.815,1.351,1.21,1.158,1.467,0.815,1.351,1.21,-0.2,-7.533333,6.066667,-0.333333,-6.266667,-7.2,0.333333,-3.2,14.133333,-3.066667,5.2,-0.133333,8.933333,-13.0,1.466667,-6.2,3.066667,-2.333333,-1.6,-3.866667,1.333333,-7.2,-0.533333,-6.733333,-8.133333,-0.466667,7.6,-5.666667
2022-04-03,New Orleans Pelicans,Los Angeles Clippers,30.0,97.0,171.0,67.0,102.0,30.0,69.0,57.0,89.0,176.0,70.0,124.0,19.0,52.0,54.0,89.0,171.0,63.666667,107.0,25.333333,64.0,51.666667,87.666667,173.666667,64.666667,110.666667,23.0,63.0,52.333333,84.2,178.133333,60.8,114.533333,23.4,63.6,50.333333,83.733333,174.133333,58.266667,105.733333,25.466667,68.4,52.4,0.876,0.884,0.852,1.038,1.362,0.876,0.884,0.852,1.038,1.362,-12.8,7.133333,-6.2,12.533333,-6.6,-5.4,-6.666667,-5.266667,-1.866667,-11.733333,-18.266667,6.466667,16.4,-1.6,-4.8,7.133333,-2.866667,7.533333,-1.933333,-0.4,-1.333333,-3.933333,0.466667,-6.4,-4.933333,2.466667,5.4,0.066667
2022-04-03,New York Knicks,Orlando Magic,27.0,65.0,157.0,41.0,92.0,24.0,65.0,43.0,80.0,167.0,52.0,91.0,28.0,76.0,47.0,75.0,171.333333,50.666667,100.0,24.333333,71.333333,45.666667,82.333333,170.333333,58.333333,100.0,24.0,70.333333,54.333333,75.933333,174.666667,50.0,100.666667,25.933333,74.0,46.866667,78.6,174.2,54.133333,102.133333,24.466667,72.066667,47.6,0.837,1.051,0.614,0.989,1.244,0.837,1.051,0.614,0.989,1.244,10.933333,17.666667,9.0,8.666667,1.933333,9.0,3.866667,-1.4,7.2,2.133333,11.133333,-3.533333,-3.933333,0.6,0.933333,3.333333,-0.666667,0.666667,1.6,2.666667,1.2,-3.733333,3.866667,-4.2,2.133333,0.466667,1.733333,-6.733333
2022-04-03,Philadelphia 76ers,Cleveland Cavaliers,26.0,97.0,168.0,61.0,91.0,36.0,77.0,64.0,73.0,164.0,49.0,104.0,24.0,60.0,53.0,84.0,166.333333,58.333333,100.0,25.666667,66.333333,52.333333,83.0,171.0,56.0,99.666667,27.0,71.333333,53.333333,78.6,169.8,55.4,104.0,23.2,65.8,48.666667,83.133333,173.066667,59.6,109.533333,23.533333,63.533333,51.066667,0.861,0.893,0.81,1.026,1.383,0.861,0.893,0.81,1.026,1.383,-18.4,1.8,-5.6,13.0,-12.8,-11.2,-15.333333,10.133333,9.066667,10.6,5.533333,-0.466667,3.533333,-1.933333,-5.4,3.466667,-2.933333,4.0,-2.466667,-0.533333,-3.666667,0.133333,2.066667,3.6,9.866667,-3.466667,-7.8,-2.266667
2022-04-03,Phoenix Suns,Oklahoma City Thunder,24.0,83.0,170.0,62.0,109.0,21.0,61.0,51.0,86.0,196.0,64.0,124.0,22.0,72.0,59.0,85.0,175.333333,59.0,108.0,26.0,67.333333,52.666667,86.0,190.333333,58.333333,112.0,27.666667,78.333333,54.333333,86.133333,177.8,59.8,107.0,26.333333,70.8,52.733333,84.533333,181.4,55.266667,104.8,29.266667,76.6,54.066667,0.91,1.153,0.599,1.047,1.362,0.91,1.153,0.599,1.047,1.362,3.133333,7.8,-2.2,-2.0,5.333333,9.8,1.733333,-1.466667,-14.6,-8.733333,-19.2,7.266667,4.6,-4.933333,1.133333,2.466667,0.8,-1.0,0.333333,3.466667,0.066667,-1.466667,-8.933333,-3.066667,-7.2,1.6,-1.733333,-0.266667
2022-04-03,Portland Trail Blazers,San Antonio Spurs,30.0,87.0,173.0,56.0,101.0,31.0,72.0,61.0,87.0,173.0,56.0,101.0,31.0,72.0,61.0,88.666667,184.333333,60.0,108.333333,28.666667,76.0,57.666667,85.666667,174.666667,54.666667,98.333333,31.0,76.333333,57.0,82.0,182.2,59.066667,114.2,22.933333,68.0,50.533333,84.866667,175.866667,57.466667,103.0,27.4,72.866667,53.0,0.845,1.007,0.689,1.024,1.325,0.845,1.007,0.689,1.024,1.325,-5.0,9.2,3.066667,13.2,-8.066667,-4.0,-10.466667,-2.133333,2.866667,1.466667,2.0,-3.6,0.866667,-8.0,-6.666667,-2.133333,-0.933333,5.866667,-5.733333,-8.0,-7.133333,-0.8,1.2,2.8,4.666667,-3.6,-3.466667,-4.0
2022-04-03,Washington Wizards,Boston Celtics,31.0,94.0,173.0,65.0,107.0,29.0,66.0,62.0,76.0,159.0,48.0,85.0,28.0,74.0,39.0,84.333333,174.333333,60.666667,106.0,23.666667,68.333333,57.333333,81.0,175.0,58.333333,109.0,22.666667,66.0,43.0,83.733333,171.266667,58.266667,103.066667,25.466667,68.2,54.066667,80.8,174.333333,56.333333,105.0,24.466667,69.333333,44.2,1.078,1.194,0.904,1.254,1.249,1.078,1.194,0.904,1.254,1.249,-10.266667,-1.733333,-6.733333,-3.933333,-3.533333,2.2,-7.933333,4.8,15.333333,8.333333,20.0,-3.533333,-4.666667,5.2,-0.6,-3.066667,-2.4,-2.933333,1.8,-0.133333,-3.266667,-0.2,-0.666667,-2.0,-4.0,1.8,3.333333,1.2


## Correlations

In [12]:
corr_df = pd.DataFrame()

# Correlations for last 15 game stats vs 3pt made (unweighted)
for col in last_15:
    corr_p = pearsonr(last_15['target'], last_15[col])
    row = {'stat': col, 'corr': round(corr_p[0], 2), 'p-value': round(corr_p[1], 2)}
    corr_df = corr_df.append(row, ignore_index=True)
    
# Print correlation
corr_df = corr_df[corr_df['p-value'] < .05].drop(['p-value'], axis=1).sort_values(['corr'], axis=0, ascending=False)
corr_df

Unnamed: 0,corr,stat
0,1.00,target
34,0.73,3pa_last_15
33,0.72,3p_last_15
20,0.72,3pa_last_3
6,0.68,3pa_last_1
...,...,...
25,-0.21,2pa_opp_last_3
39,-0.22,2pa_opp_last_15
4,-0.27,2pa_last_1
18,-0.29,2pa_last_3


## Save dataframe with significantly correlated stats

In [13]:
stats = corr_df[corr_df['corr'].abs() >= .6]['stat']
df = last_15[stats]

df.to_csv('backend/data/inputs/3p/shooting.csv')