In [1]:
import threading
import time

import lightgbm as lgb
import numpy as np
import pandas as pd
import requests
import tqdm
from bs4 import BeautifulSoup
from scipy import stats
from sklearn.preprocessing import StandardScaler


base_url = 'https://www.basketball-reference.com/'
day_scores_base_url = 'https://www.basketball-reference.com/boxscores/?month={month}&day={day}&year={year}'
data_path = r'/media/td/Samsung_T5/sports/nba'
db_name = 'nba_db'
box_score_link_table_name = 'boxscore_links'

box_score_details_table_name = 'boxscore_details'
processed_team_data_table_name = 'processed_team_data'
player_detail_table_name = 'player_details'
processed_player_data_table_name = 'processed_player_data'

target = 'win'

date_record_pickle_file_name = 'scraped_dates'
box_score_record_pickle_file_name = 'scraped_games'
max_tries = 5
file_lock = threading.Lock()

starting_rating = 1000
rating_k_factor = 100
rating_floor = 100
rating_ceiling = 10000
rating_d = 1000
k_min_sensitivity = 1

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)


In [2]:
team_data = pd.read_csv('{data_path}/{db_name}.csv'.format(data_path=data_path,db_name=box_score_details_table_name),sep='|', low_memory=False)
player_data = pd.read_csv('{data_path}/{db_name}.csv'.format(data_path=data_path,db_name=player_detail_table_name), sep='|', low_memory=False)

team_data.shape, player_data.shape

((2018, 45), (25400, 48))

Features negatively correlated with `win`:
- A high `stat_def_rtg` is strongly correlated with losing (r ≈ -0.52). Teams under pressure don't win that game.
- Personal fouls (`stat_pf`) are also correlated with losing (r ≈ -0.11).
- Turnovers (`stat_tov`, r ≈ -0.13) - TODO: investigate, not intuitive.

In [3]:
team_data_corr = team_data.corr()
team_data_corr = team_data_corr[(team_data_corr['win'] > .2)|(team_data_corr['win'] < -.08)]
team_data_corr.sort_values('win')[['win']]

Unnamed: 0,win
stat_def_rtg,-0.524067
stat_tov,-0.12538
stat_tov_pct,-0.124517
stat_pf,-0.110728
stat_fg3,0.276752
stat_trb,0.302295
stat_fg3_pct,0.31176
stat_ast,0.313733
stat_drb,0.366708
stat_fg,0.369798


In [4]:
player_data_corr = player_data.corr()
player_data_corr = player_data_corr[(player_data_corr['win'] > .05)|(player_data_corr['win'] < -.05)]
player_data_corr.sort_values('win')[['win']]

Unnamed: 0,win
def_rtg,-0.467127
fg,0.054867
ast,0.058011
drb,0.060627
pts,0.062979
fg3,0.066424
fg3_pct,0.096516
fg_pct,0.098342
efg_pct,0.108876
ts_pct,0.110037


In [5]:
del team_data, team_data_corr, player_data, player_data_corr

In [6]:
team_features = pd.read_csv('{data_path}/{db_name}.csv'.format(data_path=data_path,db_name=processed_team_data_table_name),sep='|', low_memory=False)
team_features.head()

Unnamed: 0,team_tag,team_link,team_name,opponent_tag,opponent_link,opponent_name,location,win,year,month,day,stat_mp,stat_fg,stat_fga,stat_fg_pct,stat_fg3,stat_fg3a,stat_fg3_pct,stat_ft,stat_fta,stat_ft_pct,stat_orb,stat_drb,stat_trb,stat_ast,stat_stl,stat_blk,stat_tov,stat_pf,stat_pts,stat_plus_minus,stat_ts_pct,stat_efg_pct,stat_fg3a_per_fga_pct,stat_fta_per_fga_pct,stat_orb_pct,stat_drb_pct,stat_trb_pct,stat_ast_pct,stat_stl_pct,stat_blk_pct,stat_tov_pct,stat_usg_pct,stat_off_rtg,stat_def_rtg,date_str,game_key,team_game_key,feature_home,feature_team_rl_avg_stat_ast_1,feature_team_rl_avg_stat_ast_pct_1,feature_team_rl_avg_stat_blk_1,feature_team_rl_avg_stat_blk_pct_1,feature_team_rl_avg_stat_def_rtg_1,feature_team_rl_avg_stat_drb_1,feature_team_rl_avg_stat_drb_pct_1,feature_team_rl_avg_stat_efg_pct_1,feature_team_rl_avg_stat_fg_1,feature_team_rl_avg_stat_fg3_1,feature_team_rl_avg_stat_fg3_pct_1,feature_team_rl_avg_stat_fg3a_1,feature_team_rl_avg_stat_fg3a_per_fga_pct_1,feature_team_rl_avg_stat_fg_pct_1,feature_team_rl_avg_stat_fga_1,feature_team_rl_avg_stat_ft_1,feature_team_rl_avg_stat_ft_pct_1,feature_team_rl_avg_stat_fta_1,feature_team_rl_avg_stat_fta_per_fga_pct_1,feature_team_rl_avg_stat_mp_1,feature_team_rl_avg_stat_off_rtg_1,feature_team_rl_avg_stat_orb_1,feature_team_rl_avg_stat_orb_pct_1,feature_team_rl_avg_stat_pf_1,feature_team_rl_avg_stat_plus_minus_1,feature_team_rl_avg_stat_pts_1,feature_team_rl_avg_stat_stl_1,feature_team_rl_avg_stat_stl_pct_1,feature_team_rl_avg_stat_tov_1,feature_team_rl_avg_stat_tov_pct_1,feature_team_rl_avg_stat_trb_1,feature_team_rl_avg_stat_trb_pct_1,feature_team_rl_avg_stat_ts_pct_1,feature_team_rl_avg_stat_usg_pct_1,feature_team_rl_avg_win_1,feature_team_rl_avg_stat_ast_3,feature_team_rl_avg_stat_ast_pct_3,feature_team_rl_avg_stat_blk_3,feature_team_rl_avg_stat_blk_pct_3,feature_team_rl_avg_stat_def_rtg_3,feature_team_rl_avg_stat_drb_3,feature_team_rl_avg_stat_drb_pct_3,feature_team_rl_avg_stat_efg_pct_3,feature_team_rl_avg_
stat_fg_3,feature_team_rl_avg_stat_fg3_3,feature_team_rl_avg_stat_fg3_pct_3,feature_team_rl_avg_stat_fg3a_3,feature_team_rl_avg_stat_fg3a_per_fga_pct_3,feature_team_rl_avg_stat_fg_pct_3,feature_team_rl_avg_stat_fga_3,feature_team_rl_avg_stat_ft_3,feature_team_rl_avg_stat_ft_pct_3,feature_team_rl_avg_stat_fta_3,feature_team_rl_avg_stat_fta_per_fga_pct_3,feature_team_rl_avg_stat_mp_3,feature_team_rl_avg_stat_off_rtg_3,feature_team_rl_avg_stat_orb_3,feature_team_rl_avg_stat_orb_pct_3,feature_team_rl_avg_stat_pf_3,feature_team_rl_avg_stat_plus_minus_3,feature_team_rl_avg_stat_pts_3,feature_team_rl_avg_stat_stl_3,feature_team_rl_avg_stat_stl_pct_3,feature_team_rl_avg_stat_tov_3,feature_team_rl_avg_stat_tov_pct_3,feature_team_rl_avg_stat_trb_3,feature_team_rl_avg_stat_trb_pct_3,feature_team_rl_avg_stat_ts_pct_3,feature_team_rl_avg_stat_usg_pct_3,feature_team_rl_avg_win_3,feature_team_rl_avg_stat_ast_5,feature_team_rl_avg_stat_ast_pct_5,feature_team_rl_avg_stat_blk_5,feature_team_rl_avg_stat_blk_pct_5,feature_team_rl_avg_stat_def_rtg_5,feature_team_rl_avg_stat_drb_5,feature_team_rl_avg_stat_drb_pct_5,feature_team_rl_avg_stat_efg_pct_5,feature_team_rl_avg_stat_fg_5,feature_team_rl_avg_stat_fg3_5,feature_team_rl_avg_stat_fg3_pct_5,feature_team_rl_avg_stat_fg3a_5,feature_team_rl_avg_stat_fg3a_per_fga_pct_5,feature_team_rl_avg_stat_fg_pct_5,feature_team_rl_avg_stat_fga_5,feature_team_rl_avg_stat_ft_5,feature_team_rl_avg_stat_ft_pct_5,feature_team_rl_avg_stat_fta_5,feature_team_rl_avg_stat_fta_per_fga_pct_5,feature_team_rl_avg_stat_mp_5,feature_team_rl_avg_stat_off_rtg_5,feature_team_rl_avg_stat_orb_5,feature_team_rl_avg_stat_orb_pct_5,feature_team_rl_avg_stat_pf_5,feature_team_rl_avg_stat_plus_minus_5,feature_team_rl_avg_stat_pts_5,feature_team_rl_avg_stat_stl_5,feature_team_rl_avg_stat_stl_pct_5,feature_team_rl_avg_stat_tov_5,feature_team_rl_avg_stat_tov_pct_5,feature_team_rl_avg_stat_trb_5,feature_team_rl_avg_stat_trb_pct_5,feature_team_rl_avg_stat_ts_pct_5,feature_team_rl_a
vg_stat_usg_pct_5,feature_team_rl_avg_win_5,feature_team_rl_avg_stat_ast_10,feature_team_rl_avg_stat_ast_pct_10,feature_team_rl_avg_stat_blk_10,feature_team_rl_avg_stat_blk_pct_10,feature_team_rl_avg_stat_def_rtg_10,feature_team_rl_avg_stat_drb_10,feature_team_rl_avg_stat_drb_pct_10,feature_team_rl_avg_stat_efg_pct_10,feature_team_rl_avg_stat_fg_10,feature_team_rl_avg_stat_fg3_10,feature_team_rl_avg_stat_fg3_pct_10,feature_team_rl_avg_stat_fg3a_10,feature_team_rl_avg_stat_fg3a_per_fga_pct_10,feature_team_rl_avg_stat_fg_pct_10,feature_team_rl_avg_stat_fga_10,feature_team_rl_avg_stat_ft_10,feature_team_rl_avg_stat_ft_pct_10,feature_team_rl_avg_stat_fta_10,feature_team_rl_avg_stat_fta_per_fga_pct_10,feature_team_rl_avg_stat_mp_10,feature_team_rl_avg_stat_off_rtg_10,feature_team_rl_avg_stat_orb_10,feature_team_rl_avg_stat_orb_pct_10,feature_team_rl_avg_stat_pf_10,feature_team_rl_avg_stat_plus_minus_10,feature_team_rl_avg_stat_pts_10,feature_team_rl_avg_stat_stl_10,feature_team_rl_avg_stat_stl_pct_10,feature_team_rl_avg_stat_tov_10,feature_team_rl_avg_stat_tov_pct_10,feature_team_rl_avg_stat_trb_10,feature_team_rl_avg_stat_trb_pct_10,feature_team_rl_avg_stat_ts_pct_10,feature_team_rl_avg_stat_usg_pct_10,feature_team_rl_avg_win_10,feature_team_rl_avg_stat_ast_25,feature_team_rl_avg_stat_ast_pct_25,feature_team_rl_avg_stat_blk_25,feature_team_rl_avg_stat_blk_pct_25,feature_team_rl_avg_stat_def_rtg_25,feature_team_rl_avg_stat_drb_25,feature_team_rl_avg_stat_drb_pct_25,feature_team_rl_avg_stat_efg_pct_25,feature_team_rl_avg_stat_fg_25,feature_team_rl_avg_stat_fg3_25,feature_team_rl_avg_stat_fg3_pct_25,feature_team_rl_avg_stat_fg3a_25,feature_team_rl_avg_stat_fg3a_per_fga_pct_25,feature_team_rl_avg_stat_fg_pct_25,feature_team_rl_avg_stat_fga_25,feature_team_rl_avg_stat_ft_25,feature_team_rl_avg_stat_ft_pct_25,feature_team_rl_avg_stat_fta_25,feature_team_rl_avg_stat_fta_per_fga_pct_25,feature_team_rl_avg_stat_mp_25,feature_team_rl_avg_stat_off_rtg_25,feature_team_rl_avg_stat_o
rb_25,feature_team_rl_avg_stat_orb_pct_25,feature_team_rl_avg_stat_pf_25,feature_team_rl_avg_stat_plus_minus_25,feature_team_rl_avg_stat_pts_25,feature_team_rl_avg_stat_stl_25,feature_team_rl_avg_stat_stl_pct_25,feature_team_rl_avg_stat_tov_25,feature_team_rl_avg_stat_tov_pct_25,feature_team_rl_avg_stat_trb_25,feature_team_rl_avg_stat_trb_pct_25,feature_team_rl_avg_stat_ts_pct_25,feature_team_rl_avg_stat_usg_pct_25,feature_team_rl_avg_win_25,feature_team_rl_avg_stat_ast_50,feature_team_rl_avg_stat_ast_pct_50,feature_team_rl_avg_stat_blk_50,feature_team_rl_avg_stat_blk_pct_50,feature_team_rl_avg_stat_def_rtg_50,feature_team_rl_avg_stat_drb_50,feature_team_rl_avg_stat_drb_pct_50,feature_team_rl_avg_stat_efg_pct_50,feature_team_rl_avg_stat_fg_50,feature_team_rl_avg_stat_fg3_50,feature_team_rl_avg_stat_fg3_pct_50,feature_team_rl_avg_stat_fg3a_50,feature_team_rl_avg_stat_fg3a_per_fga_pct_50,feature_team_rl_avg_stat_fg_pct_50,feature_team_rl_avg_stat_fga_50,feature_team_rl_avg_stat_ft_50,feature_team_rl_avg_stat_ft_pct_50,feature_team_rl_avg_stat_fta_50,feature_team_rl_avg_stat_fta_per_fga_pct_50,feature_team_rl_avg_stat_mp_50,feature_team_rl_avg_stat_off_rtg_50,feature_team_rl_avg_stat_orb_50,feature_team_rl_avg_stat_orb_pct_50,feature_team_rl_avg_stat_pf_50,feature_team_rl_avg_stat_plus_minus_50,feature_team_rl_avg_stat_pts_50,feature_team_rl_avg_stat_stl_50,feature_team_rl_avg_stat_stl_pct_50,feature_team_rl_avg_stat_tov_50,feature_team_rl_avg_stat_tov_pct_50,feature_team_rl_avg_stat_trb_50,feature_team_rl_avg_stat_trb_pct_50,feature_team_rl_avg_stat_ts_pct_50,feature_team_rl_avg_stat_usg_pct_50,feature_team_rl_avg_win_50,feature_team_rl_avg_stat_ast_100,feature_team_rl_avg_stat_ast_pct_100,feature_team_rl_avg_stat_blk_100,feature_team_rl_avg_stat_blk_pct_100,feature_team_rl_avg_stat_def_rtg_100,feature_team_rl_avg_stat_drb_100,feature_team_rl_avg_stat_drb_pct_100,feature_team_rl_avg_stat_efg_pct_100,feature_team_rl_avg_stat_fg_100,feature_team_rl_avg_stat_fg3_100,feat
ure_team_rl_avg_stat_fg3_pct_100,feature_team_rl_avg_stat_fg3a_100,feature_team_rl_avg_stat_fg3a_per_fga_pct_100,feature_team_rl_avg_stat_fg_pct_100,feature_team_rl_avg_stat_fga_100,feature_team_rl_avg_stat_ft_100,feature_team_rl_avg_stat_ft_pct_100,feature_team_rl_avg_stat_fta_100,feature_team_rl_avg_stat_fta_per_fga_pct_100,feature_team_rl_avg_stat_mp_100,feature_team_rl_avg_stat_off_rtg_100,feature_team_rl_avg_stat_orb_100,feature_team_rl_avg_stat_orb_pct_100,feature_team_rl_avg_stat_pf_100,feature_team_rl_avg_stat_plus_minus_100,feature_team_rl_avg_stat_pts_100,feature_team_rl_avg_stat_stl_100,feature_team_rl_avg_stat_stl_pct_100,feature_team_rl_avg_stat_tov_100,feature_team_rl_avg_stat_tov_pct_100,feature_team_rl_avg_stat_trb_100,feature_team_rl_avg_stat_trb_pct_100,feature_team_rl_avg_stat_ts_pct_100,feature_team_rl_avg_stat_usg_pct_100,feature_team_rl_avg_win_100,feature_team_pregame_rating_0,feature_team_postgame_rating_0,feature_team_pregame_rating_1,feature_team_postgame_rating_1,feature_team_pregame_rating_2,feature_team_postgame_rating_2,feature_team_pregame_rating_3,feature_team_postgame_rating_3
0,orl,https://www.basketball-reference.com//teams/OR...,Orlando Magic,uta,https://www.basketball-reference.com//teams/UT...,Utah Jazz,"Vivint Smart Home Arena, Salt Lake City, Utah",0,2018,3,5,240,29,85,0.341,11,34,0.324,11,14,0.786,6,30,36,18,7,4,14,19,80,,0.439,0.406,0.4,0.165,12.5,83.3,42.9,62.1,7.5,9.5,13.3,100.0,86.2,101.2,2018-03-05,"['2018-03-05', 'orl', 'uta']","['2018-03-05', 'orl', 'uta']",0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1000.0,950.0,1000.0,950.0,1000,900,1000.0,950.0
1,det,https://www.basketball-reference.com//teams/DE...,Detroit Pistons,cle,https://www.basketball-reference.com//teams/CL...,Cleveland Cavaliers,"Quicken Loans Arena, Cleveland, Ohio",0,2018,3,5,240,34,87,0.391,12,34,0.353,10,15,0.667,8,32,40,17,9,6,11,22,90,,0.481,0.46,0.391,0.172,16.0,74.4,43.0,50.0,9.4,10.9,10.5,100.0,94.0,117.0,2018-03-05,"['2018-03-05', 'cle', 'det']","['2018-03-05', 'det', 'cle']",0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1000.0,950.0,1000.0,950.0,1000,900,1000.0,950.0
2,por,https://www.basketball-reference.com//teams/PO...,Portland Trail Blazers,lal,https://www.basketball-reference.com//teams/LA...,Los Angeles Lakers,"STAPLES Center, Los Angeles, California",1,2018,3,5,240,35,83,0.422,10,23,0.435,28,38,0.737,7,43,50,19,6,5,14,25,108,,0.542,0.482,0.277,0.458,14.9,86.0,51.5,54.3,5.8,10.0,12.3,100.0,104.8,100.0,2018-03-05,"['2018-03-05', 'lal', 'por']","['2018-03-05', 'por', 'lal']",0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1000.0,1050.0,1000.0,1050.0,1000,1100,1000.0,1050.0
3,uta,https://www.basketball-reference.com//teams/UT...,Utah Jazz,orl,https://www.basketball-reference.com//teams/OR...,Orlando Magic,"Vivint Smart Home Arena, Salt Lake City, Utah",1,2018,3,5,240,36,73,0.493,10,31,0.323,12,18,0.667,6,42,48,23,8,7,15,22,94,,0.581,0.562,0.425,0.247,16.7,87.5,57.1,63.9,8.6,13.7,15.6,100.0,101.2,86.2,2018-03-05,"['2018-03-05', 'orl', 'uta']","['2018-03-05', 'uta', 'orl']",1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1000.0,1050.0,1000.0,1050.0,1000,1100,1000.0,1050.0
4,pho,https://www.basketball-reference.com//teams/PH...,Phoenix Suns,mia,https://www.basketball-reference.com//teams/MI...,Miami Heat,"AmericanAirlines Arena, Miami, Florida",0,2018,3,5,240,42,87,0.483,10,24,0.417,9,18,0.5,12,28,40,21,3,2,17,20,103,,0.543,0.54,0.276,0.207,25.5,63.6,44.0,50.0,3.1,3.6,15.2,100.0,107.0,129.8,2018-03-05,"['2018-03-05', 'mia', 'pho']","['2018-03-05', 'pho', 'mia']",0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1000.0,950.0,1000.0,950.0,1000,900,1000.0,950.0


In [None]:
invalid_columns = ['stat_ast', 'stat_ast_pct','stat_blk', 'stat_blk_pct','stat_def_rtg','stat_drb','stat_drb_pct','stat_efg_pct','stat_fg',
                  'stat_fg3','stat_fg3_pct','stat_fg3a','stat_fg3a_per_fga_pct','stat_fg_pct','stat_fga','stat_ft','stat_ft_pct',
                  'stat_fta','stat_fta_per_fga_pct','stat_mp','stat_off_rtg','stat_orb','stat_orb_pct','stat_pf','stat_plus_minus',
                  'stat_pts','stat_stl','stat_stl_pct','stat_tov', 'stat_tov_pct', 'stat_trb','stat_trb_pct', 'stat_ts_pct',
                  'stat_usg_pct', 'team_tag', 'team_link', 'team_name', 'opponent_tag', 'opponent_name', 'location']


In [7]:
team_features_corr = team_features[[i for i in team_features.columns if i not in invalid_columns]].corr()
team_features_corr = team_features_corr[(team_features_corr[target] > .05)|(team_features_corr[target] < -.05)]
team_features_corr.sort_values(target)[[target]]

Unnamed: 0,win
feature_team_rl_avg_stat_def_rtg_50,-0.194836
feature_team_rl_avg_stat_def_rtg_10,-0.121314
feature_team_rl_avg_stat_def_rtg_5,-0.115803
feature_team_rl_avg_stat_def_rtg_1,-0.11427
feature_team_rl_avg_stat_def_rtg_25,-0.103431
feature_team_rl_avg_stat_def_rtg_3,-0.081643
feature_team_rl_avg_stat_tov_pct_25,-0.052089
feature_team_rl_avg_stat_tov_3,-0.050455
feature_team_rl_avg_stat_tov_pct_3,-0.050214
feature_team_rl_avg_stat_orb_pct_3,0.051775


In [None]:
valid_columns = [i for i in team_features.columns if i not in invalid_columns + [target]]
interactions = ['product', 'addition', 'subtraction', 'division']

results = list()
for normalization in [True, False]:
    for column1 in valid_columns:
        combined_column = team_features[column1].copy()
        combined_column = combined_column.replace(np.inf, np.nan)replace(-np.inf, np.nan)
        combined_column = combined_column.fillna(combined_column.median())
        
        if normalization:
            combined_column = StandardScaler().fit_transform(combined_column)
            
        slope, intercept, r_value, p_value, std_err = stats.lingress(combined_column, team_features[target])

        next_dict = {'column1':column1,
                    'interaction':'identity',
                    'normalization':normalization,
                    'slope':slope,
                    'intercept':intercept,
                    'r_value':r_value,
                    'r2_value':r_value*r_value,
                    'p_value':p_value,
                     'std_err':std_err
                    }
        results.append(next_dict)

    for column1 in valid_columns:
        for column2 in valid_columns:
            for interaction in  interactions:
                if interaction == 'product':
                    combined_column = team_features[column1].copy() * team_features[column2].copy()
                if interaction == 'addition':
                    combined_column = team_features[column1].copy() + team_features[column2].copy()
                if interaction == 'subtraction':
                    combined_column = team_features[column1].copy() - team_features[column2].copy()
                if interaction == 'division':
                    combined_column = team_features[column1].copy() / team_features[column2].copy()
                combined_column = combined_column.replace(np.inf, np.nan)replace(-np.inf, np.nan)
                combined_column = combined_column.fillna(combined_column.median())
                
                if normalization:
                    combined_column = StandardScaler().fit_transform(combined_column)
                
                next_dict = {'column1':column1,
                             'column2':column1,
                    'interaction':interaction,
                    'normalization':normalization,
                    'slope':slope,
                    'intercept':intercept,
                    'r_value':r_value,
                    'r2_value':r_value*r_value,
                    'p_value':p_value,
                     'std_err':std_err
                    }
                results.append(next_dict)
            

