In [126]:
from scipy import stats
import time
import requests
from bs4 import BeautifulSoup
import threading
import pandas as pd
import tqdm
import lightgbm as lgb
from sklearn.preprocessing import StandardScaler, PowerTransformer
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

base_url = 'https://www.basketball-reference.com/'
day_scores_base_url = 'https://www.basketball-reference.com/boxscores/?month={month}&day={day}&year={year}'
data_path = r'/media/td/Samsung_T5/sports/nba'
db_name = 'nba_db'
box_score_link_table_name = 'boxscore_links'

box_score_details_table_name = 'boxscore_details'
processed_team_data_table_name = 'processed_team_data'
player_detail_table_name = 'player_details'
processed_player_data_table_name = 'processed_player_data'

target = 'win'

date_record_pickle_file_name = 'scraped_dates'
box_score_record_pickle_file_name = 'scraped_games'
max_tries = 5
file_lock = threading.Lock()

starting_rating = 1000
rating_k_factor = 100
rating_floor = 100
rating_ceiling = 10000
rating_d = 1000
k_min_sensitivity = 1

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)


In [17]:
team_data = pd.read_csv('{data_path}/{db_name}.csv'.format(data_path=data_path,db_name=box_score_details_table_name),sep='|', low_memory=False)
player_data = pd.read_csv('{data_path}/{db_name}.csv'.format(data_path=data_path,db_name=player_detail_table_name), sep='|', low_memory=False)

team_data.shape, player_data.shape

((2, 45), (26, 48))

In [18]:
team_data.head()

Unnamed: 0,team_tag,team_link,team_name,opponent_tag,opponent_link,opponent_name,location,win,year,month,day,mp,fg,fga,fg_pct,fg3,fg3a,fg3_pct,ft,fta,ft_pct,orb,drb,trb,ast,stl,blk,tov,pf,pts,plus_minus,ts_pct,efg_pct,fg3a_per_fga_pct,fta_per_fga_pct,orb_pct,drb_pct,trb_pct,ast_pct,stl_pct,blk_pct,tov_pct,usg_pct,off_rtg,def_rtg
0,tor,https://www.basketball-reference.com//teams/TO...,Toronto Raptors,gsw,https://www.basketball-reference.com//teams/GS...,Golden State Warriors,"Oracle Arena, Oakland, California",1,2019,6,13,240,39,82,0.476,13,33,0.394,23,29,0.793,11,28,39,25,8,2,12,23,114,,0.602,0.555,0.402,0.354,26.2,71.8,48.1,64.1,8.5,4.1,11.2,100.0,120.5,116.3
1,gsw,https://www.basketball-reference.com//teams/GS...,Golden State Warriors,tor,https://www.basketball-reference.com//teams/TO...,Toronto Raptors,"Oracle Arena, Oakland, California",0,2019,6,13,240,39,80,0.488,11,31,0.355,21,30,0.7,11,31,42,28,9,6,16,23,110,,0.59,0.556,0.388,0.375,28.2,73.8,51.9,71.8,9.5,12.2,14.7,100.0,116.3,120.5


In [19]:
player_data.head()

Unnamed: 0,ast,ast_pct,blk,blk_pct,day,def_rtg,drb,drb_pct,efg_pct,fg,fg3,fg3_pct,fg3a,fg3a_per_fga_pct,fg_pct,fga,ft,ft_pct,fta,fta_per_fga_pct,location,month,mp,off_rtg,opponent_link,opponent_name,opponent_tag,orb,orb_pct,pf,player_link,player_name,plus_minus,pts,reason,stl,stl_pct,team_link,team_name,team_tag,tov,tov_pct,trb,trb_pct,ts_pct,usg_pct,win,year
0,3.0,10.9,1.0,2.1,13,115.0,8.0,21.3,0.676,10.0,3.0,0.5,6.0,0.353,0.588,17.0,3.0,0.75,4.0,0.235,"Oracle Arena, Oakland, California",6,46:10,132.0,https://www.basketball-reference.com//teams/GS...,Golden State Warriors,gsw,2.0,5.0,2.0,https://www.basketball-reference.com//players/...,Pascal Siakam,2.0,26.0,,1.0,1.1,https://www.basketball-reference.com//teams/TO...,Toronto Raptors,tor,2.0,9.6,10.0,12.8,0.693,20.2,1,2019
1,10.0,40.2,0.0,0.0,13,112.0,5.0,14.8,0.688,9.0,4.0,0.571,7.0,0.438,0.563,16.0,4.0,0.667,6.0,0.375,"Oracle Arena, Oakland, California",6,41:42,137.0,https://www.basketball-reference.com//teams/GS...,Golden State Warriors,gsw,2.0,5.5,5.0,https://www.basketball-reference.com//players/...,Kyle Lowry,16.0,26.0,,3.0,3.7,https://www.basketball-reference.com//teams/TO...,Toronto Raptors,tor,3.0,13.9,7.0,9.9,0.697,23.3,1,2019
2,3.0,11.4,1.0,2.4,13,113.0,5.0,15.0,0.469,7.0,1.0,0.2,5.0,0.313,0.438,16.0,7.0,0.875,8.0,0.5,"Oracle Arena, Oakland, California",6,41:05,116.0,https://www.basketball-reference.com//teams/GS...,Golden State Warriors,gsw,1.0,2.8,4.0,https://www.basketball-reference.com//players/...,Kawhi Leonard,-2.0,22.0,,2.0,2.5,https://www.basketball-reference.com//teams/TO...,Toronto Raptors,tor,2.0,9.3,6.0,8.7,0.564,23.6,1,2019
3,4.0,18.5,0.0,0.0,13,116.0,6.0,27.8,0.0,0.0,0.0,0.0,2.0,0.4,0.0,5.0,3.0,0.75,4.0,0.8,"Oracle Arena, Oakland, California",6,26:34,93.0,https://www.basketball-reference.com//teams/GS...,Golden State Warriors,gsw,3.0,12.9,4.0,https://www.basketball-reference.com//players/...,Marc Gasol,-7.0,3.0,,0.0,0.0,https://www.basketball-reference.com//teams/TO...,Toronto Raptors,tor,1.0,12.9,9.0,20.1,0.222,13.1,1,2019
4,3.0,20.8,0.0,0.0,13,116.0,1.0,6.9,,0.0,0.0,,0.0,,,0.0,0.0,,0.0,,"Oracle Arena, Oakland, California",6,17:43,102.0,https://www.basketball-reference.com//teams/GS...,Golden State Warriors,gsw,0.0,0.0,1.0,https://www.basketball-reference.com//players/...,Danny Green,7.0,0.0,,1.0,2.9,https://www.basketball-reference.com//teams/TO...,Toronto Raptors,tor,1.0,100.0,1.0,3.3,,2.5,1,2019


- Negatively correlated with win
- High stat def rating is highly correlated with losing (.51). Teams under pressure don't win that game.
- Personal fouls also correlated with losing (.13)
- Turnovers - (investigate, not intuitive)
- 

In [5]:
team_data_corr = team_data.corr()
team_data_corr = team_data_corr[(team_data_corr['win'] > .2)|(team_data_corr['win'] < -.08)]
team_data_corr.sort_values('win')[['win']]

Unnamed: 0,win
stat_def_rtg,-0.520501
stat_tov,-0.111793
stat_tov_pct,-0.106198
stat_pf,-0.090094
stat_fg3,0.262046
stat_trb,0.287934
stat_ast,0.303615
stat_fg3_pct,0.33026
stat_drb,0.362345
stat_fg,0.379037


In [6]:
player_data_corr = player_data.corr()
player_data_corr = player_data_corr[(player_data_corr['win'] > .05)|(player_data_corr['win'] < -.05)]
player_data_corr.sort_values('win')[['win']]

Unnamed: 0,win
def_rtg,-0.457735
trb,0.05007
fg,0.060092
ast,0.060231
pts,0.065686
drb,0.065905
fg3,0.066131
fg3_pct,0.096947
fg_pct,0.104209
off_rtg,0.111082


In [7]:
del team_data, team_data_corr, player_data, player_data_corr

In [161]:
team_features = pd.read_csv('{data_path}/{db_name}.csv'.format(data_path=data_path,db_name=processed_team_data_table_name),sep='|', low_memory=False)
team_features.shape

(4988, 127)

In [162]:
player_features = pd.read_csv('{data_path}/{db_name}.csv'.format(data_path=data_path,db_name=processed_player_data_table_name),sep='|', low_memory=False)
player_features.shape

(4989, 731)

In [163]:
team_features = team_features.merge(player_features)
team_features.shape

(4988, 857)

In [164]:
team_features[['team_game_key']].sort_values('team_game_key')

Unnamed: 0,team_game_key
4,"['2017-11-04', 'chi', 'nop']"
0,"['2017-11-04', 'dal', 'min']"
2,"['2017-11-04', 'den', 'gsw']"
9,"['2017-11-04', 'det', 'sac']"
3,"['2017-11-04', 'gsw', 'den']"
...,...
4982,"['2019-06-07', 'tor', 'gsw']"
4984,"['2019-06-10', 'gsw', 'tor']"
4985,"['2019-06-10', 'tor', 'gsw']"
4987,"['2019-06-13', 'gsw', 'tor']"


In [165]:
invalid_columns = {'ast', 'ast_pct', 'blk', 'blk_pct', 'def_rtg', 'drb', 'drb_pct', 'efg_pct',
                                   'fg', 'fg3', 'fg3_pct', 'fg3a', 'fg3a_per_fga_pct', 'fg_pct', 'fga', 'ft', 'ft_pct',
                                   'fta', 'fta_per_fga_pct', 'mp', 'off_rtg', 'orb', 'orb_pct', 'pf',
                                   'plus_minus', 'pts', 'stl', 'stl_pct', 'tov', 'tov_pct', 'trb', 'trb_pct', 'ts_pct',
                                   'usg_pct', 'team_tag', 'team_link', 'team_name', 'opponent_tag', 'opponent_name', 'opponent_link', 'location',
                  'date_str', 'game_key', 'team_game_key'}
invalid_columns.update({i for i in team_features.columns if 'postgame' in i})


In [166]:
team_features_corr = team_features[[i for i in team_features.columns if i not in invalid_columns]].corr()
team_features_corr = team_features_corr[(team_features_corr[target] > .1)|(team_features_corr[target] < -.1)]
team_features_corr.sort_values(target)[[target]]

Unnamed: 0,win
feature_team_rl_avg_def_rtg_25_player_aggregate_mean,-0.14383
feature_team_rl_avg_def_rtg_25_player_aggregate_amax,-0.14383
feature_team_rl_avg_def_rtg_25_player_aggregate_median,-0.14383
feature_team_rl_avg_def_rtg_25_player_aggregate_amin,-0.14383
feature_team_rl_avg_def_rtg_25,-0.14383
feature_player_rl_avg_def_rtg_25_player_aggregate_mean,-0.138702
feature_player_rl_avg_def_rtg_25_player_aggregate_median,-0.136087
feature_player_rl_avg_def_rtg_25_player_aggregate_amin,-0.133541
feature_player_rl_avg_def_rtg_5_player_aggregate_mean,-0.120113
feature_team_rl_avg_def_rtg_5_player_aggregate_amin,-0.120069


In [167]:
team_features.shape

(4988, 857)

In [168]:

def evaluate_column_vs_target(input_series, target_array, standardization_method):
    print('input_series: {0}, target_array: {1}, standardization_method:{2}'.format(input_series.shape, target_array.shape, standardization_method))

    input_series = input_series.replace(np.inf, np.nan).replace(-np.inf, np.nan)
    if input_series.isna().sum() / input_series.shape[0] < 1.0:
        input_series = input_series.fillna(combined_column.median())
    input_series = input_series.fillna(0)

    if standardization_method:
        transformer = eval(standardization_method)
        input_series = transformer.fit_transform(input_series.values.reshape(-1, 1)).flatten()
    else:
        input_series = input_series.values.reshape(-1, 1).flatten()

    slope, intercept, r_value, p_value, std_err = stats.linregress(input_series, target_array)
    return {'slope':slope,
                'intercept':intercept,
                'r_value':r_value,
                'r2_value':r_value*r_value,
                'p_value':p_value,
                 'std_err':std_err
                }

In [119]:

valid_columns = [i for i in team_features.columns if i not in invalid_columns | {target}]
interactions = ['product', 'division']
# standardization_methods = [None, 'StandardScaler()']
standardization_methods = [None]
standardization_method = None

target_array = team_features[target].values

results = list()
print('checking columns')
for counter1, column1 in enumerate(valid_columns):
    next_column = team_features[column1].copy()
    next_results = evaluate_column_vs_target(next_column, target_array, standardization_method)
    next_results.update({'column1':column1,
                         'column2':None,
                         'column3':None,
                'interaction':None})
    results.append(next_results)


#     for counter2, column2 in enumerate(valid_columns):
#         for interaction in  interactions:
#             if interaction == 'product':
#                 combined_column = next_column * team_features[column2]
#             if interaction == 'division':
#                 combined_column = next_column / team_features[column2]

#             next_results = evaluate_column_vs_target(combined_column, target_array, standardization_method)
#             next_results.update({'column1':column1,
#                                  'column2':column2,
#                                  'column3':None,
#                         'interaction':interaction})
#             results.append(next_results)
#             print(counter1, counter2, len(valid_columns))

#         for counter3, column3 in enumerate(valid_columns):
#             combined_column = next_column * team_features[column2]
#             combined_column = next_column * team_features[column3]
#             next_results = evaluate_column_vs_target(combined_column, target_array, standardization_method)
#             next_results.update({'column1':column1,
#              'column2':column2,
#              'column3':column3,
#              'interaction':'product'})
#             print(counter1, counter2, counter3, len(valid_columns))
            
results_df = pd.DataFrame.from_dict(results)
results_df.head()

checking columns
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,)

  X -= avg[:, None]
  slope = r_num / ssxm
  sterrest = np.sqrt((1 - r**2) * ssym / ssxm / df)


input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (

input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (

input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (

input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (

input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (3858,), standardization_method:None
input_series: (3858,), target_array: (

Unnamed: 0,slope,intercept,r_value,r2_value,p_value,std_err,column1,column2,column3,interaction
0,0.0,0.5,0.0,0.0,1.0,0.016464,year,,,
1,-7.198484e-21,0.5,-5.757109999999999e-20,3.314431e-39,1.0,0.002014,month,,,
2,-1.6702399999999998e-20,0.5,-3.037835e-19,9.228438999999999e-38,1.0,0.000885,day,,,
3,0.1732678,0.415657,0.1732073,0.03000075,2.301254e-27,0.015866,feature_home,,,
4,,,,,,,feature_team_rl_avg_ast_5,,,


In [120]:
results_df = results_df[['column1', 'column2', 'interaction', 'r2_value', 'slope', 'intercept', 'r_value', 'p_value', 'std_err']]

In [121]:
results_df.sort_values('r2_value', ascending = False).head(50)

Unnamed: 0,column1,column2,interaction,r2_value,slope,intercept,r_value,p_value,std_err
74,feature_team_pregame_rating_0,,,0.0467726,0.000236812,0.251748,0.2162697,4.5969229999999996e-42,1.7e-05
444,feature_home_player_aggregate_median,,,0.03000075,0.1732678,0.415657,0.1732073,2.301254e-27,0.015866
445,feature_home_player_aggregate_amax,,,0.03000075,0.1732678,0.415657,0.1732073,2.301254e-27,0.015866
3,feature_home,,,0.03000075,0.1732678,0.415657,0.1732073,2.301254e-27,0.015866
446,feature_home_player_aggregate_amin,,,0.03000075,0.1732678,0.415657,0.1732073,2.301254e-27,0.015866
443,feature_home_player_aggregate_mean,,,0.03000075,0.1732678,0.415657,0.1732073,2.301254e-27,0.015866
75,feature_team_pregame_rating_1,,,0.02917637,5.284278e-05,0.412335,0.1708109,1.200147e-26,5e-06
77,feature_team_pregame_rating_3,,,0.008774505,0.001477172,-0.977574,0.09367233,5.569863e-09,0.000253
799,feature_team_pregame_rating_2_player_aggregate...,,,0.006252428,1.352578e-05,0.466992,0.0790723,8.765149e-07,3e-06
801,feature_team_pregame_rating_2_player_aggregate...,,,0.006252428,1.352578e-05,0.466992,0.0790723,8.765149e-07,3e-06


In [169]:
valid_columns = [i for i in team_features.columns if i not in invalid_columns | {target}]
max_iter = 10000

lgbm_params =  {
'boosting_type': 'gbdt',
'objective': 'binary',
'metric': 'binary_error',
"learning_rate": 0.1,
"max_depth": -1,
'num_leaves':31,
}
    

def get_model_score(x, y):
    train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=.3333, random_state=1)
    train_x, val_x, train_y, val_y = train_test_split(x, y, test_size=.5, random_state=1)
    lgtrain = lgb.Dataset(train_x, train_y)
    lgvalid = lgb.Dataset(val_x, val_y)

    model = lgb.train(
        lgbm_params,
        lgtrain,
        num_boost_round=max_iter,
        valid_sets=[lgtrain, lgvalid],
        valid_names=['train', 'valid'],
        early_stopping_rounds=10,
        verbose_eval=0
    )
    return accuracy_score(test_y, np.rint(model.predict(test_x, num_iteration=model.best_iteration)))
    

    


In [170]:
team_features.shape

(4988, 857)

In [171]:
get_model_score(team_features[valid_columns], team_features[target])

0.6127480457005412

In [176]:
    

interactions = ['product', 'division']

def add_best_feature(team_features, starting_columns, interactions = False):
    for i in valid_columns:
        team_features[i] = team_features[i].replace(np.inf, np.nan).replace(-np.inf, np.nan)
        if team_features[i].isna().sum() / team_features[i].shape[0] < 1.0:
            team_features[i] = team_features[i].fillna(team_features[i].median())
        team_features[i] = team_features[i].fillna(0)

    results = list()
    for i in valid_columns:
        score = get_model_score(team_features[starting_columns + [i]], team_features[target])
        results.append({'column1':i,
                               'column2':None,
                               'interaction':None,
                               'score':score})
    
    if interactions:
        for i in valid_columns:
            for j in valid_columns:
                for k in interactions:
                    if k == 'product':
                        input_series = team_features[i] * team_features[j]
                    if k == 'division':
                        input_series = team_features[i] / team_features[j]

                    input_series = input_series.replace(np.inf, np.nan).replace(-np.inf, np.nan)
                    if input_series.isna().sum() / input_series.shape[0] < 1.0:
                        input_series = input_series.fillna(combined_column.median())
                    input_series = input_series.fillna(0)
                    team_features['temp_column'] = input_series

                    score = get_model_score(team_features[starting_columns + ['temp_column']], team_features[target])
                    print(score)
                    results.append({'column1':i,
                                   'column2':j,
                                   'interaction':k,
                                   'score':score})

    results = sorted(results, key = lambda x: x['score'], reverse = True)
    print(results[0:5])
    
    for i in results:
        if i['interaction'] == 'product':
            input_series = team_features[i['column1']] * team_features[i['column2']]
            col_name = '{0}_product_{1}'.format(i['column1'], i['column2'])
        if i['interaction'] == 'division':
            input_series = team_features[i['column1']] / team_features[i['column2']]
            col_name = '{0}_division_{1}'.format(i['column1'], i['column2'])
        if not i['interaction']:
            col_name = i['column1']
        if i['interaction']:
            input_series = input_series.replace(np.inf, np.nan).replace(-np.inf, np.nan)
            if input_series.isna().sum() / input_series.shape[0] < 1.0:
                input_series = input_series.fillna(combined_column.median())
            input_series = input_series.fillna(0)
            team_features[col_name] = input_series
        
        if col_name not in starting_columns:
            starting_columns.append(col_name)
            break
            
    return team_features, starting_columns
    


In [177]:
current_columns = ['feature_home', 'feature_team_pregame_rating_1']

['feature_home', 'feature_team_pregame_rating_1', 'feature_player_rl_avg_usg_pct_5_player_aggregate_amin', 'feature_player_rl_avg_ast_pct_25_player_aggregate_median']

while len(current_columns) < 10:
    team_features, current_columns= add_best_feature(team_features, current_columns, interactions = False)
    print(current_columns)
    

[{'column1': 'feature_player_rl_avg_usg_pct_5_player_aggregate_amin', 'column2': None, 'interaction': None, 'score': 0.6319903788334336}, {'column1': 'feature_player_rl_avg_tov_pct_25_player_aggregate_amin', 'column2': None, 'interaction': None, 'score': 0.629585087191822}, {'column1': 'feature_player_rl_avg_ft_25_player_aggregate_mean', 'column2': None, 'interaction': None, 'score': 0.6265784726398076}, {'column1': 'feature_player_rl_avg_fg3a_25_player_aggregate_amin', 'column2': None, 'interaction': None, 'score': 0.6229705351773902}, {'column1': 'feature_player_rl_avg_drb_25_player_aggregate_amin', 'column2': None, 'interaction': None, 'score': 0.6211665664461816}]
['feature_home', 'feature_team_pregame_rating_1', 'feature_player_rl_avg_usg_pct_5_player_aggregate_amin']
[{'column1': 'feature_player_rl_avg_ast_pct_25_player_aggregate_median', 'column2': None, 'interaction': None, 'score': 0.6331930246542393}, {'column1': 'feature_player_rl_avg_fta_per_fga_pct_25_player_aggregate_amax

KeyboardInterrupt: 