In [231]:
# %reload_ext loads the extension when it is not loaded yet and reloads it
# otherwise, so re-running this cell on a live kernel no longer prints the
# "already loaded" notice that %load_ext emits.
%reload_ext autoreload
# Mode 2: re-import modules before each cell execution so edits to project
# code are picked up without restarting the kernel.
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [232]:
import sys
import os

# Name of the repository folder; the notebook may be run from any directory
# located inside it.
REPO_NAME = 'sewer-nfl'
CWD = str(os.getcwd())

# Trim the working directory down to the repository root so that project
# packages can be imported from the notebook.
_repo_pos = CWD.find(REPO_NAME)
if _repo_pos == -1:
    # str.find returns -1 when the name is absent; slicing with that value
    # would silently produce a truncated, meaningless path. Warn and fall back
    # to the working directory instead.
    print(
        f"WARNING: '{REPO_NAME}' not found in working directory '{CWD}'; "
        "using the working directory as REPO_DIR.",
        file=sys.stderr,
    )
    REPO_DIR = CWD
else:
    REPO_DIR = CWD[:_repo_pos + len(REPO_NAME)]

# Keep REPO_DIR first on sys.path, but drop older copies so that re-running
# this cell does not pile up duplicate entries.
sys.path[:] = [REPO_DIR] + [entry for entry in sys.path if entry != REPO_DIR]

In [233]:
# Project-local imports; presumably these resolve because the repository root
# was inserted into sys.path earlier in the notebook.
from models._utilities.data.pipe_layer import build_training_dataset
from warehouse.config import Configuration # At model level, switch this to the model's config
config = Configuration()
# Build the training frame from the configuration. The recorded output shows
# one progress line per season followed by a float-downcasting message.
# `t` is the frame handed to the model further down.
t = build_training_dataset(config)

2016 done.
2017 done.
2018 done.
2019 done.
2020 done.
2021 done.
2022 done.
Downcasting floats.


  # Add filter


In [234]:

import pickle
import xgboost as xgb
import numpy as np
import pandas as pd
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# Numeric columns that identify a game or record its outcome; the model class
# below filters these out when it assembles its predictor list.
NUMERIC_META_COLS = [
    'season', 'week',
    'home_score', 'away_score',
    'home_cover', 'within_three',
    'missing_N',
]

class Chrystal_Ball:
    '''

    Model object designed to generate predictions on a series of live or test data
    - Will be stored as a .pkl file

    Constructing an instance splits `training_data` into train / test
    partitions by season, fits an `XGBClassifier` on the training partition
    and immediately scores the held-out partition.

    Parameters
    ----------
    training_data : pandas.DataFrame
        Frame holding a `season` column, the `response` column and the
        numeric feature columns.
    test_years : iterable of int
        Seasons held out as the test partition. Defaults to 2022 only.
    response : str
        Name of the target column.
        Options: ['home_cover','spread_line','within_three']

    Attributes
    ----------
    predictors : list of str
        Numeric columns used as features (meta columns and the response are
        excluded).
    train_data, test_data : pandas.DataFrame
        Row partitions of `training_data`.
    X_train, X_test, y_train, y_test
        Feature / target views of the two partitions.
    dtrain, dtest : xgboost.DMatrix
        DMatrix versions of the partitions.
    model : XGBClassifier
        The fitted classifier.
    y_preds, y_proba
        Predicted labels and the probability of class 1 for the test rows.

    '''

    def __init__(self,
                 training_data,
                 test_years = (2022,),
                 response = 'home_cover', # Options: ['home_cover','spread_line','within_three']
                 ):

        self.training_data = training_data
        # Copy into a fresh list: an immutable default plus a per-instance copy
        # means instances can never share or mutate a common default container.
        self.test_years = list(test_years)
        self.response = response

        # Resolve the numeric columns once instead of once per candidate
        # column, and never let the response itself leak into the features
        # (relevant when the response is not one of the meta columns).
        numeric_cols = set(self.training_data.select_dtypes(np.number).columns)
        excluded = set(NUMERIC_META_COLS) | {self.response}
        self.predictors = [col for col in self.training_data.columns
                           if col in numeric_cols and col not in excluded]

        self.train_test_split()
        self.model = XGBClassifier(eta = 0.01, reg_lambda=1, min_child_weight=1)
        # Parameter set kept on the instance; it is not passed to the
        # XGBClassifier constructed above.
        self.params = {"objective": "multi:softprob", "tree_method": "gpu_hist", "num_class": 2}
        self.model.fit(self.X_train, self.y_train)
        self.assess_on_test()

    def train_test_split(
            self,
            mode = 'years'
    ):
        '''
        Partition `training_data` into train / test sets.

        Parameters
        ----------
        mode : str
            Split strategy. Only 'years' is implemented: rows whose `season`
            is listed in `test_years` form the test set, all others the
            training set.

        Raises
        ------
        ValueError
            If `mode` is not a supported strategy.
        '''
        # Fail with a clear message instead of an AttributeError further down
        # when no partition has been created.
        if mode != 'years':
            raise ValueError(
                f"Unsupported split mode {mode!r}; only 'years' is implemented."
            )

        held_out = self.training_data['season'].isin(self.test_years)
        self.train_data = self.training_data[~held_out]
        self.test_data = self.training_data[held_out]

        self.X_train = self.train_data[self.predictors]
        self.X_test = self.test_data[self.predictors]
        self.y_train = self.train_data[self.response]
        self.y_test = self.test_data[self.response]
        # DMatrix copies of both partitions are stored on the instance; the
        # sklearn-style model in this class is fitted on the frames directly.
        self.dtrain = xgb.DMatrix(self.X_train, label=self.y_train, enable_categorical=True)
        self.dtest = xgb.DMatrix(self.X_test, label=self.y_test, enable_categorical=True)

    def assess_on_test(self):
        '''
        Score the held-out partition.

        Stores the predicted labels in `y_preds` and, as a list, the predicted
        probability of class 1 for every test row in `y_proba`.
        '''
        self.y_preds = self.model.predict(self.X_test)
        # Column 1 of predict_proba is the probability of the positive class.
        self.y_proba = list(self.model.predict_proba(self.X_test)[:, 1])

    def test_results(self):
        '''
        Return the test-set predictions next to the true response.

        Returns
        -------
        pandas.DataFrame
            Indexed like `y_test`, with columns `y_pred`, `y_proba` and one
            column named after `response` holding the observed values.
        '''
        # y_preds is an ndarray and y_proba a list, which pd.concat rejects;
        # build the frame column-wise on the test index instead.
        return pd.DataFrame(
            {
                'y_pred': np.asarray(self.y_preds),
                'y_proba': np.asarray(self.y_proba),
                self.response: np.asarray(self.y_test),
            },
            index=self.y_test.index,
        )


# Instantiate the model on the prepared dataset; construction also fits the
# classifier and scores the held-out seasons.
c = Chrystal_Ball(training_data=t)

In [235]:
# Number of highest-confidence games kept per week.
TOP_PER_WEEK = 5

# One row per held-out game: model outputs first, then every column of the
# test partition (the Series values bring the test index with them).
cols = {'y_pred' : c.y_preds,'y_proba':c.y_proba}
data_cols = {key:c.test_data[key] for key in c.test_data.columns}
cols.update(data_cols)
test_res = pd.DataFrame(cols)

# Confidence is the probability assigned to the predicted class:
# y_proba when class 1 was predicted, 1 - y_proba when class 0 was predicted.
# Computed column-wise rather than with a row-wise apply over the wide frame.
test_res['conf'] = test_res['y_proba'].where(
    test_res['y_pred'] != 0,
    1 - test_res['y_proba'],
)

# MultiIndex (week, original row label) of the most confident games per week;
# dropping level 0 leaves the row labels used to filter `test_res`.
mask = test_res.groupby('week')['conf'].nlargest(TOP_PER_WEEK).index
top_N = test_res[test_res.index.isin(mask.droplevel(0))][['y_pred','y_proba','conf','home_score','away_score','home_cover']]

In [236]:
# Inspect the assembled per-game frame; the recorded rendering below is
# truncated in both rows and columns.
test_res

Unnamed: 0,y_pred,y_proba,season,week,home_team,away_team,spread_line,home_score,away_score,home_cover,within_three,team_x,home_turnover_propensity,home_def_turnover_propensity,home_balanced_player_efficacy,home_points_per_epa,home_off_coaching,home_def_coaching,home_conservative_coverage,home_offensive_scoring_ability,home_defensive_scoring_allow,home_off_big_play_propensity,home_defense_big_play_propensity,home_normaltime_epa,home_garbagetime_epa,home_overall_coaching,home_yards_per_carry,home_yards_per_pass,home_epa_per_rush,home_epa_per_pass,home_off_epa_x,home_pct_pass,home_pct_run,home_team_HHI,home_team_passing_HHI,home_def_yards_per_pass,home_defteam_x,home_def_yards_per_rush,home_def_pass_epa,home_def_rush_epa,...,away_def_yards_per_rush,away_def_pass_epa,away_def_rush_epa,away_def_points_per_drive,away_points_per_RZ,away_def_points_per_RZ,away_off_ppg,away_defteam_y,away_def_ppg,away_proportion_leading,away_proportion_leading_three,away_turnover_rate_x,away_turnover_rate_y,away_posteam_score,away_off_epa_y,away_qb_aggr,away_def_aggr,away_def_box_stuff_rate,away_def_cushion,away_def_separation,away_off_avg_throw_dist,away_plays_over_25_yd,away_td_over_25_yd,away_def_plays_over_25_yd,away_def_td_over_25_yd,away_team_scr,away_defteam_scr,away_off_qb_comp,away_def_qb_comp,away_off_qbhit,away_def_qbhit,away_score_diff,away_first_drive_pts_avg,away_first_drive_pts_avg_allowed,away_trailing_pct_air_yards,away_trailing_pct_yac,away_h2_first_drive_pts_avg,away_h2_first_drive_pts_avg_allowed,missing_N,conf
1638,1,0.573011,2022,4,TB,ATL,-10.5,21,15,0,0,TB,0.702736,0.571642,1.120453,46.704936,0.480403,-0.673071,0.735521,1.056903,0.791939,0.758558,-0.231888,-0.302679,-0.161638,0.601687,2.692662,6.147456,-1.218266,0.116273,-0.078137,0.679214,0.320786,0.216337,0.230166,5.217874,TB,4.294184,-0.163983,-0.093198,...,4.994177,0.037506,0.151082,2.556250,4.419643,5.512500,25.75,ATL,24.50,0.474118,0.224661,0.115259,0.150565,23.0,1.907941,20.821832,15.995883,,5.892623,2.443068,11.315392,2.25,0.00,2.25,0.25,46.50,52.25,0.526721,0.657584,0.062096,0.063337,3.0,3.25,2.50,0.606829,0.393171,2.50,3.25,0.029197,0.573011
1639,1,0.533222,2022,4,MIN,CHI,-10.0,29,22,0,0,MIN,0.761343,0.082703,1.052947,44.486841,0.569277,-0.423000,0.673228,1.222868,1.541637,0.757172,0.105444,-0.011671,-0.234245,0.746980,4.635758,6.179829,-0.055918,0.016480,-0.029361,0.653044,0.346956,0.231445,0.322475,7.304215,MIN,4.866223,0.125771,0.038156,...,5.014537,-0.055942,0.044297,1.886364,4.354167,4.412500,15.25,CHI,19.25,0.274945,0.069561,0.099941,0.099217,12.0,-5.362024,,,,,,,2.00,0.25,2.25,0.00,33.50,48.50,0.419387,0.561844,0.086044,0.037469,-8.0,3.25,0.75,0.539096,0.460904,4.25,1.50,0.058394,0.533222
1640,1,0.547000,2022,4,BAL,CIN,-3.0,19,17,0,1,BAL,0.665560,0.451571,1.150676,57.191578,0.695250,-0.648500,0.604865,1.522398,1.506852,0.998304,0.191253,0.160785,0.132295,0.749937,5.754788,7.031068,0.476891,0.492183,0.125920,0.561459,0.438541,0.179501,0.251698,6.912715,BAL,5.090110,0.068960,0.030792,...,3.811412,-0.127992,-0.077146,1.363636,5.300000,3.666667,22.75,CIN,16.00,0.473372,0.319662,0.085181,0.153026,27.0,6.831907,16.500135,20.595214,15.378788,6.669498,2.730943,7.699869,1.50,0.50,2.50,0.00,55.75,40.00,0.585187,0.561121,0.084920,0.086807,12.0,4.25,3.25,0.484876,0.515124,2.50,0.00,0.000000,0.547000
1641,1,0.542304,2022,4,LA,DAL,-5.5,10,22,0,0,LA,1.226602,-0.110096,0.881293,39.299959,0.694927,-0.728500,1.049331,1.002951,1.232665,0.652397,-0.092792,0.161715,-0.229839,0.721727,3.555556,6.039797,-0.288350,-0.156320,-0.064398,0.664727,0.335273,0.287289,0.388721,7.095918,LA,3.937500,0.168456,-0.092710,...,5.156186,-0.153961,0.076967,1.424126,,5.145833,17.00,DAL,15.50,0.663033,0.399729,0.044389,0.042357,25.0,-10.443115,24.045235,16.204597,32.653750,5.930047,3.080103,7.840463,2.50,0.25,1.00,0.25,41.75,45.00,0.570640,0.555800,0.058033,0.101171,15.0,3.25,1.50,0.587206,0.412794,0.00,1.50,0.014599,0.542304
1642,1,0.656925,2022,4,NE,DET,-3.0,29,0,1,0,NE,0.819164,0.184330,1.095851,39.183613,0.856228,-0.654286,0.803298,1.138134,1.420417,0.840481,-0.151527,,,0.852548,4.699675,6.445035,0.319185,-0.506183,-0.019523,0.538216,0.461784,0.214688,0.312502,5.976147,NE,5.305416,-0.012630,0.128000,...,5.678208,0.209317,0.244674,2.768007,6.541667,5.798810,34.50,DET,35.25,0.497143,0.344551,0.048078,0.025288,44.0,18.046932,16.033508,15.058601,24.499030,4.815805,3.036993,9.126408,4.00,0.50,3.25,0.75,59.50,63.50,0.590257,0.615881,0.058453,0.071978,-3.0,1.75,1.75,0.503207,0.496793,2.50,3.50,0.021898,0.656925
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1839,0,0.489794,2022,17,CHI,MIN,6.5,13,29,0,0,CHI,0.842390,0.197450,0.872773,38.562463,0.529339,-1.032000,0.469748,0.854750,1.745870,0.694267,0.421847,-0.054044,-0.266142,0.507870,5.949451,4.732426,0.104421,-1.463566,-0.191865,0.515358,0.484642,0.289894,0.696126,6.951386,CHI,6.290748,0.021426,0.277574,...,4.921726,0.071178,0.042365,2.386616,5.512500,5.029762,24.75,MIN,32.50,0.247934,0.077540,0.180168,0.061458,16.0,-10.926062,16.190272,16.078713,8.854167,6.895963,2.486032,8.675107,2.25,0.50,2.50,0.75,54.25,52.00,0.618714,0.625472,0.108356,0.061448,-24.0,0.00,0.75,0.544639,0.455361,0.00,2.50,0.000000,0.510206
1841,0,0.433570,2022,17,PHI,NYG,-16.5,22,16,0,0,PHI,0.764636,0.334973,0.963750,56.219778,0.899630,-0.497500,0.843471,1.500396,1.341111,1.043532,-0.136673,-0.014785,-0.335441,0.935635,4.889583,7.241243,0.150603,0.183604,0.045743,0.592957,0.407043,0.249311,0.368226,5.293780,PHI,4.644364,-0.091088,0.008319,...,5.922803,-0.124661,0.211081,2.165278,4.895833,4.833333,25.00,NYG,23.50,0.417930,0.337860,0.063846,0.049964,38.0,12.690416,11.441799,,7.668067,6.601831,2.914929,5.264856,2.00,0.25,1.75,0.75,51.00,50.50,0.668773,0.596031,0.082608,0.084837,28.0,0.00,1.75,0.536926,0.463074,2.50,2.50,0.014599,0.566430
1842,1,0.569947,2022,17,MIA,NYJ,-3.5,11,6,1,0,MIA,1.040229,0.252292,1.030946,43.679876,0.429540,-0.573786,1.007780,1.389473,1.466367,0.838819,-0.335296,-0.034567,0.072669,0.603376,5.025712,6.894733,0.480560,-0.310584,0.004058,0.597477,0.402523,0.239492,0.365195,5.941703,MIA,4.294341,0.073838,-0.061107,...,4.419221,0.034110,-0.068773,1.707343,3.375000,4.300000,9.50,NYJ,20.25,0.108687,0.011494,0.150758,0.018002,6.0,-21.640078,20.624882,15.391398,22.820766,6.146743,3.201792,10.247728,2.00,0.25,1.50,0.25,41.25,42.75,0.499584,0.577263,0.103064,0.071396,-17.0,0.75,1.75,0.667333,0.332667,1.75,4.00,0.007299,0.569947
1843,1,0.589555,2022,17,ATL,TB,-6.0,30,17,1,0,ATL,0.606269,0.217214,1.035454,31.828770,0.593530,-0.756571,0.691190,1.183700,1.356709,0.530404,-0.035028,,,0.645995,4.638609,5.392989,0.034652,-0.617925,-0.059916,0.466244,0.533756,0.232940,0.398911,7.265581,ATL,4.742345,0.223120,-0.045747,...,4.184964,0.084232,-0.173661,2.218074,4.666667,5.591667,19.00,TB,27.25,0.337693,0.132418,0.177437,0.131952,30.0,8.966685,15.666035,12.275352,51.071845,6.438406,3.145368,6.709426,1.25,0.75,2.50,0.75,54.00,49.25,0.654256,0.636772,0.070391,0.067156,6.0,1.50,3.50,0.488551,0.511449,0.00,2.50,0.014599,0.589555


In [237]:
# A prediction counts as a win when the predicted label matches home_cover.
test_res['win'] = test_res['y_pred'].eq(test_res['home_cover'])
# The picked side is the home team when class 1 is predicted, else the away team.
test_res['pick'] = np.where(test_res['y_pred'] == 1,
                            test_res['home_team'],
                            test_res['away_team'])

In [238]:
# Hit rate of the highest-confidence picks: share of rows in top_N whose
# predicted label equals the observed home_cover.
sum(top_N['y_pred']==top_N['home_cover'])/len(top_N)

0.4857142857142857

In [239]:
# Show the first 24 of the highest-confidence rows. The same index filter as
# top_N is applied to test_res so the `win` and `pick` columns are included.
test_res[test_res.index.isin(mask.droplevel(0))][['home_team','away_team','conf','week','y_pred','home_cover','win','pick']].head(24)

Unnamed: 0,home_team,away_team,conf,week,y_pred,home_cover,win,pick
1638,TB,ATL,0.573011,4,1,0,False,TB
1642,NE,DET,0.656925,4,1,1,True,NE
1645,CLE,LAC,0.599141,4,0,0,True,LAC
1646,KC,LV,0.619136,4,1,0,False,KC
1648,GB,NYG,0.576206,4,1,0,False,GB
1654,SEA,ARI,0.601921,5,0,1,False,ARI
1659,PHI,DAL,0.607778,5,1,1,True,PHI
1661,IND,JAX,0.545972,5,1,1,True,IND
1664,GB,NYJ,0.549554,5,0,0,True,NYJ
1667,CHI,WAS,0.566596,5,1,0,False,CHI
