In [80]:
# Enable IPython's autoreload extension in mode 2 so edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [81]:
import os
import sys
import warnings

REPO_NAME = 'sewer-nfl'
CWD = os.getcwd()


def locate_repo_root(cwd, repo_name):
    """Return the prefix of `cwd` that ends with the first occurrence of `repo_name`.

    Args:
        cwd: Path string to search (normally the current working directory).
        repo_name: Directory name of the repository checkout.

    Returns:
        The path up to and including `repo_name`, or None when `repo_name`
        does not occur in `cwd`.
    """
    start = cwd.find(repo_name)
    if start == -1:
        # str.find signals "not found" with -1; slicing with that value would
        # silently yield a meaningless prefix of cwd.
        return None
    return cwd[:start + len(repo_name)]


REPO_DIR = locate_repo_root(CWD, REPO_NAME)
if REPO_DIR is None:
    warnings.warn(
        f"'{REPO_NAME}' not found in working directory {CWD!r}; "
        "falling back to the working directory. Project imports may fail."
    )
    REPO_DIR = CWD

# Keep exactly one entry, at the front, so re-running this cell does not
# accumulate duplicates on sys.path.
sys.path[:] = [p for p in sys.path if p != REPO_DIR]
sys.path.insert(0, REPO_DIR)

In [82]:
# Project-local imports; presumably these resolve because REPO_DIR was inserted
# into sys.path by the earlier setup cell.
from models._utilities.data.pipe_layer import build_training_dataset
from warehouse.config import Configuration # At model level, switch this to the model's config
config = Configuration()
# Build the training dataset consumed by the model cell below. The recorded
# output shows one progress line per season (2016-2022) and a float-downcast step.
t = build_training_dataset(config)

2016 done.
2017 done.
2018 done.
2019 done.
2020 done.
2021 done.
2022 done.
Downcasting floats.


  


In [95]:

import pickle
import xgboost as xgb
import numpy as np
import pandas as pd
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# Numeric columns that must never be used as model features; Chrystal_Ball
# filters these out when it builds its predictor list.
NUMERIC_META_COLS = [
    'season', 'week',
    'spread_line',
    'home_score', 'away_score',
    'home_cover', 'within_three',
]

class Chrystal_Ball:
    '''

    Model object designed to generate predictions on a series of live or test data
    - Will be stored as a .pkl file

    On construction the data is split into train/test sets by season and an
    XGBClassifier is fitted on the training portion.

    '''

    def __init__(self,
                 training_data,
                 test_years = None,
                 response = 'home_cover', # Options: ['home_cover','spread_line','within_three']
                 ):
        '''
        Select predictor columns, split the data and fit the classifier.

        Args:
            training_data: DataFrame holding a 'season' column, the response
                column and the candidate feature columns.
            test_years: Seasons held out as the test set. Defaults to [2022].
            response: Name of the column to predict.
        '''

        self.training_data = training_data
        # None sentinel instead of a mutable list default shared across calls.
        self.test_years = [2022] if test_years is None else test_years
        self.response = response

        # Resolve the numeric columns once rather than re-running select_dtypes
        # for every column, and never let the response leak into the features.
        numeric_cols = set(self.training_data.select_dtypes(np.number).columns)
        excluded = set(NUMERIC_META_COLS) | {self.response}
        self.predictors = [c for c in self.training_data.columns
                           if c in numeric_cols and c not in excluded]

        self.train_test_split()
        self.model = XGBClassifier(eta = 0.01, reg_lambda=1, min_child_weight=1)
        # NOTE: these params are stored but not passed to self.model.
        self.params = {"objective": "multi:softprob", "tree_method": "gpu_hist", "num_class": 2}
        self.model.fit(self.X_train, self.y_train)

    def train_test_split(
            self,
            mode = 'years'
    ):
        '''
        Split training_data into train/test frames and build the X/y views.

        Args:
            mode: Split strategy. Only 'years' is supported: rows whose
                'season' is in test_years form the test set, the rest train.

        Raises:
            ValueError: If mode is not a supported strategy.
        '''
        if mode != 'years':
            # Fail here instead of with an AttributeError on self.train_data below.
            raise ValueError(f"Unsupported split mode: {mode!r} (expected 'years')")

        mask = self.training_data['season'].isin(self.test_years)
        self.train_data = self.training_data[~mask]
        self.test_data = self.training_data[mask]

        self.X_train = self.train_data[self.predictors]
        self.X_test = self.test_data[self.predictors]
        self.y_train = self.train_data[self.response]
        self.y_test = self.test_data[self.response]
        self.dtrain = xgb.DMatrix(self.X_train, self.y_train, enable_categorical=True)
        self.dtest = xgb.DMatrix(self.X_test, self.y_test, enable_categorical=True)

    def assess_on_test(self):
        '''
        Predict on the test split and return the accuracy.

        Stores the predicted labels in y_preds and the class probabilities in
        y_proba as a side effect.

        Returns:
            Fraction of test rows whose predicted label equals the true label.

        Raises:
            ValueError: If the test split contains no rows.
        '''
        if len(self.X_test) == 0:
            raise ValueError(
                'Test split is empty; check test_years against the seasons in training_data.'
            )
        self.y_preds = self.model.predict(self.X_test)
        self.y_proba = self.model.predict_proba(self.X_test)
        # Compare positionally as arrays; predictions carry no index.
        hits = np.asarray(self.y_preds) == np.asarray(self.y_test)
        return float(np.mean(hits))

    def test_results(self):
        '''
        Run the test assessment and return the per-row results.

        Returns:
            DataFrame indexed like y_test with columns 'y_pred', one
            'proba_<k>' column per probability column k, and 'y_true'.
        '''
        self.assess_on_test()
        # y_preds / y_proba are NumPy arrays, which pd.concat rejects, so the
        # frame is assembled explicitly on the test index.
        proba = np.asarray(self.y_proba)
        if proba.ndim == 1:
            proba = proba.reshape(-1, 1)
        results = pd.DataFrame(
            proba,
            index=self.y_test.index,
            columns=[f'proba_{k}' for k in range(proba.shape[1])],
        )
        results.insert(0, 'y_pred', np.asarray(self.y_preds))
        results['y_true'] = np.asarray(self.y_test)
        return results


# Fit the model on the dataset built earlier; construction performs the split and the fit.
c = Chrystal_Ball(training_data=t)

In [98]:
# y_proba is only populated by assess_on_test(); run it here so this cell works
# on a fresh kernel (Restart & Run All) instead of relying on a cell that is gone.
c.assess_on_test()
c.y_proba

array([[0.21804887, 0.7819511 ],
       [0.54102314, 0.45897686],
       [0.7945338 , 0.2054662 ],
       [0.8125094 , 0.18749061],
       [0.18770862, 0.8122914 ],
       [0.70888585, 0.29111415],
       [0.68724924, 0.31275076],
       [0.78019595, 0.21980403],
       [0.7338624 , 0.2661376 ],
       [0.18770862, 0.8122914 ],
       [0.67617095, 0.32382905],
       [0.4699788 , 0.5300212 ],
       [0.21207017, 0.78792983],
       [0.2157067 , 0.7842933 ],
       [0.8093142 , 0.19068582],
       [0.51963633, 0.48036367],
       [0.24123341, 0.7587666 ],
       [0.6150781 , 0.38492188],
       [0.52838576, 0.47161424],
       [0.18842864, 0.81157136],
       [0.63055813, 0.3694419 ],
       [0.26824462, 0.7317554 ],
       [0.35410088, 0.6458991 ],
       [0.21468854, 0.78531146],
       [0.772954  , 0.22704603],
       [0.8125094 , 0.18749061],
       [0.8125094 , 0.18749061],
       [0.18770862, 0.8122914 ],
       [0.56147563, 0.43852437],
       [0.5429758 , 0.45702422],
       [0.