In [74]:
import pandas as pd
import numpy as np

from sklearn.metrics import brier_score_loss, log_loss

In [3]:
# Folder containing the input CSV files, given relative to the working directory.
data_folder = './kaggle_data/'

# Game-level results table; later cells read its Season, WTeamID, LTeamID and
# WLoc columns.
mens_reg = pd.read_csv(f'{data_folder}MRegularSeasonCompactResults.csv')

In [16]:
# Keep only the four columns the Elo pass reads; .copy() detaches the result
# from mens_reg so later changes to elo_df cannot touch the source frame.
elo_df = mens_reg[['Season', 'WTeamID', 'LTeamID', 'WLoc']].copy()

In [12]:
# Preview the first rows to sanity-check the selected columns.
elo_df.head()

Unnamed: 0,Season,WTeamID,LTeamID,WLoc
0,1985,1228,1328,N
1,1985,1106,1354,H
2,1985,1112,1223,H
3,1985,1165,1432,H
4,1985,1192,1447,H


In [58]:
# Every team that appears as a winner or a loser starts at a rating of 1500.
elo_dict = dict.fromkeys(set(pd.concat([elo_df['WTeamID'], elo_df['LTeamID']])), 1500)

In [76]:
def process_elo(base_k=49):
    """Replay every game in ``elo_df`` and return the winners' pre-game probabilities.

    The module-level ``elo_dict`` is reset so that every team found in
    ``elo_df`` starts at 1500, then each row is fed to ``update_elo`` in the
    order it appears in the frame. Whenever the ``Season`` value changes from
    one row to the next, every rating is pulled toward 1500
    (``0.87 * rating + 0.13 * 1500``) before that row is processed.

    Parameters
    ----------
    base_k : int or float, default 49
        K-factor forwarded to ``update_elo``.

    Returns
    -------
    list of float
        One value per row of ``elo_df``: the probability ``update_elo``
        assigned to the eventual winner before applying the result.

    Notes
    -----
    Relies on ``elo_df`` and ``update_elo`` already being defined in the
    notebook namespace. Rows are assumed to be in chronological order; this is
    not checked here.
    """
    # update_elo reads and writes the module-level dict, so it must be rebound
    # globally rather than shadowed by a local variable.
    global elo_dict

    elo_dict = {team: 1500 for team in set(pd.concat([elo_df['WTeamID'], elo_df['LTeamID']]))}
    probs = []
    season = None

    for s, w, l, h in zip(elo_df['Season'], elo_df['WTeamID'], elo_df['LTeamID'], elo_df['WLoc']):

        if season is not None and s != season:
            elo_dict = {team: ((0.87 * value + 0.13 * 1500)) for team, value in elo_dict.items()}
        # Follow the season found in the data instead of incrementing a
        # counter: with a counter, a gap between seasons would trigger the
        # regression again on every following game.
        season = s

        probs.append(update_elo(winner=w, loser=l, home_adv=h, base_k=base_k))

    return probs


winner_probs = process_elo()

0.17947289163681615


In [52]:
def update_elo(winner='1134', loser='1177', home_adv='H', base_k=49,
               adv_points=105, ratings=None):
    """Apply one game result to an Elo table and return the winner's pre-game probability.

    Parameters
    ----------
    winner, loser : hashable
        Keys of the winning and losing team in the ratings table. They must
        match the key type used by that table.
    home_adv : str
        Location code seen from the winner's side: ``'H'`` adds ``adv_points``
        to the winner's rating for the probability calculation, ``'A'``
        subtracts it, and any other value applies no adjustment.
    base_k : int or float, default 49
        K-factor scaling how far the ratings move.
    adv_points : int or float, default 105
        Size of the location adjustment in rating points.
    ratings : dict, optional
        Table to read and update in place. When omitted, the module-level
        ``elo_dict`` is used, as in the original implementation.

    Returns
    -------
    float
        Expected score of the winner computed from the pre-game ratings
        (including the location adjustment).

    Raises
    ------
    KeyError
        If ``winner`` or ``loser`` is not present in the ratings table.
    """
    # `is None` rather than truthiness, so an explicitly passed empty dict is
    # not silently swapped for the global table.
    if ratings is None:
        ratings = elo_dict

    if home_adv == 'H':
        adv = adv_points
    elif home_adv == 'A':
        adv = -adv_points
    else:
        adv = 0

    # The location adjustment only affects the expectation; it is never
    # stored in the table.
    prematch_winner_elo = ratings[winner] + adv
    prematch_loser_elo = ratings[loser]

    exp_a = 1 / (1 + 10 ** ((prematch_loser_elo - prematch_winner_elo) / 400))

    # The loser's change is the exact negative of the winner's:
    # base_k * (0 - (1 - exp_a)) == -(base_k * (1 - exp_a)).
    delta = base_k * (1 - exp_a)

    ratings[winner] = ratings[winner] + delta
    ratings[loser] = ratings[loser] - delta

    return exp_a

In [98]:
class NCAAELO:
    """Elo rating tracker driven by a table of game results.

    The input frame must provide the columns ``Season``, ``WTeamID``,
    ``LTeamID`` and ``WLoc``. Rows are processed in the order they appear, so
    they are assumed to be chronological; this is not verified.

    Attributes
    ----------
    elo_dict : dict
        Current rating per team id; every team starts at ``BASE_ELO``.
    winner_probs : list of float
        Filled by ``process_elo``: for each game, the probability given to the
        eventual winner before the ratings were updated.
    winner_elo, loser_elo : list
        Created empty; nothing in this class appends to them.
    processed : bool
        Set to True once ``process_elo`` has completed.
    """

    # Starting rating, and the value ratings are pulled toward between seasons.
    BASE_ELO = 1500
    # At a season change: rating <- SEASON_CARRYOVER * rating
    #                               + SEASON_REVERSION * BASE_ELO
    SEASON_CARRYOVER = 0.87
    SEASON_REVERSION = 0.13

    def __init__(self, df: pd.DataFrame, base_k: int = 49, home_adv: int = 105):
        """Store the games and initialise every team's rating.

        Parameters
        ----------
        df : pandas.DataFrame
            Game results with ``Season``, ``WTeamID``, ``LTeamID``, ``WLoc``.
        base_k : int, default 49
            K-factor used for every rating update.
        home_adv : int, default 105
            Rating points added to (``WLoc == 'H'``) or subtracted from
            (``WLoc == 'A'``) the winner's rating when computing the
            expectation.
        """
        self.df = df
        self.teams = set(
            pd.concat([self.df['WTeamID'], self.df['LTeamID']]))
        self.winners = self.df['WTeamID']
        self.losers = self.df['LTeamID']
        self.seasons = self.df['Season']
        self.win_loc = self.df['WLoc']
        self.elo_dict = {name: self.BASE_ELO for name in self.teams}
        self.base_k = base_k
        self.home_advantage = home_adv
        self.processed = False
        self.winner_elo = []
        self.loser_elo = []
        self.winner_probs = []

    def update_elo(self, winner=None, loser=None, win_loc=None):
        """Apply one result to ``elo_dict`` and return the winner's pre-game probability.

        Parameters
        ----------
        winner, loser : hashable
            Team ids; both must be keys of ``elo_dict`` (KeyError otherwise).
        win_loc : str, optional
            ``'H'`` / ``'A'`` shift the winner's rating by plus / minus
            ``home_advantage`` for the expectation only; any other value
            applies no shift.

        Returns
        -------
        float
            Expected score of the winner computed from the pre-game ratings.
        """
        if win_loc == 'H':
            adv = self.home_advantage
        elif win_loc == 'A':
            adv = -self.home_advantage
        else:
            adv = 0

        prematch_winner_elo = self.elo_dict[winner] + adv
        prematch_loser_elo = self.elo_dict[loser]

        exp_a = 1 / (1 + 10 ** ((prematch_loser_elo - prematch_winner_elo) / 400))

        # The loser moves by the exact negative of the winner's change, so the
        # sum of the two ratings is preserved.
        delta = self.base_k * (1 - exp_a)

        self.elo_dict[winner] = self.elo_dict[winner] + delta
        self.elo_dict[loser] = self.elo_dict[loser] - delta

        return exp_a

    def _regress_to_mean(self):
        """Pull every rating toward BASE_ELO; used at season boundaries."""
        self.elo_dict = {
            team: self.SEASON_CARRYOVER * value + self.SEASON_REVERSION * self.BASE_ELO
            for team, value in self.elo_dict.items()
        }

    def process_elo(self):
        """Run every game through ``update_elo`` once.

        Ratings are regressed toward ``BASE_ELO`` each time the ``Season``
        value differs from the previous row's. The per-game winner
        probabilities are appended to ``winner_probs``.

        Returns
        -------
        NCAAELO or str
            ``self`` after the first successful run; the string
            ``"already processed"`` on any later call, so the ratings are
            never updated twice.
        """
        if self.processed:
            return "already processed"

        current_season = None

        for s, w, l, h in zip(self.seasons, self.winners, self.losers, self.win_loc):

            if current_season is not None and s != current_season:
                self._regress_to_mean()
            # Track the season actually present in the data. Incrementing a
            # counter instead would fall behind after a skipped season and
            # regress the ratings again on every following game.
            current_season = s

            self.winner_probs.append(self.update_elo(winner=w, loser=l, win_loc=h))

        self.processed = True

        return self

In [99]:
# Build a tracker over the full results table with the default settings
# (base_k=49, home_adv=105).
test = NCAAELO(mens_reg)

In [100]:
# Run the rating pass over every game. On the first call process_elo returns
# the instance itself; a repeated call returns "already processed" instead.
test.process_elo()

<__main__.NCAAELO at 0x1fdaec3ff70>

In [101]:
# One entry per game: the probability assigned to the eventual winner before
# the ratings were updated for that game.
test.winner_probs

[0.5,
 0.6466688423256146,
 0.6466688423256146,
 0.6466688423256146,
 0.6466688423256146,
 0.6466688423256146,
 0.535200008375674,
 0.5,
 0.6466688423256146,
 0.6466688423256146,
 0.6466688423256146,
 0.5,
 0.6466688423256146,
 0.5,
 0.6466688423256146,
 0.6466688423256146,
 0.6466688423256146,
 0.35333115767438533,
 0.6466688423256146,
 0.6466688423256146,
 0.376421380455657,
 0.6466688423256146,
 0.6466688423256146,
 0.6466688423256146,
 0.35333115767438533,
 0.6466688423256146,
 0.6466688423256146,
 0.6466688423256146,
 0.35333115767438533,
 0.6466688423256146,
 0.535200008375674,
 0.6466688423256146,
 0.35333115767438533,
 0.35333115767438533,
 0.6466688423256146,
 0.6466688423256146,
 0.6466688423256146,
 0.6466688423256146,
 0.6466688423256146,
 0.6138193140913419,
 0.6466688423256146,
 0.3309057077450087,
 0.6466688423256146,
 0.4975178330701085,
 0.6466688423256146,
 0.46727072002477155,
 0.6466688423256146,
 0.35333115767438533,
 0.35333115767438533,
 0.46479999162432595,
 0.6

In [102]:
# Each entry of winner_probs is the probability given to the team that actually
# won, so the true label for every game is 1.
brier_score_loss(len(mens_reg) * [1], test.winner_probs)

0.17947289163681615