In [1]:
import pandas as pd

data = pd.read_csv('games.csv', index_col=0)

In [2]:
# Cleaning / getting data ready for machine learning
data['venue'] = data['venue'].map({'Home' : 1, 'Away' : 0})     # convert venue to 1's and 0's
data['Date'] = pd.to_datetime(data['Date'])
data['opponent'] = data['Opponent'].astype('category').cat.codes # Converting opponent to integers
data = data.drop(columns=['Time'], inplace=False)                # Dropping time column
data = data[data["Team"] != "Arizona Coyotes"]                   # Team doesnt exist anymore

predict = ['opponent', 'venue']

In [3]:
data

Unnamed: 0,Date,Team,Opponent,venue,Att.,G,GA,S,S%,SV%,PIM,SA,SA%,Opponent SV%,Opponent PIM,Result,opponent
0,2021-12-12,Anaheim Ducks,St. Louis Blues,0,17010.0,3,2,39,7.7,0.920,2,25,8.0,0.923,2,1,25
1,2023-01-28,Anaheim Ducks,Arizona Coyotes,1,16126.0,2,1,45,4.4,0.971,20,34,2.9,0.956,10,1,1
2,2022-01-14,Anaheim Ducks,Minnesota Wild,0,18300.0,3,7,42,7.1,0.793,5,42,16.7,0.929,7,0,14
3,2024-11-05,Anaheim Ducks,Vancouver Canucks,1,13538.0,1,5,22,4.5,0.865,10,37,13.5,0.955,8,0,29
4,2022-10-12,Anaheim Ducks,Seattle Kraken,1,17530.0,5,4,27,18.5,0.917,15,48,8.3,0.815,11,1,24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10491,2024-01-02,Winnipeg Jets,Tampa Bay Lightning,1,14157.0,4,2,28,14.3,0.941,6,34,5.9,0.889,6,1,26
10492,2025-03-07,Winnipeg Jets,New Jersey Devils,0,16088.0,6,1,35,17.1,0.957,4,23,4.3,0.829,4,1,17
10493,2023-02-20,Winnipeg Jets,New York Rangers,0,18006.0,4,1,21,19.0,0.980,13,51,2.0,0.810,7,1,19
10494,2025-03-09,Winnipeg Jets,Carolina Hurricanes,0,18700.0,2,4,22,9.1,0.885,24,27,14.8,0.909,8,0,5


In [4]:
# Function to compute rolling averages over the last 3 games. Games for which there isnt enough data (i.e. the first 3 games of each teams) are dropped

def rolling_averages(team, cols, new_cols):
    team = team.sort_values("Date")    # Getting team data organized chronologically
    rolling = team[cols].rolling(3, closed='left').mean()   # closed=left to ignore current row in sliding window
    team[new_cols] = rolling
    team = team.dropna(subset=new_cols) # dropping first rows because not enough data
    return team

In [5]:
cols = ['G', 'GA', 'S', 'S%', 'SV%', 'PIM', 'Result']   # wanted columns for rolling
new_cols = [f"{c}_rolling" for c in cols]
predictors = new_cols + predict                         # New predictors


games_data = data.groupby('Team').apply(lambda x: rolling_averages(x, cols, new_cols))   # Compute rolling averages
games_data = games_data.droplevel("Team")
games_data.index = range(games_data.shape[0])  # fixing index level
games_data

  games_data = data.groupby('Team').apply(lambda x: rolling_averages(x, cols, new_cols))   # Compute rolling averages


Unnamed: 0,Date,Team,Opponent,venue,Att.,G,GA,S,S%,SV%,...,Opponent PIM,Result,opponent,G_rolling,GA_rolling,S_rolling,S%_rolling,SV%_rolling,PIM_rolling,Result_rolling
0,2021-10-19,Anaheim Ducks,Edmonton Oilers,0,14082.0,5,6,36,13.9,0.861,...,4,0,11,2.666667,1.666667,26.000000,10.900000,0.959000,20.000000,0.666667
1,2021-10-21,Anaheim Ducks,Winnipeg Jets,0,13886.0,1,5,39,2.6,0.846,...,6,0,32,3.000000,3.333333,30.666667,9.466667,0.922333,16.000000,0.333333
2,2021-10-23,Anaheim Ducks,Minnesota Wild,0,18055.0,3,4,24,12.5,0.889,...,10,0,14,3.000000,4.333333,34.000000,9.200000,0.886667,9.000000,0.333333
3,2021-10-26,Anaheim Ducks,Winnipeg Jets,1,11951.0,3,4,35,8.6,0.840,...,6,0,32,3.000000,5.000000,33.000000,9.666667,0.865333,9.333333,0.000000
4,2021-10-28,Anaheim Ducks,Buffalo Sabres,1,12014.0,3,4,37,8.1,0.862,...,2,0,3,2.333333,4.333333,32.666667,7.900000,0.858333,6.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10146,2025-04-07,Winnipeg Jets,St. Louis Blues,1,15225.0,3,1,26,11.5,0.933,...,2,1,25,2.000000,2.666667,24.666667,8.833333,0.910333,3.333333,0.333333
10147,2025-04-10,Winnipeg Jets,Dallas Stars,0,18532.0,4,0,35,11.4,1.000,...,7,1,9,2.666667,1.666667,27.000000,10.900000,0.940667,4.000000,0.666667
10148,2025-04-12,Winnipeg Jets,Chicago Blackhawks,0,20634.0,5,4,42,9.5,0.875,...,4,1,6,2.666667,1.666667,31.333333,8.633333,0.940667,5.666667,0.666667
10149,2025-04-13,Winnipeg Jets,Edmonton Oilers,1,15225.0,1,4,18,5.6,0.921,...,4,0,11,4.000000,1.666667,34.333333,10.800000,0.936000,5.000000,1.000000


In [6]:
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier

model = XGBClassifier(random_state=10)    # base model

In [7]:
from sklearn.metrics import precision_score

# Function to make predictions given the data, input features and chosen model

def make_predictions(data, predictors, model):
    train = data[data['Date'] < '2024-04-19']
    test = data[data['Date'] > '2024-04-19']
    model.fit(train[predictors], train['Result'])
    preds = model.predict(test[predictors])
    combined  = pd.DataFrame(dict(actual=test['Result'], prediction = preds), index=test.index)
    precision = precision_score(test['Result'], preds)
    return combined, precision

In [27]:
# Defining search space for GridSearchCV
search_grid = {
    'n_estimators': [100, 200, 500],
    'max_depth': [3, 6],
    'learning_rate': [0.001, 0.01, 0.1],
    'reg_alpha': [0, 1, 5, 10],
    'reg_lambda': [0, 1, 5, 10]

}

GS = GridSearchCV(
    estimator = model,
    param_grid = search_grid,
    scoring = ["accuracy", 'f1', 'roc_auc'],
    refit = "accuracy",
    cv = 5,
    verbose= 4
)

training = games_data[games_data['Date'] < '2024-04-19']  # Training using 2021-2024 data
testing = games_data[games_data['Date'] > '2024-04-19']   # Testing on most recent season (2024-2025)


In [28]:
GS.fit(training[predictors], training['Result'])     # Training

Fitting 5 folds for each of 288 candidates, totalling 1440 fits
[CV 1/5] END learning_rate=0.001, max_depth=3, n_estimators=100, reg_alpha=0, reg_lambda=0; accuracy: (test=0.580) f1: (test=0.648) roc_auc: (test=0.597) total time=   0.0s
[CV 2/5] END learning_rate=0.001, max_depth=3, n_estimators=100, reg_alpha=0, reg_lambda=0; accuracy: (test=0.546) f1: (test=0.631) roc_auc: (test=0.564) total time=   0.0s
[CV 3/5] END learning_rate=0.001, max_depth=3, n_estimators=100, reg_alpha=0, reg_lambda=0; accuracy: (test=0.541) f1: (test=0.640) roc_auc: (test=0.584) total time=   0.0s
[CV 4/5] END learning_rate=0.001, max_depth=3, n_estimators=100, reg_alpha=0, reg_lambda=0; accuracy: (test=0.515) f1: (test=0.607) roc_auc: (test=0.526) total time=   0.0s
[CV 5/5] END learning_rate=0.001, max_depth=3, n_estimators=100, reg_alpha=0, reg_lambda=0; accuracy: (test=0.523) f1: (test=0.614) roc_auc: (test=0.540) total time=   0.0s
[CV 1/5] END learning_rate=0.001, max_depth=3, n_estimators=100, reg_al

0,1,2
,estimator,"XGBClassifier...ree=None, ...)"
,param_grid,"{'learning_rate': [0.001, 0.01, ...], 'max_depth': [3, 6], 'n_estimators': [100, 200, ...], 'reg_alpha': [0, 1, ...], ...}"
,scoring,"['accuracy', 'f1', ...]"
,n_jobs,
,refit,'accuracy'
,cv,5
,verbose,4
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [29]:
GS.best_score_

np.float64(0.5699867197875166)

In [30]:
new_model = GS.best_estimator_
new_model

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [31]:
combined, precision = make_predictions(games_data, predictors, new_model)
precision

0.5922705314009662

In [32]:
from sklearn.metrics import classification_report, roc_auc_score, log_loss
predictions = new_model.predict(testing[predictors])

print(classification_report(testing['Result'], predictions))

              precision    recall  f1-score   support

           0       0.56      0.68      0.61      1312
           1       0.59      0.47      0.52      1309

    accuracy                           0.57      2621
   macro avg       0.58      0.57      0.57      2621
weighted avg       0.58      0.57      0.57      2621



In [19]:
# Create DataFrame pairing features with their importances
importances = pd.DataFrame({
    'Feature': predictors,
    'Importance': new_model.feature_importances_
})

# Sort by importance
importances = importances.sort_values(by='Importance', ascending=False)

# Display top features
print(importances.head(10))

          Feature  Importance
8           venue    0.191895
2       S_rolling    0.171168
7        opponent    0.157679
6  Result_rolling    0.140662
1      GA_rolling    0.096419
5     PIM_rolling    0.066562
3      S%_rolling    0.063916
4     SV%_rolling    0.061327
0       G_rolling    0.050371


In [13]:
combined

Unnamed: 0,actual,prediction
243,1,1
244,0,0
245,1,0
246,0,0
247,0,0
...,...,...
10146,1,0
10147,1,0
10148,1,1
10149,0,1


In [None]:
combined = combined.merge(games_data[['Date', 'Team', 'Opponent', 'Result']], left_index=True, right_index=True)
combined

In [None]:
final = combined.merge(combined, left_on=['Date', 'Team'], right_on=['Date', 'Opponent'])  # few games will drop due to rolling windows
final

In [None]:
final[(final['prediction_x'] == 1) & (final['prediction_y'] == 0)]['actual_x'].value_counts()

In [None]:
final[(final['prediction_x'] == 0) & (final['prediction_y'] == 1)]['actual_y'].value_counts()

In [None]:
print("Overall accuracy turned out to be ~60%.")
print("However, when merging predictions for both games, we can see that the model is 63.3% accurate when both teams predictions match.")