# This is a mostly non-technical demo notebook for a CNN model that predicts March Madness games
##### Below are imports, more than are in use, just extra tools in case.

In [46]:
from __future__ import print_function
import sys
import csv
import tensorflow as tf
import sklearn
import en_core_web_sm
import keras
from keras import layers
from keras import backend as K
from keras.preprocessing.sequence import pad_sequences 
from keras.models import Sequential
from keras.layers import Dense
from keras.models import model_from_json
from keras_tqdm import TQDMNotebookCallback
from keras.models import Sequential
from keras.layers import Dropout, Activation
from keras.optimizers import SGD
from IPython import display
from keras.preprocessing.sequence import pad_sequences
import warnings
from collections import Counter
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
#pd.set_option('display.max_columns', None)
pd.options.display.max_columns = 10
pd.set_option('display.max_rows', None)
%matplotlib inline
from matplotlib import rcParams
rcParams['font.family'] = 'monospace'
from matplotlib.ticker import MaxNLocator
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve, log_loss
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
import os, eli5, shap
np.set_printoptions(threshold=sys.maxsize)
from eli5.sklearn import PermutationImportance
from pdpbox import pdp

In [47]:
#Create dataframes that will be used for performance comparison later
pastModels=[[2017,1,.43857],[2017,2,.44981],[2017,3,.45373],[2017,4,.46100],[2017,5,.46107],[2018,1,.53194],[2018,2,.53693],[2018,3,.54013],[2018,4,.54967],[2018,5,.54987],[2019,1,.41477],[2019,2,.42012],[2019,3,.42698],[2019,4,.42788],[2019,5,.43148]]
cols=['Year','Rank','LogLoss']
pastScores=pd.DataFrame(pastModels, columns=cols)
pastMadness=[[2017,65.7,82,86,79],[2018,57,81,124,98.2],[2019,74,124,153,99.9]]
cols=['Year','avg_score','higherSeed_score','myScore','percentile_myScore']
pastMadness=pd.DataFrame(pastMadness, columns=cols)

## Importing the data here and slicing it in different ways for training and testing model
##### We will only be showing testing model performance

In [48]:
tourney_data=pd.read_csv("tourney_game_master_test_actual.csv")
tourney_data.dropna(subset=['KP_AdjEM_diff'], how='all',inplace=True)
train_data=tourney_data[tourney_data['Season']<2017].reset_index(drop=True)
test_data=tourney_data[tourney_data['Season']>2016].reset_index(drop=True)


col_names = ['Season','tourn_round','point_diff','ATeamID','ATeam','ASeed','Aregion','BTeamID','BTeam','BSeed','Bregion']
train_other_data=train_data.iloc[:,51:62]
train_output_data['AWon']=train_data.AWon
train_output_data['Season']=train_data.Season
test_other_data=test_data.iloc[:,51:62]
test_output_data['AWon']=test_data.AWon
test_output_data['Season']=test_data.Season
scaler = StandardScaler()

scaler.fit(train_data.iloc[:,:51])
train_input_data = pd.DataFrame(scaler.transform(train_data.iloc[:,:51]), index=train_data.iloc[:,:51].index, columns=train_data.iloc[:,:51].columns)
test_input_data = pd.DataFrame(scaler.transform(test_data.iloc[:,:51]), index=test_data.iloc[:,:51].index, columns=test_data.iloc[:,:51].columns)


test_full_data=test_input_data
test_full_data[col_names] = test_other_data 
test_full_data['AWon']=test_output_data.AWon
test_output_data['Season']=test_other_data['Season']

## Here we have a sample of some rows that will be used as inputs to predict the outcome of some March Madness games
##### Does not show all inputs, these inputs are an array of end of season stats for each team and then a comparison differential of said stats
###### Below we also have they testing output data which has the actual outcomes we will be trying to predict

In [49]:
#Show sample of DataFrame
test_input_data.head(5)

Unnamed: 0,A_KP_AdjEM,A_KP_SOS_Adj,ALuck,Asos,Ats_pct,...,BTeamID,BTeam,BSeed,Bregion,AWon
0,-2.098734,-1.269418,2.502111,-1.109039,0.8535,...,1112,arizona,2,X,0
1,0.558475,1.034164,0.339268,0.7537,1.093753,...,1388,saint-marys,7,X,1
2,-0.546887,-1.109381,-0.686163,-1.271972,-0.523168,...,1116,arkansas,8,Z,0
3,0.66353,-0.181664,-0.584214,-0.187317,-0.223352,...,1308,new-mexico-st,14,W,1
4,0.66353,-0.181664,-0.584214,-0.187317,-0.223352,...,1425,usc,11,W,1


In [50]:
test_output_data.head(5)



Unnamed: 0,AWon,Season,ASpread
0,0,2017,1.0
1,1,2017,1.0
2,0,2017,1.0
3,1,2017,1.0
4,1,2017,0.0


## Loading Model Framework and Weights
##### This loads a file that has the saved framework and tuned model, it then loads the weights which will be used for predicting games

In [51]:
#Load Model and weights
json_file = open('model_valid_demo.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model = model_from_json(loaded_model_json)
# load weights into new model
optimizer = keras.optimizers.Adagrad(lr=.01,epsilon=0,decay=0)
model.compile(optimizer= optimizer ,loss='binary_crossentropy',metrics=['acc'])
model.load_weights('model_valid_demo.h5')

print("Loaded model weights from disk")

Loaded model weights from disk


## How we show performance?
##### The primary basis for measuring a model performance is measuring 'Log Loss' or basically a measure of accuracy that factors in confidence
##### Below we will show: Log Loss for random choice of winner/confidence, The Log Loss for the Top 5 models as part of the Kaggle NCAA competition for the 3 years we are testing
##### Link to Kaggle Leaderboard for 2019: https://www.kaggle.com/c/mens-machine-learning-competition-2019/leaderboard

In [52]:
def LogLoss(predictions, realizations):
    predictions_use = predictions.clip(0)
    realizations_use = realizations.clip(0)
    LogLoss = -np.mean( (realizations_use * np.log(predictions_use)) + 
                        (1 - realizations_use) * np.log(1 - predictions_use) )
    return LogLoss

In [53]:
#For reference this is the LogLoss of a model that predicts 50/50 shot of either team winning each time
#rather than favoring one team or another

bench_5050 = np.repeat(0.5, len(test_output_data))
LogLoss(bench_5050, test_output_data.AWon)

0.6931471805599452

In [54]:
#Log Loss for past Kaggle competition winners
#https://www.kaggle.com/c/mens-machine-learning-competition-2019/leaderboard
pastScores

Unnamed: 0,Year,Rank,LogLoss
0,2017,1,0.43857
1,2017,2,0.44981
2,2017,3,0.45373
3,2017,4,0.461
4,2017,5,0.46107
5,2018,1,0.53194
6,2018,2,0.53693
7,2018,3,0.54013
8,2018,4,0.54967
9,2018,5,0.54987


## How does my model stack up?
##### Here is a readout of accuracy followed by Log Loss for the 3 years of Kaggle competition shown above
##### We then show a sample of outputs for the models outputted prediction/confidence

In [55]:
#LogLoss FN for accuracy weighted by confidence


#Accuracy on Test set
years=[2017,2018,2019]
for year in years:
    test_input_temp=test_input_data[test_input_data['Season']==year].reset_index(drop=True)
    test_output_temp=test_output_data[test_output_data['Season']==year].reset_index(drop=True)
    test_output_temp=test_output_temp[['AWon']]
    
    scores = model.evaluate(test_input_temp.iloc[:,0:51], test_output_temp, verbose=0)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    
    #Test LogLoss on holdout test set
    y_pred_keras = model.predict(test_input_temp.iloc[:,0:51]).ravel()
    y_pred_round=np.round(y_pred_keras,0)
    temp = np.array(y_pred_keras)[np.newaxis]
    np.set_printoptions(suppress=True)
    y_pred_tran=temp.T
    print("LL_Test ",year,": ",LogLoss(y_pred_tran, test_output_temp))


y_pred_keras = model.predict(test_input_data.iloc[:,0:51]).ravel()
y_pred_round=np.round(y_pred_keras,0)
testPerf=test_input_data.iloc[:,0:51]
testPerf['y_pred_confidence']=y_pred_keras

testPerf['y_pred_test']=y_pred_round
testPerf['y_act']=test_output_data.AWon
testPerf.sample(10)

acc: 85.07%
LL_Test  2017 :  AWon    0.3434
dtype: float64
acc: 80.60%
LL_Test  2018 :  AWon    0.419315
dtype: float64
acc: 86.57%
LL_Test  2019 :  AWon    0.297221
dtype: float64


Unnamed: 0,A_KP_AdjEM,A_KP_SOS_Adj,ALuck,Asos,Ats_pct,...,AdjustD_diff,AdjustO_diff,y_pred_confidence,y_pred_test,y_act
176,0.402034,-0.989269,-1.139634,-0.574058,-1.192326,...,-0.4196,0.104969,0.450424,0.0,0
127,1.751764,2.411094,0.144679,1.547086,2.966961,...,0.509999,1.573338,0.926435,1.0,1
124,-2.046207,-0.961915,2.902752,-1.001428,0.057671,...,2.383073,-1.459687,0.084478,0.0,1
144,1.393206,0.903174,-0.088497,1.169111,0.20276,...,-0.62772,0.225327,0.87112,1.0,1
170,-0.524049,1.041051,1.004745,0.860144,1.082656,...,0.288005,-0.087604,0.628605,1.0,1
76,1.192231,0.883695,-0.564792,0.805527,0.730665,...,0.218632,1.850162,0.917575,1.0,1
129,1.751764,2.411094,0.144679,1.547086,2.966961,...,0.468375,1.741839,0.931374,1.0,1
152,-1.549479,-0.840658,1.409058,-1.054586,0.398393,...,1.633843,-0.918075,0.094126,0.0,0
137,1.058629,0.671944,-0.329892,0.445884,0.4958,...,-0.516723,-0.147783,0.363792,0.0,0
117,0.439717,-0.755679,-0.603949,-0.297111,0.069589,...,-0.710967,1.68166,0.913368,1.0,1


# So that's cool, but how do we do at ACTUALLY picking a bracket?
#### We start with the 2017 bracket and show each rounds matchups as my model picked them, followed by the calculated ESPN score

In [56]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from runTournament import runTournament

df=pd.read_csv("tourney_game_master_test_actual.csv")
#2017 results
dfList, scoreDF=runTournament(df,2017,'model_valid_demo','model_valid_demo')
for df in dfList:
    print('\n',df)

scoreDF


               ATeam               BTeam  ATeamID  BTeamID
0         villanova      mount-st-marys     1437     1291
1         wisconsin       virginia-tech     1458     1439
2          virginia      unc-wilmington     1438     1423
3           florida   east-tennessee-st     1196     1190
4               usc                 smu     1425     1374
5            baylor       new-mexico-st     1124     1308
6         marquette      south-carolina     1266     1376
7              troy                duke     1407     1181
8           gonzaga     south-dakota-st     1211     1355
9        vanderbilt        northwestern     1435     1321
10        princeton          notre-dame     1343     1323
11    west-virginia            bucknell     1452     1137
12         maryland              xavier     1268     1462
13       florida-st  florida-gulf-coast     1199     1195
14              vcu         saint-marys     1433     1388
15     north-dakota             arizona     1315     1112
16         u

Unnamed: 0,round,score
0,32,28
1,16,22
2,8,28
3,4,8
4,2,0
5,1,0
6,Total,86


# 2017 could have been better, how about 2018?
#### Same process as before

In [58]:
#2018 results
df=pd.read_csv("tourney_game_master_test_actual.csv")
dfList, scoreDF=runTournament(df,2018,'model_valid_demo','model_valid_demo')
for df in dfList:
    print('\n',df)

scoreDF


                 ATeam                  BTeam  ATeamID  BTeamID
0           villanova                radford     1437     1347
1       virginia-tech                alabama     1439     1104
2           murray-st          west-virginia     1293     1452
3            marshall             wichita-st     1267     1455
4      st-bonaventure                florida     1382     1196
5          texas-tech       stephen-f-austin     1403     1372
6            arkansas                 butler     1116     1139
7              purdue       cal-st-fullerton     1345     1168
8              kansas                   penn     1242     1335
9   north-carolina-st             seton-hall     1301     1371
10            clemson          new-mexico-st     1155     1308
11             auburn  college-of-charleston     1120     1158
12                tcu               syracuse     1395     1393
13           bucknell            michigan-st     1137     1277
14       rhode-island               oklahoma     1348

Unnamed: 0,round,score
0,32,28
1,16,20
2,8,12
3,4,16
4,2,16
5,1,32
6,Total,124


# We did ok considering a 16 seed upset a 1 seed in the 1st round 2018. Finally, this last year:

In [59]:
#2019 results
df=pd.read_csv("tourney_game_master_test_actual.csv")
dfList, scoreDF=runTournament(df,2019,'model_valid_demo','model_valid_demo')
for df in dfList:
    print('\n',df)

scoreDF



                   ATeam              BTeam  ATeamID  BTeamID
0                  duke    north-dakota-st     1181     1295
1                   vcu                ucf     1433     1416
2        mississippi-st            liberty     1280     1251
3         virginia-tech        saint-louis     1439     1387
4               belmont           maryland     1125     1268
5                   lsu               yale     1261     1463
6             minnesota         louisville     1278     1257
7           michigan-st            bradley     1277     1133
8   fairleigh-dickinson            gonzaga     1192     1211
9                baylor           syracuse     1124     1393
10            murray-st          marquette     1293     1266
11           florida-st            vermont     1199     1436
12           arizona-st            buffalo     1113     1138
13           texas-tech  northern-kentucky     1403     1297
14              florida             nevada     1196     1305
15             michiga

Unnamed: 0,round,score
0,32,25
1,16,28
2,8,28
3,4,24
4,2,16
5,1,32
6,Total,153


# 2019 went pretty well, but how well?
#### Below is my models score compared to the average ESPN score and the score given that you just pick higher seeds
#### Also shown is a percentile score compared to all other users on ESPN
#### Here's a link to the 2019 leaderboard: http://fantasy.espn.com/tournament-challenge-bracket/2019/en/group?groupID=1041234
#### Another note Nate Silver's Fivethirtyeight would only get 72 in 2017, 114 in 2018, and 95 in 2019: https://projects.fivethirtyeight.com/2018-march-madness-predictions/
#### So not only is the model superior on statistical metrics, but it is good at the element humans care about - winning your office pool

In [60]:
pastMadness

Unnamed: 0,Year,avg_score,higherSeed_score,myScore,percentile_myScore
0,2017,65.7,82,86,79.0
1,2018,57.0,81,124,98.2
2,2019,74.0,124,153,99.9


# Finally, are bigger companies with lots of staff doing better?
#### Not quite, as far as I could find the only company publishing any kind of results is Adobe
#### There model achieve performance that garnered the 98th percentile, not quite 99.99...
#### See this page: https://www.adobe.com/analytics/hack-the-bracket.html?red=a

# Ok, but how does this make anyone money, how about betting the spread?
#### Well, beyond just raising your likelihood to win your office pool, I've also modified the model to predict against the spread
#### So lets load some spread data and the model that predicts against it.

In [62]:

tourney_data=pd.read_csv("tourney_game_spread_master.csv")
tourney_data.dropna(inplace=True)
train_data=tourney_data[tourney_data['Season']<2017].reset_index(drop=True)
test_data=tourney_data[tourney_data['Season']>2016].reset_index(drop=True)

col_names = ['Season','tourn_round','point_diff','ATeamID','ATeam','ASeed','Aregion','BTeamID','BTeam','BSeed','Bregion']
train_other_data=train_data.iloc[:,51:62]
train_output_data['ASpread']=train_data.ASpread
train_output_data['Season']=train_data.Season
test_other_data=test_data.iloc[:,51:62]
test_output_data['ASpread']=test_data.ASpread
test_output_data['Season']=test_data.Season
scaler = StandardScaler()

scaler.fit(train_data.iloc[:,:51])
train_input_data = pd.DataFrame(scaler.transform(train_data.iloc[:,:51]), index=train_data.iloc[:,:51].index, columns=train_data.iloc[:,:51].columns)
test_input_data = pd.DataFrame(scaler.transform(test_data.iloc[:,:51]), index=test_data.iloc[:,:51].index, columns=test_data.iloc[:,:51].columns)


test_full_data=test_input_data
test_full_data[col_names] = test_other_data 
test_full_data['ASpread']=test_output_data.ASpread
test_output_data['Season']=test_other_data['Season']

json_file = open('model_spread.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model = model_from_json(loaded_model_json)
# load weights into new model
optimizer = keras.optimizers.Adagrad(lr=.015,epsilon=0,decay=0)
model.compile(optimizer= optimizer ,loss='binary_crossentropy',metrics=['acc'])
model.load_weights('model_spread.h5')

print("Loaded model weights from disk")



Loaded model weights from disk
acc: 67.16%
LL_Test  2017 :  ASpread    0.57725
dtype: float64
acc: 67.16%
LL_Test  2018 :  ASpread    0.61954
dtype: float64


Unnamed: 0,A_KP_AdjEM,A_KP_SOS_Adj,ALuck,Asos,Ats_pct,...,AdjustD_diff,AdjustO_diff,y_pred_confidence,y_pred_test,y_act
32,1.127245,0.532458,-0.542157,0.638328,0.117493,...,-2.070813,2.089427,0.752583,1.0,1.0
100,-0.950929,-0.124102,0.748243,-0.1556,0.376076,...,1.157452,-0.705853,0.294517,0.0,1.0
23,0.591958,2.298287,0.33316,1.009913,2.492316,...,1.312521,0.601031,0.88765,1.0,1.0
105,-0.019133,1.041117,-0.045721,0.068346,1.281792,...,2.158354,0.830946,0.807663,1.0,0.0
31,-0.297855,-1.230574,-0.431543,-0.73865,-1.430532,...,-0.449632,-0.294427,0.822821,1.0,1.0
128,0.575631,0.230974,-0.357068,0.237052,1.710884,...,0.523077,-0.826861,0.879474,1.0,1.0
42,-0.267534,-0.657753,-0.325348,-0.455061,-0.693275,...,0.325715,-0.802659,0.777225,1.0,0.0
67,-0.294356,-0.680437,-1.005257,-0.73871,1.149627,...,0.621757,0.806744,0.879947,1.0,1.0
113,-0.587073,-0.503144,-0.743076,-1.095481,0.407679,...,0.311618,0.516325,0.726649,1.0,1.0
22,-0.2687,-0.585504,-1.568368,-1.398966,1.06479,...,1.326618,0.964054,0.893901,1.0,1.0


# How do we compare performance for spread?
#### Well because we are trying to beat vegas in this instance an average performance would be 50/50 because of how they try to set the lines
#### With that being said a good log loss would be anything less that .69 and a good accuracy would obviously be better than 50%

In [66]:
years=[2017,2018]
for year in years:
    test_input_temp=test_input_data[test_input_data['Season']==year].reset_index(drop=True)
    test_output_temp=test_output_data[test_output_data['Season']==year].reset_index(drop=True)
    test_output_temp=test_output_temp[['ASpread']]
    
    scores = model.evaluate(test_input_temp.iloc[:,0:51], test_output_temp, verbose=0)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

    #Test LogLoss on holdout test set
    y_pred_keras = model.predict(test_input_temp.iloc[:,0:51]).ravel()
    y_pred_round=np.round(y_pred_keras,0)
    temp = np.array(y_pred_keras)[np.newaxis]
    np.set_printoptions(suppress=True)
    y_pred_tran=temp.T
    print("LL_Test ",year,": ",LogLoss(y_pred_tran, test_output_temp))




#show pred df for last season
y_pred_keras = model.predict(test_input_data.iloc[:,0:51]).ravel()
y_pred_round=np.round(y_pred_keras,0)
testPerf=test_input_data.iloc[:,0:51]
testPerf['y_pred_confidence']=y_pred_keras

testPerf['y_pred_test']=y_pred_round
testPerf['y_act']=test_output_data.ASpread

testPerf.sample(10)

acc: 67.16%
LL_Test  2017 :  ASpread    0.57725
dtype: float64
acc: 67.16%
LL_Test  2018 :  ASpread    0.61954
dtype: float64


Unnamed: 0,A_KP_AdjEM,A_KP_SOS_Adj,ALuck,Asos,Ats_pct,...,AdjustD_diff,AdjustO_diff,y_pred_confidence,y_pred_test,y_act
29,-0.410977,-1.126815,-1.037597,-0.831413,-0.575827,...,-0.097201,-0.173419,0.869288,1.0,1.0
80,-0.091437,-0.53054,-1.006573,-0.956753,0.63168,...,-0.026715,0.298511,0.883927,1.0,1.0
26,-2.090309,-1.20662,1.431402,-1.654986,-0.705343,...,1.946896,-1.371396,0.54927,1.0,0.0
94,-0.364329,-0.659742,-0.477076,-0.753773,0.327442,...,0.382104,0.189604,0.871517,1.0,1.0
58,0.668927,-0.811078,-1.318787,0.074246,-0.439829,...,-2.423243,-0.693752,0.796215,1.0,0.0
59,0.138305,0.157599,-0.722831,-0.244872,0.585804,...,1.312521,0.201705,0.885151,1.0,1.0
12,0.175623,-0.354542,-0.62834,0.066321,0.63245,...,-1.210882,0.74624,0.792364,1.0,0.0
107,-0.974253,0.411549,0.997847,0.025957,1.336301,...,1.270229,-0.258124,0.406589,0.0,1.0
90,-0.441298,-0.994787,-0.707149,-0.902351,0.579004,...,-0.985326,0.855147,0.889714,1.0,0.0
23,0.591958,2.298287,0.33316,1.009913,2.492316,...,1.312521,0.601031,0.88765,1.0,1.0


## This 67% accuracy tells us over the long run we could stand to make quite a bit of money in Vegas, especially if we focused on picks our model was most confident in which would surely raise our overall accuracy