# Predicting NBA Game Winners

In [1]:
import platform
import numpy as np
import pandas as pd
import sklearn as sk
import os
from keras.models import Sequential
from keras.layers import Dense,Dropout,Activation
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn import preprocessing
from keras.utils import np_utils
import math

Using TensorFlow backend.


In [2]:
# Load the dataset: NBA statistics from the 2013-2014 basketball season
df = pd.read_csv('Data/2018-2019gamedata.csv', parse_dates = ['Date']).drop([0],axis=0)
df.columns = ['Date','Visitor Team','Visitor Score','Home Team','Home Score']
df.head()

Unnamed: 0,Date,Visitor Team,Visitor Score,Home Team,Home Score
1,2018-10-16,Philadelphia 76ers,87.0,Boston Celtics,105.0
2,2018-10-16,Oklahoma City Thunder,100.0,Golden State Warriors,108.0
3,2018-10-17,Milwaukee Bucks,113.0,Charlotte Hornets,112.0
4,2018-10-17,Brooklyn Nets,100.0,Detroit Pistons,103.0
5,2018-10-17,Memphis Grizzlies,83.0,Indiana Pacers,111.0


In [3]:
df['Home Win'] = df['Visitor Score'] < df['Home Score']
y_true = df['Home Win'].values

In [4]:
n_games = df['Home Win'].count()
n_homewins = df['Home Win'].sum()
win_percentage = n_homewins / n_games

print('Home Win percentage: {0:.2f}%'.format(100 * win_percentage))

Home Win percentage: 59.27%


In [5]:
df['Home Last Win'] = False
df['Visitor Last Win'] = False

In [6]:
from collections import defaultdict

won_last = defaultdict(int)   # The default value of int is 0

for index, row in df.iterrows():
    home_team = row['Home Team']
    visitor_team = row['Visitor Team']
    df.at[index,'Home Last Win'] = won_last[home_team]
    df.at[index,'Visitor Last Win'] = won_last[visitor_team]
    won_last[home_team] = row['Home Win']
    won_last[visitor_team] = not row['Home Win']
df.head()

Unnamed: 0,Date,Visitor Team,Visitor Score,Home Team,Home Score,Home Win,Home Last Win,Visitor Last Win
1,2018-10-16,Philadelphia 76ers,87.0,Boston Celtics,105.0,True,False,False
2,2018-10-16,Oklahoma City Thunder,100.0,Golden State Warriors,108.0,True,False,False
3,2018-10-17,Milwaukee Bucks,113.0,Charlotte Hornets,112.0,False,False,False
4,2018-10-17,Brooklyn Nets,100.0,Detroit Pistons,103.0,True,False,False
5,2018-10-17,Memphis Grizzlies,83.0,Indiana Pacers,111.0,True,False,False


In [18]:
def create_baseline2():
    model = Sequential()
    model.add(Dense(4, input_dim=2, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(2, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [20]:
estimator = KerasClassifier(build_fn=create_baseline2, epochs=20, batch_size=600, verbose=0)

X_previouswins = df[['Home Last Win', 'Visitor Last Win']].values

scores = cross_val_score(estimator, X_previouswins, y_true)
print("Baseline: %.2f%% (%.2f%%)" % (scores.mean()*100, scores.std()*100))

Baseline: 59.27% (0.72%)


In [21]:
df["Home Win Streak"] = 0
df["Visitor Win Streak"] = 0

win_streak = defaultdict(int)

for index, row in df.iterrows():
    home_team = row["Home Team"]
    visitor_team = row["Visitor Team"]
    df.at[index, "Home Win Streak"] = win_streak[home_team]
    df.at[index, "Visitor Win Streak"] = win_streak[visitor_team]
    
    if row["Home Win"]:
        win_streak[home_team] += 1
        win_streak[visitor_team] = 0
    else:
        win_streak[home_team] = 0
        win_streak[visitor_team] += 1
df.head()

Unnamed: 0,Date,Visitor Team,Visitor Score,Home Team,Home Score,Home Win,Home Last Win,Visitor Last Win,Home Win Streak,Visitor Win Streak
1,2018-10-16,Philadelphia 76ers,87.0,Boston Celtics,105.0,True,False,False,0,0
2,2018-10-16,Oklahoma City Thunder,100.0,Golden State Warriors,108.0,True,False,False,0,0
3,2018-10-17,Milwaukee Bucks,113.0,Charlotte Hornets,112.0,False,False,False,0,0
4,2018-10-17,Brooklyn Nets,100.0,Detroit Pistons,103.0,True,False,False,0,0
5,2018-10-17,Memphis Grizzlies,83.0,Indiana Pacers,111.0,True,False,False,0,0


In [22]:
def create_baseline4():
    model = Sequential()
    model.add(Dense(8, input_dim=4, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(4, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(2, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [24]:
X_winstreak = df[["Home Last Win", "Visitor Last Win", "Home Win Streak", "Visitor Win Streak"]].values

estimator = KerasClassifier(build_fn=create_baseline4, epochs=20, batch_size=600, verbose=0)
scores = cross_val_score(estimator, X_winstreak, y_true)

print("Baseline: %.2f%% (%.2f%%)" % (scores.mean()*100, scores.std()*100))

Baseline: 56.75% (4.11%)


In [25]:
rank = pd.read_csv('Data/2018-2019standings.csv', index_col = 'Team')
rank.head()

Unnamed: 0_level_0,Rk,Overall,Home,Road,E,W,A,C,SE,NW,...,Post,≤3,≥10,Oct,Nov,Dec,Jan,Feb,Mar,Apr
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Milwaukee Bucks,1,60-22,33-8,27-14,40-12,20-10,13-5,14-2,13-5,6-4,...,17-8,5-6,45-5,7-0,8-6,10-4,12-3,10-1,10-6,3-2
Toronto Raptors,2,58-24,32-9,26-15,36-16,22-8,12-4,10-8,14-4,6-4,...,15-8,11-7,33-9,7-1,12-3,8-7,10-5,8-1,9-6,4-1
Golden State Warriors,3,57-25,30-11,27-14,22-8,35-17,6-4,8-2,8-2,12-6,...,16-9,7-7,34-10,8-1,7-7,10-5,11-2,7-4,9-5,5-1
Denver Nuggets,4,54-28,34-7,20-21,20-10,34-18,7-3,6-4,7-3,12-4,...,15-10,13-3,23-11,6-1,9-6,8-4,12-4,7-4,9-6,3-3
Houston Rockets,5,53-29,31-10,22-19,21-9,32-20,8-2,6-4,7-3,8-10,...,20-5,5-7,29-12,1-5,9-6,11-4,8-6,8-4,12-3,4-1


In [26]:
rank.columns = ['Rank', 'Overall', 'Home', 'Road', 'Eastern Conference', 'Western Conference', 
                'Atlantic Div', 'Central Div', 'Southeast Div', 'Northwest Div', 'Pacific Div', 'Southwest Div', 
                'Pre All-Star', 'Post All-Star', 'Margin ≤3', 'Margin ≥10', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 
                'Mar', 'Apr']
rank.head(1)

Unnamed: 0_level_0,Rank,Overall,Home,Road,Eastern Conference,Western Conference,Atlantic Div,Central Div,Southeast Div,Northwest Div,...,Post All-Star,Margin ≤3,Margin ≥10,Oct,Nov,Dec,Jan,Feb,Mar,Apr
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Milwaukee Bucks,1,60-22,33-8,27-14,40-12,20-10,13-5,14-2,13-5,6-4,...,17-8,5-6,45-5,7-0,8-6,10-4,12-3,10-1,10-6,3-2


In [27]:
def home_team_ranks_higher(row):
    home_team = row["Home Team"]
    visitor_team = row["Visitor Team"]
    
    home_rank = rank.loc[home_team]["Rank"]
    visitor_rank = rank.loc[visitor_team]["Rank"]
    
    return home_rank < visitor_rank   # The higher ranking will be the lower number

In [28]:
df["Home Team Ranks Higher"] = df.apply(home_team_ranks_higher, axis = 1)
df.head()

Unnamed: 0,Date,Visitor Team,Visitor Score,Home Team,Home Score,Home Win,Home Last Win,Visitor Last Win,Home Win Streak,Visitor Win Streak,Home Team Ranks Higher
1,2018-10-16,Philadelphia 76ers,87.0,Boston Celtics,105.0,True,False,False,0,0,False
2,2018-10-16,Oklahoma City Thunder,100.0,Golden State Warriors,108.0,True,False,False,0,0,True
3,2018-10-17,Milwaukee Bucks,113.0,Charlotte Hornets,112.0,False,False,False,0,0,False
4,2018-10-17,Brooklyn Nets,100.0,Detroit Pistons,103.0,True,False,False,0,0,False
5,2018-10-17,Memphis Grizzlies,83.0,Indiana Pacers,111.0,True,False,False,0,0,True


In [29]:
def create_baseline3():
    model = Sequential()
    model.add(Dense(6, input_dim=3, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(3, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [30]:
X_homehigher = df[["Home Last Win", "Visitor Last Win", "Home Team Ranks Higher"]].values

estimator = KerasClassifier(build_fn=create_baseline3, epochs=20, batch_size=600, verbose=0)
scores = cross_val_score(estimator, X_homehigher, y_true)

print("Baseline: %.2f%% (%.2f%%)" % (scores.mean()*100, scores.std()*100))

Baseline: 62.20% (2.59%)


In [31]:
last_game_winner = defaultdict(int)

def home_team_won_last(row):
    home_team = row['Home Team']
    visitor_team = row['Visitor Team']
    
    teams = tuple(sorted([home_team, visitor_team]))
    if last_game_winner[teams] == row['Home Team']:
        result = 1
    else:
        result = 0
    
    winner = row['Home Team'] if row['Home Win'] else row['Visitor Team']
    last_game_winner[teams] = winner
    
    return result

df['Home Team Won Last'] = df.apply(home_team_won_last, axis = 1)
df.head()

Unnamed: 0,Date,Visitor Team,Visitor Score,Home Team,Home Score,Home Win,Home Last Win,Visitor Last Win,Home Win Streak,Visitor Win Streak,Home Team Ranks Higher,Home Team Won Last
1,2018-10-16,Philadelphia 76ers,87.0,Boston Celtics,105.0,True,False,False,0,0,False,0
2,2018-10-16,Oklahoma City Thunder,100.0,Golden State Warriors,108.0,True,False,False,0,0,True,0
3,2018-10-17,Milwaukee Bucks,113.0,Charlotte Hornets,112.0,False,False,False,0,0,False,0
4,2018-10-17,Brooklyn Nets,100.0,Detroit Pistons,103.0,True,False,False,0,0,False,0
5,2018-10-17,Memphis Grizzlies,83.0,Indiana Pacers,111.0,True,False,False,0,0,True,0


In [32]:
X_home_higher = df[['Home Last Win', 'Visitor Last Win', "Home Team Ranks Higher", "Home Team Won Last"]].values

estimator = KerasClassifier(build_fn=create_baseline4, epochs=20, batch_size=600, verbose=0)
scores = cross_val_score(estimator, X_home_higher, y_true)

print("Baseline: %.2f%% (%.2f%%)" % (scores.mean()*100, scores.std()*100))

Baseline: 59.27% (0.72%)


In [33]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

encoding = LabelEncoder()
encoding.fit(df["Home Team"].values)

LabelEncoder()

In [34]:
home_teams = encoding.transform(df["Home Team"].values)
visitor_teams = encoding.transform(df["Visitor Team"].values)

X_teams = np.vstack([home_teams, visitor_teams]).T

In [35]:
onehot = OneHotEncoder()
X_teams = onehot.fit_transform(X_teams).todense()

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [40]:
def create_baseline60():
    model = Sequential()
    model.add(Dense(128, input_dim=60, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [41]:
estimator = KerasClassifier(build_fn=create_baseline60, epochs=20, batch_size=600, verbose=0)
scores = cross_val_score(estimator, X_teams, y_true)

print("Baseline: %.2f%% (%.2f%%)" % (scores.mean()*100, scores.std()*100))

Baseline: 59.27% (0.72%)


In [42]:
X_all = np.hstack([X_home_higher, X_teams])
print(X_all.shape)

(1230, 64)


In [43]:
def create_baseline64():
    model = Sequential()
    model.add(Dense(128, input_dim=64, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [45]:
estimator = KerasClassifier(build_fn=create_baseline64, epochs=150, batch_size=500, verbose=0)
scores = cross_val_score(estimator, X_all, y_true)

print("Baseline: %.2f%% (%.2f%%)" % (scores.mean()*100, scores.std()*100))

Baseline: 63.17% (2.02%)


In [46]:
df_20 = pd.read_csv('Data/2019-2020gamedata.csv', parse_dates = ['Date']).drop(0,axis=0)
df_20.head()
df_20.columns = ['Date', 'Visitor Team', 'Visitor Score', 'Home Team', 'Home Score']
df_20.head()

Unnamed: 0,Date,Visitor Team,Visitor Score,Home Team,Home Score
1,2019-10-22,New Orleans Pelicans,122.0,Toronto Raptors,130.0
2,2019-10-22,Los Angeles Lakers,102.0,Los Angeles Clippers,112.0
3,2019-10-23,Chicago Bulls,125.0,Charlotte Hornets,126.0
4,2019-10-23,Detroit Pistons,119.0,Indiana Pacers,110.0
5,2019-10-23,Cleveland Cavaliers,85.0,Orlando Magic,94.0


In [47]:
rank_19 = pd.read_csv('Data/2018-2019standings.csv', index_col = 'Team')

rank_19.columns = ['Rank', 'Overall', 'Home', 'Road', 'Eastern Conference', 'Western Conference', 
                'Atlantic Div', 'Central Div', 'Southeast Div', 'Northwest Div', 'Pacific Div', 'Southwest Div', 
                'Pre All-Star', 'Post All-Star', 'Margin ≤3', 'Margin ≥10', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 
                'Mar', 'Apr']
rank_19.head()

Unnamed: 0_level_0,Rank,Overall,Home,Road,Eastern Conference,Western Conference,Atlantic Div,Central Div,Southeast Div,Northwest Div,...,Post All-Star,Margin ≤3,Margin ≥10,Oct,Nov,Dec,Jan,Feb,Mar,Apr
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Milwaukee Bucks,1,60-22,33-8,27-14,40-12,20-10,13-5,14-2,13-5,6-4,...,17-8,5-6,45-5,7-0,8-6,10-4,12-3,10-1,10-6,3-2
Toronto Raptors,2,58-24,32-9,26-15,36-16,22-8,12-4,10-8,14-4,6-4,...,15-8,11-7,33-9,7-1,12-3,8-7,10-5,8-1,9-6,4-1
Golden State Warriors,3,57-25,30-11,27-14,22-8,35-17,6-4,8-2,8-2,12-6,...,16-9,7-7,34-10,8-1,7-7,10-5,11-2,7-4,9-5,5-1
Denver Nuggets,4,54-28,34-7,20-21,20-10,34-18,7-3,6-4,7-3,12-4,...,15-10,13-3,23-11,6-1,9-6,8-4,12-4,7-4,9-6,3-3
Houston Rockets,5,53-29,31-10,22-19,21-9,32-20,8-2,6-4,7-3,8-10,...,20-5,5-7,29-12,1-5,9-6,11-4,8-6,8-4,12-3,4-1


In [48]:
df_20['Home Win'] = df_20['Visitor Score'] < df_20['Home Score']

y_true = df_20['Home Win'].values

In [49]:
df_20['Home Last Win'] = False
df_20['Visitor Last Win'] = False

won_last = defaultdict(int)   # The default value of int is 0

for index, row in df_20.iterrows():
    home_team = row['Home Team']
    visitor_team = row['Visitor Team']
    row['Home Last Win'] = won_last[home_team]
    row['Visitor Last Win'] = won_last[visitor_team]
    won_last[home_team] = row['Home Win']
    won_last[visitor_team] = not row['Home Win']
df_20.head()

Unnamed: 0,Date,Visitor Team,Visitor Score,Home Team,Home Score,Home Win,Home Last Win,Visitor Last Win
1,2019-10-22,New Orleans Pelicans,122.0,Toronto Raptors,130.0,True,False,False
2,2019-10-22,Los Angeles Lakers,102.0,Los Angeles Clippers,112.0,True,False,False
3,2019-10-23,Chicago Bulls,125.0,Charlotte Hornets,126.0,True,False,False
4,2019-10-23,Detroit Pistons,119.0,Indiana Pacers,110.0,False,False,False
5,2019-10-23,Cleveland Cavaliers,85.0,Orlando Magic,94.0,True,False,False


In [100]:
df_20["Home Win Streak"] = 0
df_20["Visitor Win Streak"] = 0
win_streak = defaultdict(int)

for index, row in df_20.iterrows():
    home_team = row["Home Team"]
    visitor_team = row["Visitor Team"]
    row["Home Win Streak"] = win_streak[home_team]
    row["Visitor Win Streak"] = win_streak[visitor_team]
    df_20.loc[index] = row    
    
    if row["Home Win"]:
        win_streak[home_team] += 1
        win_streak[visitor_team] = 0
    else:
        win_streak[home_team] = 0
        win_streak[visitor_team] += 1
        
df_20.head()

Unnamed: 0,Date,Visitor Team,Visitor Score,Home Team,Home Score,Home Win,Home Last Win,Visitor Last Win,Home Win Streak,Visitor Win Streak
1,2019-10-22,New Orleans Pelicans,122.0,Toronto Raptors,130.0,True,False,False,0,0
2,2019-10-22,Los Angeles Lakers,102.0,Los Angeles Clippers,112.0,True,False,False,0,0
3,2019-10-23,Chicago Bulls,125.0,Charlotte Hornets,126.0,True,False,False,0,0
4,2019-10-23,Detroit Pistons,119.0,Indiana Pacers,110.0,False,False,False,0,0
5,2019-10-23,Cleveland Cavaliers,85.0,Orlando Magic,94.0,True,False,False,0,0


In [50]:
def home_team_ranks_higher_2020(row):
    home_team = row['Home Team']
    visitor_team = row['Visitor Team']
    
    home_rank = rank_19.loc[home_team]['Rank']
    visitor_rank = rank_19.loc[visitor_team]['Rank']
    
    return home_rank < visitor_rank

In [51]:
df_20["Home Team Ranks Higher"] = df_20.apply(home_team_ranks_higher_2020, axis = 1)
df_20.head()

Unnamed: 0,Date,Visitor Team,Visitor Score,Home Team,Home Score,Home Win,Home Last Win,Visitor Last Win,Home Team Ranks Higher
1,2019-10-22,New Orleans Pelicans,122.0,Toronto Raptors,130.0,True,False,False,True
2,2019-10-22,Los Angeles Lakers,102.0,Los Angeles Clippers,112.0,True,False,False,True
3,2019-10-23,Chicago Bulls,125.0,Charlotte Hornets,126.0,True,False,False,True
4,2019-10-23,Detroit Pistons,119.0,Indiana Pacers,110.0,False,False,False,True
5,2019-10-23,Cleveland Cavaliers,85.0,Orlando Magic,94.0,True,False,False,True


In [53]:
# Add a new feature without using function -> Determines whether a team won the last matchup between the 2 teams
# This does not take into consideration the home/visitor teams

last_match_winner = defaultdict(int)
df_20["Home Team Won Last"] = 0

for index, row in df_20.iterrows():
    home_team = row["Home Team"]
    visitor_team = row["Visitor Team"]
    teams = tuple(sorted([home_team, visitor_team]))  # Sort for a consistent ordering
    
    # Set in the row, who won the last encounter
    row["Home Team Won Last"] = 1 if last_match_winner[teams] == row["Home Team"] else 0
    df_20.loc[index] = row
    
    # Who won this one?
    winner = row["Home Team"] if row["Home Win"] else row["Visitor Team"]
    last_match_winner[teams] = winner
    
df_20.head()

Unnamed: 0,Date,Visitor Team,Visitor Score,Home Team,Home Score,Home Win,Home Last Win,Visitor Last Win,Home Team Ranks Higher,Home Team Won Last
1,2019-10-22,New Orleans Pelicans,122.0,Toronto Raptors,130.0,True,False,False,True,0
2,2019-10-22,Los Angeles Lakers,102.0,Los Angeles Clippers,112.0,True,False,False,True,0
3,2019-10-23,Chicago Bulls,125.0,Charlotte Hornets,126.0,True,False,False,True,0
4,2019-10-23,Detroit Pistons,119.0,Indiana Pacers,110.0,False,False,False,True,0
5,2019-10-23,Cleveland Cavaliers,85.0,Orlando Magic,94.0,True,False,False,True,0


In [55]:
X_home_higher_20 = df_20[['Home Last Win', 'Visitor Last Win', 'Home Team Ranks Higher', 'Home Team Won Last']].values

In [56]:
home_teams_20 = encoding.transform(df_20['Home Team'].values)
visitor_teams_20 = encoding.transform(df_20['Visitor Team'].values)

X_teams_20 = np.vstack([home_teams_20, visitor_teams_20]).T
X_teams_20 = onehot.transform(X_teams_20).todense()

In [57]:
X_all_20 = np.hstack([X_home_higher_20, X_teams_20])
X_all_20.shape

(698, 64)

In [63]:
estimator = KerasClassifier(build_fn=create_baseline64, epochs=400, batch_size=50, verbose=2)
scores = cross_val_score(estimator, X_all_20, y_true)

print("Baseline: %.2f%% (%.2f%%)" % (scores.mean()*100, scores.std()*100))

Epoch 1/400
 - 1s - loss: 0.7287 - accuracy: 0.4796
Epoch 2/400
 - 0s - loss: 0.7164 - accuracy: 0.4559
Epoch 3/400
 - 0s - loss: 0.7029 - accuracy: 0.4989
Epoch 4/400
 - 0s - loss: 0.6887 - accuracy: 0.5806
Epoch 5/400
 - 0s - loss: 0.6875 - accuracy: 0.5355
Epoch 6/400
 - 0s - loss: 0.6969 - accuracy: 0.4839
Epoch 7/400
 - 0s - loss: 0.6807 - accuracy: 0.5634
Epoch 8/400
 - 0s - loss: 0.6812 - accuracy: 0.5441
Epoch 9/400
 - 0s - loss: 0.6973 - accuracy: 0.4817
Epoch 10/400
 - 0s - loss: 0.6817 - accuracy: 0.5720
Epoch 11/400
 - 0s - loss: 0.6887 - accuracy: 0.5290
Epoch 12/400
 - 0s - loss: 0.6827 - accuracy: 0.5613
Epoch 13/400
 - 0s - loss: 0.6720 - accuracy: 0.5806
Epoch 14/400
 - 0s - loss: 0.6844 - accuracy: 0.5570
Epoch 15/400
 - 0s - loss: 0.6857 - accuracy: 0.5484
Epoch 16/400
 - 0s - loss: 0.6750 - accuracy: 0.5699
Epoch 17/400
 - 0s - loss: 0.6682 - accuracy: 0.6022
Epoch 18/400
 - 0s - loss: 0.6632 - accuracy: 0.6129
Epoch 19/400
 - 0s - loss: 0.6714 - accuracy: 0.5871
Ep

Epoch 155/400
 - 0s - loss: 0.1237 - accuracy: 0.9548
Epoch 156/400
 - 0s - loss: 0.1112 - accuracy: 0.9677
Epoch 157/400
 - 0s - loss: 0.0925 - accuracy: 0.9699
Epoch 158/400
 - 0s - loss: 0.0849 - accuracy: 0.9656
Epoch 159/400
 - 0s - loss: 0.0942 - accuracy: 0.9677
Epoch 160/400
 - 0s - loss: 0.0860 - accuracy: 0.9699
Epoch 161/400
 - 0s - loss: 0.1015 - accuracy: 0.9677
Epoch 162/400
 - 0s - loss: 0.0885 - accuracy: 0.9634
Epoch 163/400
 - 0s - loss: 0.0868 - accuracy: 0.9699
Epoch 164/400
 - 0s - loss: 0.1024 - accuracy: 0.9634
Epoch 165/400
 - 0s - loss: 0.0902 - accuracy: 0.9656
Epoch 166/400
 - 0s - loss: 0.0964 - accuracy: 0.9677
Epoch 167/400
 - 0s - loss: 0.0860 - accuracy: 0.9677
Epoch 168/400
 - 0s - loss: 0.1094 - accuracy: 0.9591
Epoch 169/400
 - 0s - loss: 0.0978 - accuracy: 0.9634
Epoch 170/400
 - 0s - loss: 0.1072 - accuracy: 0.9634
Epoch 171/400
 - 0s - loss: 0.1075 - accuracy: 0.9699
Epoch 172/400
 - 0s - loss: 0.1016 - accuracy: 0.9613
Epoch 173/400
 - 0s - loss: 

Epoch 307/400
 - 0s - loss: 0.0482 - accuracy: 0.9806
Epoch 308/400
 - 0s - loss: 0.0553 - accuracy: 0.9828
Epoch 309/400
 - 0s - loss: 0.0510 - accuracy: 0.9742
Epoch 310/400
 - 0s - loss: 0.0504 - accuracy: 0.9914
Epoch 311/400
 - 0s - loss: 0.0374 - accuracy: 0.9828
Epoch 312/400
 - 0s - loss: 0.0557 - accuracy: 0.9806
Epoch 313/400
 - 0s - loss: 0.0460 - accuracy: 0.9828
Epoch 314/400
 - 0s - loss: 0.0395 - accuracy: 0.9849
Epoch 315/400
 - 0s - loss: 0.0545 - accuracy: 0.9806
Epoch 316/400
 - 0s - loss: 0.0496 - accuracy: 0.9806
Epoch 317/400
 - 0s - loss: 0.0477 - accuracy: 0.9806
Epoch 318/400
 - 0s - loss: 0.0319 - accuracy: 0.9828
Epoch 319/400
 - 0s - loss: 0.0343 - accuracy: 0.9806
Epoch 320/400
 - 0s - loss: 0.0781 - accuracy: 0.9785
Epoch 321/400
 - 0s - loss: 0.0510 - accuracy: 0.9763
Epoch 322/400
 - 0s - loss: 0.0485 - accuracy: 0.9785
Epoch 323/400
 - 0s - loss: 0.0346 - accuracy: 0.9828
Epoch 324/400
 - 0s - loss: 0.0399 - accuracy: 0.9742
Epoch 325/400
 - 0s - loss: 

Epoch 60/400
 - 0s - loss: 0.4200 - accuracy: 0.8430
Epoch 61/400
 - 0s - loss: 0.4022 - accuracy: 0.8430
Epoch 62/400
 - 0s - loss: 0.3547 - accuracy: 0.8495
Epoch 63/400
 - 0s - loss: 0.3724 - accuracy: 0.8559
Epoch 64/400
 - 0s - loss: 0.4003 - accuracy: 0.8215
Epoch 65/400
 - 0s - loss: 0.3615 - accuracy: 0.8409
Epoch 66/400
 - 0s - loss: 0.3709 - accuracy: 0.8559
Epoch 67/400
 - 0s - loss: 0.3520 - accuracy: 0.8645
Epoch 68/400
 - 0s - loss: 0.3540 - accuracy: 0.8645
Epoch 69/400
 - 0s - loss: 0.3491 - accuracy: 0.8430
Epoch 70/400
 - 0s - loss: 0.3538 - accuracy: 0.8495
Epoch 71/400
 - 0s - loss: 0.3397 - accuracy: 0.8581
Epoch 72/400
 - 0s - loss: 0.3557 - accuracy: 0.8473
Epoch 73/400
 - 0s - loss: 0.3454 - accuracy: 0.8430
Epoch 74/400
 - 0s - loss: 0.3452 - accuracy: 0.8559
Epoch 75/400
 - 0s - loss: 0.3285 - accuracy: 0.8559
Epoch 76/400
 - 0s - loss: 0.3035 - accuracy: 0.8624
Epoch 77/400
 - 0s - loss: 0.2850 - accuracy: 0.8860
Epoch 78/400
 - 0s - loss: 0.3393 - accuracy: 

Epoch 213/400
 - 0s - loss: 0.0975 - accuracy: 0.9527
Epoch 214/400
 - 0s - loss: 0.0940 - accuracy: 0.9613
Epoch 215/400
 - 0s - loss: 0.0854 - accuracy: 0.9634
Epoch 216/400
 - 0s - loss: 0.0979 - accuracy: 0.9656
Epoch 217/400
 - 0s - loss: 0.1265 - accuracy: 0.9505
Epoch 218/400
 - 0s - loss: 0.1084 - accuracy: 0.9505
Epoch 219/400
 - 0s - loss: 0.0911 - accuracy: 0.9548
Epoch 220/400
 - 0s - loss: 0.1026 - accuracy: 0.9548
Epoch 221/400
 - 0s - loss: 0.1075 - accuracy: 0.9591
Epoch 222/400
 - 0s - loss: 0.1087 - accuracy: 0.9570
Epoch 223/400
 - 0s - loss: 0.0850 - accuracy: 0.9656
Epoch 224/400
 - 0s - loss: 0.1148 - accuracy: 0.9484
Epoch 225/400
 - 0s - loss: 0.1125 - accuracy: 0.9462
Epoch 226/400
 - 0s - loss: 0.1078 - accuracy: 0.9462
Epoch 227/400
 - 0s - loss: 0.0863 - accuracy: 0.9548
Epoch 228/400
 - 0s - loss: 0.1069 - accuracy: 0.9634
Epoch 229/400
 - 0s - loss: 0.1083 - accuracy: 0.9548
Epoch 230/400
 - 0s - loss: 0.1033 - accuracy: 0.9484
Epoch 231/400
 - 0s - loss: 

Epoch 365/400
 - 0s - loss: 0.0716 - accuracy: 0.9548
Epoch 366/400
 - 0s - loss: 0.0599 - accuracy: 0.9720
Epoch 367/400
 - 0s - loss: 0.0668 - accuracy: 0.9548
Epoch 368/400
 - 0s - loss: 0.0836 - accuracy: 0.9570
Epoch 369/400
 - 0s - loss: 0.0769 - accuracy: 0.9613
Epoch 370/400
 - 0s - loss: 0.0707 - accuracy: 0.9570
Epoch 371/400
 - 0s - loss: 0.0775 - accuracy: 0.9634
Epoch 372/400
 - 0s - loss: 0.0623 - accuracy: 0.9677
Epoch 373/400
 - 0s - loss: 0.0784 - accuracy: 0.9527
Epoch 374/400
 - 0s - loss: 0.0675 - accuracy: 0.9613
Epoch 375/400
 - 0s - loss: 0.1009 - accuracy: 0.9548
Epoch 376/400
 - 0s - loss: 0.0597 - accuracy: 0.9699
Epoch 377/400
 - 0s - loss: 0.0616 - accuracy: 0.9699
Epoch 378/400
 - 0s - loss: 0.0578 - accuracy: 0.9656
Epoch 379/400
 - 0s - loss: 0.0916 - accuracy: 0.9570
Epoch 380/400
 - 0s - loss: 0.0634 - accuracy: 0.9699
Epoch 381/400
 - 0s - loss: 0.0747 - accuracy: 0.9591
Epoch 382/400
 - 0s - loss: 0.0648 - accuracy: 0.9591
Epoch 383/400
 - 0s - loss: 

Epoch 119/400
 - 0s - loss: 0.1340 - accuracy: 0.9485
Epoch 120/400
 - 0s - loss: 0.1953 - accuracy: 0.9313
Epoch 121/400
 - 0s - loss: 0.1543 - accuracy: 0.9421
Epoch 122/400
 - 0s - loss: 0.1702 - accuracy: 0.9270
Epoch 123/400
 - 0s - loss: 0.1419 - accuracy: 0.9335
Epoch 124/400
 - 0s - loss: 0.1515 - accuracy: 0.9528
Epoch 125/400
 - 0s - loss: 0.1470 - accuracy: 0.9399
Epoch 126/400
 - 0s - loss: 0.1622 - accuracy: 0.9399
Epoch 127/400
 - 0s - loss: 0.1546 - accuracy: 0.9335
Epoch 128/400
 - 0s - loss: 0.1283 - accuracy: 0.9421
Epoch 129/400
 - 0s - loss: 0.1492 - accuracy: 0.9335
Epoch 130/400
 - 0s - loss: 0.1357 - accuracy: 0.9378
Epoch 131/400
 - 0s - loss: 0.1429 - accuracy: 0.9485
Epoch 132/400
 - 0s - loss: 0.1540 - accuracy: 0.9485
Epoch 133/400
 - 0s - loss: 0.1430 - accuracy: 0.9528
Epoch 134/400
 - 0s - loss: 0.1223 - accuracy: 0.9592
Epoch 135/400
 - 0s - loss: 0.1088 - accuracy: 0.9657
Epoch 136/400
 - 0s - loss: 0.1425 - accuracy: 0.9442
Epoch 137/400
 - 0s - loss: 

Epoch 271/400
 - 0s - loss: 0.0845 - accuracy: 0.9614
Epoch 272/400
 - 0s - loss: 0.0648 - accuracy: 0.9764
Epoch 273/400
 - 0s - loss: 0.0542 - accuracy: 0.9764
Epoch 274/400
 - 0s - loss: 0.0773 - accuracy: 0.9764
Epoch 275/400
 - 0s - loss: 0.0559 - accuracy: 0.9700
Epoch 276/400
 - 0s - loss: 0.0539 - accuracy: 0.9785
Epoch 277/400
 - 0s - loss: 0.0588 - accuracy: 0.9721
Epoch 278/400
 - 0s - loss: 0.0649 - accuracy: 0.9764
Epoch 279/400
 - 0s - loss: 0.0445 - accuracy: 0.9807
Epoch 280/400
 - 0s - loss: 0.0927 - accuracy: 0.9721
Epoch 281/400
 - 0s - loss: 0.0731 - accuracy: 0.9700
Epoch 282/400
 - 0s - loss: 0.0637 - accuracy: 0.9785
Epoch 283/400
 - 0s - loss: 0.0535 - accuracy: 0.9785
Epoch 284/400
 - 0s - loss: 0.0376 - accuracy: 0.9828
Epoch 285/400
 - 0s - loss: 0.0619 - accuracy: 0.9614
Epoch 286/400
 - 0s - loss: 0.0537 - accuracy: 0.9742
Epoch 287/400
 - 0s - loss: 0.0723 - accuracy: 0.9742
Epoch 288/400
 - 0s - loss: 0.0450 - accuracy: 0.9850
Epoch 289/400
 - 0s - loss: 