In [1]:
#Importing 3 data sets from csv
from pandas import read_csv
import pandas as pd
import numpy as np
from sklearn.dummy import DummyClassifier

terran_data = read_csv('../Output/TerranMarch.csv', header=0, index_col=False)
protoss_data = read_csv('../Output/ProtossMarch.csv', header=0, index_col=False)
zerg_data = read_csv('../Output/ZergMarch.csv', header=0, index_col=False)

In [2]:
#Normalization 
from sklearn.preprocessing import MinMaxScaler

no_normalize = ['match_id','map_name','region','race','enemy_race','frame','second','player','game_length']

#Names of Columns that we want to Normalize (Numerical variables only)
terran_normalize_columns = [x for x in list(terran_data) if x not in no_normalize]
protoss_normalize_columns = [x for x in list(protoss_data) if x not in no_normalize]
zerg_normalize_columns = [x for x in list(zerg_data) if x not in no_normalize]

#Normalizing these columns
terran_norm = terran_data[terran_normalize_columns].values
protoss_norm = protoss_data[protoss_normalize_columns].values
zerg_norm = zerg_data[zerg_normalize_columns].values

terran_norm_scaled = MinMaxScaler().fit_transform(terran_norm)
protoss_norm_scaled = MinMaxScaler().fit_transform(protoss_norm)
zerg_norm_scaled = MinMaxScaler().fit_transform(zerg_norm)

terran_data_normalized = pd.DataFrame(terran_norm_scaled, columns=terran_normalize_columns, index=terran_data.index)
protoss_data_normalized = pd.DataFrame(protoss_norm_scaled, columns=protoss_normalize_columns, index=protoss_data.index)
zerg_data_normalized = pd.DataFrame(zerg_norm_scaled, columns=zerg_normalize_columns, index=zerg_data.index)

#One-hot Encoding the categorical variables
terran_data_normalized = terran_data_normalized.join(pd.get_dummies(terran_data['map_name'], prefix='Map'))
terran_data_normalized = terran_data_normalized.join(pd.get_dummies(terran_data['region'], prefix='Region'))
terran_data_normalized = terran_data_normalized.join(pd.get_dummies(terran_data['enemy_race'], prefix='Enemy'))
terran_data_normalized['win'] = terran_data_normalized['win'].astype(int)

protoss_data_normalized = protoss_data_normalized.join(pd.get_dummies(protoss_data['map_name'], prefix='Map'))
protoss_data_normalized = protoss_data_normalized.join(pd.get_dummies(protoss_data['region'], prefix='Region'))
protoss_data_normalized = protoss_data_normalized.join(pd.get_dummies(protoss_data['enemy_race'], prefix='Enemy'))
protoss_data_normalized['win'] = protoss_data_normalized['win'].astype(int)

zerg_data_normalized = zerg_data_normalized.join(pd.get_dummies(zerg_data['map_name'], prefix='Map'))
zerg_data_normalized = zerg_data_normalized.join(pd.get_dummies(zerg_data['region'], prefix='Region'))
zerg_data_normalized = zerg_data_normalized.join(pd.get_dummies(zerg_data['enemy_race'], prefix='Enemy'))
zerg_data_normalized['win'] = zerg_data_normalized['win'].astype(int)

In [3]:
#Standardization

from sklearn.preprocessing import StandardScaler

no_standardize = ['match_id','map_name','region','race','enemy_race','frame','second','player','game_length','win']

#Names of Columns that we do want to Standardize
terran_standardize_columns = [x for x in list(terran_data) if x not in no_standardize]
protoss_standardize_columns = [x for x in list(protoss_data) if x not in no_standardize]
zerg_standardize_columns = [x for x in list(zerg_data) if x not in no_standardize]

#Standardizing these columns
terran_stand = terran_data[terran_standardize_columns].values
protoss_stand = protoss_data[protoss_standardize_columns].values
zerg_stand = zerg_data[zerg_standardize_columns].values

terran_stand_scaled = StandardScaler().fit_transform(terran_stand)
protoss_stand_scaled = StandardScaler().fit_transform(protoss_stand)
zerg_stand_scaled = StandardScaler().fit_transform(zerg_stand)

terran_data_standardized = pd.DataFrame(terran_stand_scaled, columns=terran_standardize_columns, index=terran_data.index)
protoss_data_standardized = pd.DataFrame(protoss_stand_scaled, columns=protoss_standardize_columns, index=protoss_data.index)
zerg_data_standardized = pd.DataFrame(zerg_stand_scaled, columns=zerg_standardize_columns, index=zerg_data.index)

#One-hot Encoding the categorical variables
terran_data_standardized = terran_data_standardized.join(pd.get_dummies(terran_data['map_name'], prefix='Map'))
terran_data_standardized = terran_data_standardized.join(pd.get_dummies(terran_data['region'], prefix='Region'))
terran_data_standardized = terran_data_standardized.join(pd.get_dummies(terran_data['enemy_race'], prefix='Enemy'))
terran_data_standardized = terran_data_standardized.join(terran_data['win'])
terran_data_standardized['win'] = terran_data_standardized['win'].astype(int)

protoss_data_standardized = protoss_data_standardized.join(pd.get_dummies(protoss_data['map_name'], prefix='Map'))
protoss_data_standardized = protoss_data_standardized.join(pd.get_dummies(protoss_data['region'], prefix='Region'))
protoss_data_standardized = protoss_data_standardized.join(pd.get_dummies(protoss_data['enemy_race'], prefix='Enemy'))
protoss_data_standardized = protoss_data_standardized.join(protoss_data['win'])
protoss_data_standardized['win'] = protoss_data_standardized['win'].astype(int)

zerg_data_standardized = zerg_data_standardized.join(pd.get_dummies(zerg_data['map_name'], prefix='Map'))
zerg_data_standardized = zerg_data_standardized.join(pd.get_dummies(zerg_data['region'], prefix='Region'))
zerg_data_standardized = zerg_data_standardized.join(pd.get_dummies(zerg_data['enemy_race'], prefix='Enemy'))
zerg_data_standardized = zerg_data_standardized.join(zerg_data['win'])
zerg_data_standardized['win'] = zerg_data_standardized['win'].astype(int)

In [4]:
#Split data into dependent and independent variables
no_X = ['win']
terran_yes_X = [x for x in list(terran_data_normalized) if x not in no_X]
protoss_yes_X = [x for x in list(protoss_data_normalized) if x not in no_X]
zerg_yes_X = [x for x in list(zerg_data_normalized) if x not in no_X]

terran_Y = terran_data_normalized['win']
terran_X_normalized = terran_data_normalized[terran_yes_X]
terran_X_standardized = terran_data_standardized[terran_yes_X]

protoss_Y = protoss_data_normalized['win']
protoss_X_normalized = protoss_data_normalized[protoss_yes_X]
protoss_X_standardized = protoss_data_standardized[protoss_yes_X]

zerg_Y = zerg_data_normalized['win']
zerg_X_normalized = zerg_data_normalized[zerg_yes_X]
zerg_X_standardized = zerg_data_standardized[zerg_yes_X]

In [5]:
#Split data into training, and testing set
terran_train_split = 0.8
protoss_train_split = 0.8
zerg_train_split = 0.8

terran_num_data = len(terran_X_normalized)
terran_num_train = int(terran_train_split * terran_num_data)
terran_num_test = terran_num_data - int(terran_train_split * terran_num_data)

protoss_num_data = len(protoss_X_normalized)
protoss_num_train = int(protoss_train_split * protoss_num_data)
protoss_num_test = protoss_num_data - int(protoss_train_split * protoss_num_data)

zerg_num_data = len(zerg_X_normalized)
zerg_num_train = int(zerg_train_split * zerg_num_data)
zerg_num_test = zerg_num_data - int(zerg_train_split * zerg_num_data)

terran_Y_train = terran_Y[0:terran_num_train]
terran_Y_test = terran_Y[terran_num_train:]

protoss_Y_train = protoss_Y[0:protoss_num_train]
protoss_Y_test = protoss_Y[protoss_num_train:]

zerg_Y_train = zerg_Y[0:zerg_num_train]
zerg_Y_test = zerg_Y[zerg_num_train:]

terran_X_normalized_train = terran_X_normalized[0:terran_num_train]
terran_X_normalized_test = terran_X_normalized[terran_num_train:]
terran_X_standardized_train = terran_X_standardized[0:terran_num_train]
terran_X_standardized_test = terran_X_standardized[terran_num_train:]

protoss_X_normalized_train = protoss_X_normalized[0:protoss_num_train]
protoss_X_normalized_test = protoss_X_normalized[protoss_num_train:]
protoss_X_standardized_train = protoss_X_standardized[0:protoss_num_train]
protoss_X_standardized_test = protoss_X_standardized[protoss_num_train:]

zerg_X_normalized_train = zerg_X_normalized[0:zerg_num_train]
zerg_X_normalized_test = zerg_X_normalized[zerg_num_train:]
zerg_X_standardized_train = zerg_X_standardized[0:zerg_num_train]
zerg_X_standardized_test = zerg_X_standardized[zerg_num_train:]

In [6]:
#Zero Rule Baseline

#Terran Normalized
terranN_most_frequent_dummy = DummyClassifier(strategy="most_frequent")
terranN_most_frequent_dummy.fit(terran_X_normalized_train, terran_Y_train)
terranN_most_frequent_dummy.predict(terran_X_normalized_test)
terranN_most_frequent_score = terranN_most_frequent_dummy.score(terran_X_normalized_test, terran_Y_test)

#Terran Standardized
terranS_most_frequent_dummy = DummyClassifier(strategy="most_frequent")
terranS_most_frequent_dummy.fit(terran_X_standardized_train, terran_Y_train)
terranS_most_frequent_dummy.predict(terran_X_standardized_test)
terranS_most_frequent_score = terranN_most_frequent_dummy.score(terran_X_standardized_test, terran_Y_test)

#Protoss Normalized
protossN_most_frequent_dummy = DummyClassifier(strategy="most_frequent")
protossN_most_frequent_dummy.fit(protoss_X_normalized_train, protoss_Y_train)
protossN_most_frequent_dummy.predict(protoss_X_normalized_test)
protossN_most_frequent_score = protossN_most_frequent_dummy.score(protoss_X_normalized_test, protoss_Y_test)

#Protoss Standardized
protossS_most_frequent_dummy = DummyClassifier(strategy="most_frequent")
protossS_most_frequent_dummy.fit(protoss_X_standardized_train, protoss_Y_train)
protossS_most_frequent_dummy.predict(protoss_X_standardized_test)
protossS_most_frequent_score = protossN_most_frequent_dummy.score(protoss_X_standardized_test, protoss_Y_test)

#Zerg Normalized
zergN_most_frequent_dummy = DummyClassifier(strategy="most_frequent")
zergN_most_frequent_dummy.fit(zerg_X_normalized_train, zerg_Y_train)
zergN_most_frequent_dummy.predict(zerg_X_normalized_test)
zergN_most_frequent_score = zergN_most_frequent_dummy.score(zerg_X_normalized_test, zerg_Y_test)

#Zerg Standardized
zergS_most_frequent_dummy = DummyClassifier(strategy="most_frequent")
zergS_most_frequent_dummy.fit(zerg_X_standardized_train, zerg_Y_train)
zergS_most_frequent_dummy.predict(zerg_X_standardized_test)
zergS_most_frequent_score = zergN_most_frequent_dummy.score(zerg_X_standardized_test, zerg_Y_test)

In [7]:
#Random Prediction Baseline

#Terran Normalized
terranN_uniform_dummy = DummyClassifier(strategy="uniform")
terranN_uniform_dummy.fit(terran_X_normalized_train, terran_Y_train)
terranN_uniform_dummy.predict(terran_X_normalized_test)
terranN_uniform_score = terranN_uniform_dummy.score(terran_X_normalized_test, terran_Y_test)

#Terran Standardized
terranS_uniform_dummy = DummyClassifier(strategy="uniform")
terranS_uniform_dummy.fit(terran_X_standardized_train, terran_Y_train)
terranS_uniform_dummy.predict(terran_X_standardized_test)
terranS_uniform_score = terranS_uniform_dummy.score(terran_X_standardized_test, terran_Y_test)

#Protoss Normalized
protossN_uniform_dummy = DummyClassifier(strategy="uniform")
protossN_uniform_dummy.fit(protoss_X_normalized_train, protoss_Y_train)
protossN_uniform_dummy.predict(protoss_X_normalized_test)
protossN_uniform_score = protossN_uniform_dummy.score(protoss_X_normalized_test, protoss_Y_test)

#Protoss Standardized
protossS_uniform_dummy = DummyClassifier(strategy="uniform")
protossS_uniform_dummy.fit(protoss_X_standardized_train, protoss_Y_train)
protossS_uniform_dummy.predict(protoss_X_standardized_test)
protossS_uniform_score = protossS_uniform_dummy.score(protoss_X_standardized_test, protoss_Y_test)

#Zerg Normalized
zergN_uniform_dummy = DummyClassifier(strategy="uniform")
zergN_uniform_dummy.fit(zerg_X_normalized_train, zerg_Y_train)
zergN_uniform_dummy.predict(zerg_X_normalized_test)
zergN_uniform_score = zergN_uniform_dummy.score(zerg_X_normalized_test, zerg_Y_test)

#Zerg Standardized
zergS_uniform_dummy = DummyClassifier(strategy="uniform")
zergS_uniform_dummy.fit(zerg_X_standardized_train, zerg_Y_train)
zergS_uniform_dummy.predict(zerg_X_standardized_test)
zergS_uniform_score = zergS_uniform_dummy.score(zerg_X_standardized_test, zerg_Y_test)

In [8]:
table = pd.DataFrame ({'Baseline Algorithm': ['Zero Rule','Random Prediction'],
                   'Normalized Terran': [terranN_most_frequent_score, terranN_uniform_score],
                      'Standardized Terran': [terranS_most_frequent_score, terranS_uniform_score],
                      'Normalized Protoss': [protossN_most_frequent_score, protossN_uniform_score],
                      'Standardized Protoss': [protossS_most_frequent_score, protossS_uniform_score],
                    'Normalized Zerg': [zergN_most_frequent_score, zergN_uniform_score],
                      'Standardized Zerg': [zergS_most_frequent_score, zergS_uniform_score]})

In [9]:
print('The score in the table is the Mean Accuracy for that Baseline and Data Set')
display(table)

The score in the table is the Mean Accuracy for that Baseline and Data Set


Unnamed: 0,Baseline Algorithm,Normalized Terran,Standardized Terran,Normalized Protoss,Standardized Protoss,Normalized Zerg,Standardized Zerg
0,Zero Rule,0.551063,0.551063,0.490206,0.490206,0.452728,0.452728
1,Random Prediction,0.501753,0.502684,0.504553,0.489944,0.501569,0.49511
