In [89]:
#Importing data from csv
from pandas import read_csv
import pandas as pd

# Lift pandas' display limits so wide/long frames are rendered in full.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Load the dataset; the first row supplies the column names and no column
# is used as the index.
zerg_data = pd.read_csv('../Output/ZergFeb.csv', header=0, index_col=False)

In [90]:
from sklearn.preprocessing import MinMaxScaler

# Columns that are left out of the min-max scaling step.
no_normalize = ['match_id','map_name','region','race','enemy_race','frame','second','player','game_length']

# Every other column of zerg_data is rescaled ('win' is not excluded here).
normalize_columns = [col for col in zerg_data.columns if col not in no_normalize]

# Fit a MinMaxScaler on the selected columns and transform them in one pass.
norm = zerg_data[normalize_columns].values
norm_scaled = MinMaxScaler().fit_transform(norm)

# Rebuild a DataFrame on the original index, then append the one-hot encoded
# categorical variables (map, region, enemy race) through chained joins.
zerg_data_normalized = (
    pd.DataFrame(norm_scaled, columns=normalize_columns, index=zerg_data.index)
    .join(pd.get_dummies(zerg_data['map_name'], prefix='Map'))
    .join(pd.get_dummies(zerg_data['region'], prefix='Region'))
    .join(pd.get_dummies(zerg_data['enemy_race'], prefix='Enemy'))
)

In [91]:
#Standardization

from sklearn.preprocessing import StandardScaler

# Columns that are left out of standardization; 'win' is excluded as well so
# the target keeps its original values.
no_standardize = ['match_id','map_name','region','race','enemy_race','frame','second','player','game_length','win']

# Every other column of zerg_data is standardized.
standardize_columns = [col for col in zerg_data.columns if col not in no_standardize]

# Fit a StandardScaler on the selected columns and transform them in one pass.
stand = zerg_data[standardize_columns].values
stand_scaled = StandardScaler().fit_transform(stand)

# Rebuild a DataFrame on the original index, append the one-hot encoded
# categorical variables, and finally re-attach the untouched 'win' column.
zerg_data_standardized = (
    pd.DataFrame(stand_scaled, columns=standardize_columns, index=zerg_data.index)
    .join(pd.get_dummies(zerg_data['map_name'], prefix='Map'))
    .join(pd.get_dummies(zerg_data['region'], prefix='Region'))
    .join(pd.get_dummies(zerg_data['enemy_race'], prefix='Enemy'))
    .join(zerg_data['win'])
)

In [92]:
#Split data into dependent and independent variables
# 'win' is the dependent variable; every other column is a predictor.
no_X = ['win']
yes_X = [col for col in zerg_data_normalized.columns if col not in no_X]

# The same predictor list (taken from the normalized frame) is applied to
# both the normalized and the standardized data sets.
X_normalized, Y_normalized = zerg_data_normalized[yes_X], zerg_data_normalized['win']
X_standardized, Y_standardized = zerg_data_standardized[yes_X], zerg_data_standardized['win']

In [93]:
from sklearn.linear_model import Lasso

# Fit an L1-regularised linear model on the min-max normalized features.
# The `normalize` keyword is intentionally not passed: it was deprecated in
# scikit-learn 1.0 and removed in 1.2 (passing it raises TypeError there),
# and its former default was False, so the fitted model is unchanged.
lassoRegN = Lasso(alpha=0.001)
lassoRegN = lassoRegN.fit(X_normalized, Y_normalized)

In [94]:
# Coefficients of the normalized model ordered by increasing magnitude.
seqN = list(lassoRegN.coef_)
seqN.sort(key=abs)

# Position of each coefficient inside the sorted list; equal values all map
# to the first matching position because list.index returns the first hit.
index = [seqN.index(coef) for coef in lassoRegN.coef_]
print(seqN)

[0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, -0.0, 0.0, 0.0, -0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, -0.0, 0.0, -0.0, 0.0, -0.0, 0.0, 0.0, -0.0, 0.0, -0.0, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0007651386291683815, 0.0021711969891387564, -0.0027115182557167477, 0.004258499738127188, -0.005452880203365363, -0.007578526486292912, 0.009338056842568627, -0.013079453269818558, -0.01942015683279893, 0.024146304278564217, 0.03206567841062474, 0.03253727481992128, -0.0349440950605696, -0.04180534064661614, 0.046065184753192066, 0.04616765308035772, 0.047617139337977274, 0.05049444988447192, 0.051596953492938566, -0.05252454773781015, 0.05597591893034307, -0.05683793140762591, -0.06243910556533429, -0.06348398078277727, -0.06506703923834557, -0.06893875362012371, 0.072081214

In [95]:
# One-row frame of the normalized model's coefficients labelled by feature
# name, shown transposed so each feature gets its own row.
df_N = pd.DataFrame(data=[lassoRegN.coef_], columns=X_normalized.columns)
display(df_N.transpose())

Unnamed: 0,0
ap30s,0.0
workers_active_count,-0.0
food_used,0.0
food_made,-0.013079
minerals_current,0.112556
minerals_collection_rate,0.249198
minerals_used_in_progress,0.0
minerals_used_current,0.0
minerals_used_active_forces,0.224632
minerals_lost,-0.110966


In [96]:
# Fit an L1-regularised linear model on the standardized features.
# The `normalize` keyword is intentionally not passed: it was deprecated in
# scikit-learn 1.0 and removed in 1.2 (passing it raises TypeError there),
# and its former default was False, so the fitted model is unchanged.
lassoRegS = Lasso(alpha=0.001)
lassoRegS = lassoRegS.fit(X_standardized, Y_standardized)

In [97]:
# Coefficients of the standardized model ordered by increasing magnitude.
seqS = list(lassoRegS.coef_)
seqS.sort(key=abs)

# Position of each coefficient inside the sorted list; equal values all map
# to the first matching position because list.index returns the first hit.
index = [seqS.index(coef) for coef in lassoRegS.coef_]
print(seqS)

[-0.0, 0.0, 0.0, 0.0, -0.0, 0.0, 0.0, 0.0, -0.0, 0.0, 0.0, 0.0, -0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, -0.0, -0.0, 0.0, -0.0, 0.0, 0.0, 0.0, -0.0, 0.0, -0.0, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, 0.0004299047948844979, -0.0007237532646686447, -0.0008388530278972189, -0.0016543029465449356, -0.00182286906567466, -0.0021044407153641078, 0.0024916332982956976, -0.002644194856239304, 0.0026891167349056356, 0.003505453618530808, 0.004069676517291305, -0.004262163357707965, 0.004766761505379742, 0.0057092540778542165, 0.0060974637086086865, -0.00932905212765651, 0.00936452911835763, 0.009933782130117, 0.009949619563273003, 0.010417306079704789, -0.015169836058824293, 0.01603293633316203, -0.01626801372239618, 0.016933269097446, 0.017444791156986564, -0.019484014750076717, 0.01971679415935218, -0.021515531785984525, -0.021714127378307753, 0.021926724578510595, -0.0223712169

In [98]:
# One-row frame of the standardized model's coefficients labelled by feature
# name, shown transposed so each feature gets its own row.
df_S = pd.DataFrame(data=[lassoRegS.coef_], columns=X_standardized.columns)
display(df_S.transpose())

Unnamed: 0,0
ap30s,-0.001823
workers_active_count,-0.0
food_used,0.0
food_made,-0.056435
minerals_current,0.017445
minerals_collection_rate,0.070278
minerals_used_in_progress,0.016933
minerals_used_current,0.0
minerals_used_active_forces,0.060676
minerals_lost,-0.058935
