In [1]:
'''This notebook contains the GRU model with word embeddings using Hero Types'''

In [2]:
#Credits https://machinelearningmastery.com/time-series-prediction-lstm-recurrent-neural-networks-python-keras/
#https://machinelearningmastery.com/how-to-make-classification-and-regression-predictions-for-deep-learning-models-in-keras/
#https://machinelearningmastery.com/how-to-choose-loss-functions-when-training-deep-learning-neural-networks/

In [3]:
'''Import the necessary Libraries'''
import pandas as pd
import numpy as np
import datetime

import tensorflow as tf
import numpy
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import GRU
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence

from itertools import chain

import sklearn_crfsuite
from sklearn_crfsuite import scorers
from sklearn_crfsuite import metrics

import nltk
import sklearn
import scipy.stats
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import learning_curve, GridSearchCV

from keras.utils import to_categorical

In [4]:
from keras import backend as K


def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

In [5]:
'''Now we load the required data sets'''
df = pd.read_csv("dataset_without_duplicates_Bans analysis _with_types.csv")
matrix1 =np.array(df.loc[:,['match_id','type1','type2','type3','type4']])
df1 = pd.read_csv("hero_names.csv")
matrix2 =np.array(df1.loc[:,['hero_id', 'Strength','Agility','Intelligence']])



def dataprep1(index):
    my_input = matrix1[:,1:index]
    my_output = matrix1[:,index]
    train_x = my_input[:40000,:]
    train_x = np.asarray(train_x).astype(np.float32)
    #train_x = tf.convert_to_tensor(train_x)
    train_y = my_output[:40000]
    train_y = np.asarray(train_y).astype(np.float32)
    #train_y = tf.convert_to_tensor(train_y)
    test_x = my_input[40000:50000,:]
    test_x = np.asarray(test_x).astype(np.float32)
    test_y = my_output[40000:50000]
    test_y = np.asarray(test_y).astype(np.float32)
    train_y_cat = to_categorical(train_y)
    #train_y_cat = tf.convert_to_tensor(train_y_cat)
    test_y_cat = to_categorical(test_y)
    #test_y_cat = tf.convert_to_tensor(test_y_cat)
    return train_x,train_y_cat,test_x,test_y_cat
#train_x,train_y_cat,test_x,test_y_cat = dataprep1(2)
#train_x


In [22]:
'''Defining the model with embeddings along with CNN and LSTM. I have used adam optimizer with categorical crossentropy as our model predicts multiple classes.'''
numpy.random.seed(7)
# create the model
def my_model(input_length_val,pool_size_val):
    embedding_vecor_length = 32
    model = Sequential()
    model.add(Embedding(114, embedding_vecor_length, input_length=input_length_val))
    model.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
    model.add(MaxPooling1D(pool_size=pool_size_val))
    model.add(GRU(100,return_sequences=True,unroll=True))
    model.add(GRU(100,return_sequences=True))
    model.add(GRU(100,return_sequences=True))
    model.add(GRU(100))
    model.add(Dense(4, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc',tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])
    print(model.summary())
    return model

In [23]:
import random as ra
for i in range(2,5):
    train_x,train_y_cat,test_x,test_y_cat = dataprep1(i)
    input_length_val = i-1
    if i==3:    
        pool_size_val = 2
        batch_size_val = 64
    elif i==4:
        pool_size_val = 3
        batch_size_val = 64
    elif i==2:
        pool_size_val = 1
        batch_size_val = 64
    
    model = my_model(input_length_val,pool_size_val)
    # fit the model
    model.fit(train_x, train_y_cat, epochs=2, batch_size=batch_size_val)
    
    for j in range(0,7):
        small = ra.randint(0,9999)
        big = ra.randint(0,9999)
        while ((small>=big) and ((big-small)<500)):
            small = ra.randint(0,9999)
            big = ra.randint(0,9999)

    # evaluate the model
        print("Iteration Number: ",j)
        loss, accuracy, precision, recall = model.evaluate(test_x[small:big,], test_y_cat[small:big], verbose=1)
    #res = model.predict(test_x)
    #print("Res is:",res)
        print("loss is: ", loss)
        print("Accuracy is: ",accuracy)
        print("Precision is: ",precision)
        print("Recall is: ",recall)
        print("F1 is : ", 2*((precision*recall)/(precision+recall+K.epsilon())))


Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_10 (Embedding)     (None, 1, 32)             3648      
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 1, 32)             3104      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 1, 32)             0         
_________________________________________________________________
gru_57 (GRU)                 (None, 1, 100)            40200     
_________________________________________________________________
gru_58 (GRU)                 (None, 1, 100)            60600     
_________________________________________________________________
gru_59 (GRU)                 (None, 1, 100)            60600     
_________________________________________________________________
gru_60 (GRU)                 (None, 100)             

Epoch 2/2
Iteration Number:  0
loss is:  1.1060982942581177
Accuracy is:  0.3815489709377289
Precision is:  0.4353741407394409
Recall is:  0.1457858830690384
F1 is :  0.2184300022416546
Iteration Number:  1
loss is:  1.1091059446334839
Accuracy is:  0.38254937529563904
Precision is:  0.42350059747695923
Recall is:  0.14750604331493378
F1 is :  0.21880266207961938
Iteration Number:  2
loss is:  1.1113512516021729
Accuracy is:  0.3771204948425293
Precision is:  0.42125481367111206
Recall is:  0.14310570061206818
F1 is :  0.21363633010847188
Iteration Number:  3
loss is:  1.113257884979248
Accuracy is:  0.37590137124061584
Precision is:  0.4166666567325592
Recall is:  0.14480111002922058
F1 is :  0.21491450527706613
Iteration Number:  4
loss is:  1.1096664667129517
Accuracy is:  0.37995150685310364
Precision is:  0.4209601879119873
Recall is:  0.14531123638153076
F1 is :  0.21604563446558106
Iteration Number:  5
loss is:  1.1073757410049438
Accuracy is:  0.3828556537628174
Precision is:  

In [None]:
'''The cells below are to be ignored as they are testing cells only'''

In [7]:

#model.fit(train_x, train_y_cat, epochs=10, batch_size=32)
# Final evaluation of the model
##scores = model.evaluate(test_x, test_y_cat, verbose=1)
##print("Accuracy: %.2f%%" % (scores[1]*100))
#predict_classes(object, x, batch_size = NULL, verbose = 0, steps = NULL)
#class_vals = model.predict_classes(test_x, test_y_cat,verbose=1)
##print(scores)
#res = model.predict(test_x)


In [25]:
res

array([[4.6302646e-11, 2.0412136e-02, 5.5758455e-03, ..., 8.3727943e-04,
        5.6127925e-11, 1.7126616e-02],
       [2.5977874e-08, 2.4595298e-02, 8.7316809e-03, ..., 3.7359309e-03,
        4.0153285e-08, 1.0456971e-02],
       [2.5422263e-11, 1.6976409e-02, 7.7993367e-03, ..., 1.7016078e-04,
        2.9651653e-11, 6.8962099e-03],
       ...,
       [5.0971623e-11, 2.0336319e-02, 3.6610698e-03, ..., 4.0176904e-04,
        6.3429768e-11, 1.1689910e-02],
       [1.1040675e-10, 2.7636470e-02, 3.2927773e-03, ..., 5.5798527e-04,
        1.2071649e-10, 3.1962667e-02],
       [2.1676995e-11, 3.9491843e-02, 4.2277668e-03, ..., 4.8325117e-05,
        1.9527213e-11, 4.7792410e-03]], dtype=float32)

In [29]:
ypred4 = np.argmax(res, axis=1)
print("Ypred4", ypred4)
#total_predictions_correct_4 = sum(p == t for p, t in zip(test_y, ypred4))
#total_predictions_correct_4 = sum(p == t for p, t in zip(test_y_cat, res))
counter = 0 
for i in range(test_y_cat.shape[0]):
    if test_y_cat[i].all()==ypred4[i].all():
        counter=counter+1
#accuracy_4 = total_predictions_correct_4/test_y.shape[0]
accuracy_4 = counter/test_y_cat.shape[0]
print(accuracy_4)


Ypred4 [85 73 30 ... 69 85  7]
0.0


In [53]:
print(scores)

[3.610165596008301, 0.1526000052690506]


In [46]:
class_vals = np.argmax(model.predict(test_x), axis=-1)
class_vals.shape
#test_y.shape
count=0
for i in class_vals:
    if (class_vals[i]==test_y[i]):
        count=count+1
my_acc = count/class_vals.shape[0]
my_acc

0.0144