In [None]:
# Colab-only setup: install scikeras quietly.
# NOTE(review): the scikeras import further down is commented out, so this
# install looks unused — confirm before removing.
!pip install scikeras --quiet
import warnings
# Silence every warning for the rest of the session (this also hides
# deprecation notices).
warnings.filterwarnings('ignore')

# Number of feature columns per sample; the class label sits in the column
# right after them.
NUM_FEATURES = 31
# Length of the sequence axis each sample is reshaped to before it is fed to
# the LSTM layers.
TIMESTEPS = 1

import tensorflow as tf
import matplotlib.pyplot as plt

# Mount Google Drive; the trained model is saved to and reloaded from it later.
from google.colab import drive
drive.mount('/content/gdrive')

# # X = np.expand_dims(X, 0)
# shape_X = X.shape

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


##### Initialise seed and load dataset

In [2]:
import numpy as np
import pandas as pd

# random seeds for reproducibility
# NumPy's seed alone does not govern TensorFlow's weight initialisation or
# dropout masks, so TensorFlow's global seed is set as well.
seed = 10
np.random.seed(seed)
tf.random.set_seed(seed)

# loading of dataset (the CSV has no header row, hence header=None)
df = pd.read_csv("https://raw.githubusercontent.com/ehandywhyy/loot-box/main/overall.csv", header=None)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
0,0.142176,0.15688,0.014704,0.127937,0.142299,0.014361,0.155919,0.116134,-0.039784,0.124244,0.276458,0.152215,0.137177,0.641011,0.503834,0.125197,0.124527,-0.00067,0.122092,0.100097,-0.021995,0.128263,0.085967,-0.042296,0.120442,0.209372,0.08893,0.116911,0.165437,0.048526,0.160258,andy
1,0.141793,0.164635,0.022842,0.121625,0.152146,0.03052,0.137039,0.121543,-0.015496,0.104216,0.218603,0.114388,0.156909,0.346147,0.189238,0.138544,0.156244,0.017699,0.120454,0.088308,-0.032146,0.14449,0.082397,-0.062093,0.101188,0.126066,0.024878,0.098424,0.130661,0.032238,0.173819,andy
2,0.157152,0.14722,-0.009932,0.134796,0.14579,0.010994,0.124892,0.101461,-0.023431,0.112165,0.202102,0.089937,0.14086,0.274559,0.133699,0.142558,0.309424,0.166866,0.121442,0.101698,-0.019745,0.135452,0.082423,-0.053029,0.108598,0.155843,0.047244,0.118759,0.14655,0.027791,0.173512,andy
3,0.158673,0.142487,-0.016186,0.118908,0.159179,0.040271,0.133175,0.060895,-0.072279,0.106036,0.333517,0.227481,0.145889,0.28738,0.141491,0.125951,0.13623,0.010279,0.108067,0.114496,0.006429,0.114266,0.072556,-0.041711,0.101937,0.145692,0.043755,0.0963,0.12258,0.02628,0.190808,andy
4,0.166583,0.157102,-0.00948,0.115094,0.141103,0.026009,0.135121,0.082097,-0.053024,0.128655,0.192888,0.064234,0.165007,0.286537,0.12153,0.126328,0.119997,-0.006331,0.113122,0.082637,-0.030484,0.125474,0.089247,-0.036227,0.114211,0.149136,0.034925,0.121197,0.230113,0.108917,0.143299,andy


##### Check missing values

In [3]:
# print(df.isna().sum())
# # Remove missing values IF AVAILABLE and print first 10 samples
# # df = df.dropna()

##### Divide dataset into X and Y and check class balance
##### (Features are normalised to the range 0 (minimum) to 1 (maximum) after the train/test split below)


In [4]:
from sklearn.preprocessing import MinMaxScaler, minmax_scale

# raw values of the frame as a NumPy array
dataset = df.to_numpy()

# the first NUM_FEATURES columns are the features X;
# the column right after them holds the target (Classes) Y
X = dataset[:, :NUM_FEATURES].astype(float)
Y = dataset[:, NUM_FEATURES]

# check for class imbalance: number of samples per class
class_counts = df.groupby(Y).size()
print(class_counts)

andy     100
azfar    100
chris    100
dtype: int64


In [5]:
from sklearn.preprocessing import OneHotEncoder
from keras.utils.np_utils import to_categorical 

# The neural network needs one-hot targets, so fit a OneHotEncoder on the
# labels; the encoder expects a 2-D column, hence the reshape.
Y = Y.reshape(-1, 1)
encoder = OneHotEncoder()
encoder.fit(Y)

# list the encoded class columns
print(encoder.get_feature_names_out())

# report the shapes of X and Y
print(f"X dataset shape: {X.shape}")
print(f"Y dataset shape: {Y.shape}")

['x0_andy' 'x0_azfar' 'x0_chris']
X dataset shape: (300, 31)
Y dataset shape: (300, 1)


##### Preparing dataset

In [6]:
from sklearn.model_selection import train_test_split

# hold out 20% of the samples for testing (0.8/0.2 train/test split)
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=seed)

# scale to the 0-1 range; the scaler is fitted on the training data only
# and then applied unchanged to the test data
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# LSTM layers take (samples, timesteps, features): cast to float32 and
# insert the timestep axis
X_train = np.asarray(X_train, dtype=np.float32)
X_train = X_train.reshape(X_train.shape[0], TIMESTEPS, X_train.shape[1])
X_test = np.asarray(X_test, dtype=np.float32)
X_test = X_test.reshape(X_test.shape[0], TIMESTEPS, X_test.shape[1])

# one-hot encode the labels with the encoder fitted earlier
y_train = encoder.transform(y_train).toarray()
y_test = encoder.transform(y_test).toarray()

# one output unit per one-hot column
num_classes = y_train.shape[1]

print(f"X train shape: {X_train.shape}")
print(f"Y train shape: {y_train.shape}")
print(f"X test shape: {X_test.shape}")
print(f"Y test shape: {y_test.shape}")

X train shape: (240, 1, 31)
Y train shape: (240, 3)
X test shape: (60, 1, 31)
Y test shape: (60, 3)


### Create Model

In [7]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization,\
Flatten, LSTM

def create_model():
  """Build and compile the stacked-LSTM classifier.

  Three LSTM stages (128, 128 and 64 units), each followed by 20% dropout
  and batch normalisation, feed a flattened softmax layer with one output
  per class. Reads the notebook-level TIMESTEPS, NUM_FEATURES and
  num_classes.

  Returns:
    The compiled Sequential model (categorical cross-entropy loss, Adam
    optimizer, accuracy metric).
  """
  model = Sequential()

  for position, width in enumerate((128, 128, 64)):
    # only the first layer declares the input shape
    first_layer_args = {}
    if position == 0:
      first_layer_args['input_shape'] = (TIMESTEPS, NUM_FEATURES)
    model.add(LSTM(units=width, return_sequences=True, **first_layer_args))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())

  # Softmax for multi-class classification
  model.add(Flatten())
  model.add(Dense(num_classes, activation='softmax'))

  model.compile(
      loss='categorical_crossentropy',
      optimizer='adam',
      metrics=['accuracy'])
  return model

##### Wrap Model in KerasClassifier

In [8]:
# from scikeras.wrappers import KerasClassifier
from keras.wrappers.scikit_learn import KerasClassifier
# Wrap the model factory so scikit-learn utilities (e.g. cross_val_score) can
# train it. Training defaults: 100 epochs, batch size 10.
# NOTE(review): later cells pass predict() output through to_categorical, so
# they presumably rely on this legacy wrapper returning integer class indices —
# confirm before switching to the scikeras import above.
model = KerasClassifier(build_fn=create_model, epochs=100, 
                            batch_size=10)

### Perform KFold Validation

In [9]:
# from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

# 10-fold cross-validation; shuffling is seeded so the folds are repeatable
num_folds = 10
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)

##### Get Accuracy from KFold Validation

In [10]:
# score the wrapped model on each fold of the training data;
# error_score="raise" makes a failing fold raise instead of being scored
results = cross_val_score(
    model, X_train, y_train,
    cv=kfold, verbose=1, error_score="raise")

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:  7.0min finished


##### View Model Summary

In [11]:
# model = create_model()


In [12]:
# summarise the cross-validation scores as percentages
mean_pct = results.mean() * 100
std_pct = results.std() * 100
print("Accuracy of %.2f%% (with standard deviation of %.2f%%)" % (mean_pct, std_pct))

from tensorflow.keras.callbacks import EarlyStopping

# fit the model on the full training set
# Stop early once the training loss has not improved by at least 0.001 for
# 50 consecutive epochs.
es = EarlyStopping(monitor='loss', mode='min', min_delta=0.001, patience=50,
                   verbose=0)
# Keras documents `callbacks` as a list of callback instances, so wrap the
# single callback in a list rather than passing it bare.
model.fit(X_train, y_train, callbacks=[es])

Accuracy of 98.33% (with standard deviation of 2.04%)
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 

<keras.callbacks.History at 0x7f5e00869910>

In [13]:
# show the architecture of the fitted Keras model held by the wrapper
fitted_model = model.model
fitted_model.summary()

# save it to Google Drive as an .h5 file
fitted_model.save("/content/gdrive/My Drive/Colab Notebooks/overall_key_classifier.h5")

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_30 (LSTM)              (None, 1, 128)            81920     
                                                                 
 dropout_30 (Dropout)        (None, 1, 128)            0         
                                                                 
 batch_normalization_30 (Bat  (None, 1, 128)           512       
 chNormalization)                                                
                                                                 
 lstm_31 (LSTM)              (None, 1, 128)            131584    
                                                                 
 dropout_31 (Dropout)        (None, 1, 128)            0         
                                                                 
 batch_normalization_31 (Bat  (None, 1, 128)           512       
 chNormalization)                                    

In [25]:
from keras.models import load_model
from sklearn.metrics import accuracy_score, confusion_matrix

# load the saved model back into the wrapper
model.model = load_model("/content/gdrive/My Drive/Colab Notebooks/overall_key_classifier.h5")

# One-hot encode the predicted classes so they can be compared with y_test.
# num_classes is passed explicitly: without it to_categorical sizes the output
# from the largest predicted index, so the matrix would be narrower than
# y_test whenever the last class is never predicted.
y_pred = model.predict(X_test)
y_pred = to_categorical(y_pred, num_classes=num_classes)

# evaluate predictions
acc = accuracy_score(y_test, y_pred)
print('Accuracy: %.3f' % acc)

# Making the Confusion Matrix
# import sys
# np.set_printoptions(threshold=sys.maxsize)
# cm = confusion_matrix(y_test, y_pred)
# print(cm)

# from sklearn.metrics import roc_curve
# fpr_RF, tpr_RF, thresholds_RF = roc_curve(df.actual_label.values, df.model_RF.values)
# fpr_LR, tpr_LR, thresholds_LR = roc_curve(df.actual_label.values, df.model_LR.values)

Accuracy: 0.967


In [31]:
# load unseen samples to check that the model works on new data
new_df = pd.read_csv("https://raw.githubusercontent.com/ehandywhyy/loot-box/main/overall_testa.csv", header=None)

# first 17 rows, every column except the last one
test_row = new_df.iloc[0:17, :-1]

# apply the scaler fitted on the training data, cast to float32 and insert
# the timestep axis expected by the LSTM input
scaled_rows = scaler.transform(test_row.values.tolist())
test = np.asarray(scaled_rows, dtype=np.float32)
test = test.reshape(test.shape[0], TIMESTEPS, test.shape[1])

(17, 31)
(17, 1, 31)


In [36]:
# Predict the class of each unseen sample and map it back to its label.
x_predict = model.predict(test)
# Fix the one-hot width to num_classes: without it to_categorical sizes the
# output from the largest predicted index, and encoder.inverse_transform
# rejects a matrix with fewer columns than the encoder has classes.
x_predict = to_categorical(x_predict, num_classes=num_classes)
x_predict = encoder.inverse_transform(x_predict)
print(x_predict)

[['andy']
 ['andy']
 ['andy']
 ['andy']
 ['andy']
 ['chris']
 ['chris']
 ['chris']
 ['chris']
 ['chris']
 ['azfar']
 ['azfar']
 ['azfar']
 ['azfar']
 ['azfar']
 ['chris']
 ['chris']]
