In [1]:
"""keras_model.py: 

This model is the implementation of Gaussian Naive Bayes Classification of KDD datasets.
"""

__author__ = 'Youngseok Joung'
__copyright__ = "Copyright 2007, The Cogent Project"
__credits__ = ["Youngseok Joung"]
__license__ = "GPL"
__version__ = "1.0.1"
__maintainer__ = "Youngseok Joung"
__email__ = "none"
__status__ = "Production"


import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split as splitter
from keras import regularizers
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
import cProfile
import pstats
import os
import sys
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import pickle
from joblib import dump, load

In [2]:
def labelEncoding(model_name, data):
    for column in data.columns:
        # If the data type of the cell is 'object'(Categorical), it will be transformed as a numerical 
        if data[column].dtype == type(object):
            le_file_path = 'result/' + model_name + '/' + model_name + '_' + column + '_encoder.pkl'
            print(os.path.exists(le_file_path))
            if os.path.exists(le_file_path):
                pkl_file = open(le_file_path, 'rb')
                le = pickle.load(pkl_file) 
                pkl_file.close()
                data[column] = le.transform(data[column])            
            else:
                le = LabelEncoder()
                data[column] = le.fit_transform(data[column])
                #exporting the departure encoder
                output = open(le_file_path, 'wb')
                pickle.dump(le, output)
                output.close()
            if column == 'result':
                le_name_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
                print(le_name_mapping)
                
    return data, le

In [3]:
def Preprocessing(model_name, data):
    y = data.result
    x = data.drop('result', axis=1)
    
    # Preprocessing: Split 7:3 Train: Test
    x_train, x_test, y_train, y_test = splitter(x, y, test_size=0.3)
    
    return x_train, x_test, y_train, y_test

In [4]:
def train_and_test(model_name, x_train, x_test, y_train, y_test):
    # Profile: Start 
    profile = cProfile.Profile()
    profile.enable()
    
    # train and test
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    val_indices = 200
    x_val = x_train[-val_indices:]
    y_val = y_train[-val_indices:]
    # train and test
    model = Sequential()
    model.add(Dense(1024, activation='relu', input_dim=x_train.shape[1], kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dropout(0.5))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dropout(0.5))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dropout(0.5))
    model.add(Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(5, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    model.fit(x_train, y_train, epochs=15, batch_size=512, validation_data=(x_val, y_val))
    y_pred = model.predict(x_test)

    y_pred = np.argmax(y_pred, axis=1)
    y_test = np.argmax(y_test, axis=1)

    # Profile: End 
    profile.disable()
    profile.dump_stats('output.prof')
    stream = open('result/' + model_name + '/' + model_name + '_profiling.txt', 'w')
    stats = pstats.Stats('output.prof', stream=stream)
    stats.sort_stats('cumtime')
    stats.print_stats()
    os.remove('output.prof')
    
    # Estimation: Confusion Matrix & classification-report 
    _confusion_matrix = confusion_matrix(y_test, y_pred)
    _classification_report = classification_report(y_test, y_pred)
    
    with open('result/' + model_name + '/' + model_name + '_output.txt', 'w') as f:
        f.write("\n---Confusion Matrix---\n")
        f.write(np.array2string(_confusion_matrix, separator=', '))
        f.write("\n---Classification Report---\n")
        f.write(_classification_report)

    # Freezing model for production 
    dump(model, 'result/' + model_name + '/' + model_name + '_model.joblib') 
    
    return _confusion_matrix, _classification_report

In [5]:
model_name = 'keras_kdd'
# model_name = 'keras_nsl_kdd'
dataset_name = 'kdd_prediction'
# dataset_name = 'kdd_prediction_NSL'

data = pd.read_csv('./dataset/' + dataset_name + '.csv', delimiter=',', dtype={'protocol_type': str, 'service': str, 'flag': str, 'result': str})
print(data.head)

<bound method NDFrame.head of        duration protocol_type   service  flag  src_bytes  dst_bytes      land  \
0     -0.106216           tcp      smtp    SF  -0.003736  -0.040352 -0.011722   
1     -0.107850           tcp      http    SF  -0.004276  -0.036652 -0.011722   
2     -0.107850           tcp      http    SF  -0.004262   0.005956 -0.011722   
3     -0.107033           tcp       ftp    SF  -0.003699  -0.006723 -0.011722   
4     -0.107850           udp  domain_u    SF  -0.004368  -0.044940 -0.011722   
...         ...           ...       ...   ...        ...        ...       ...   
13446 -0.107850           tcp      http    SF  -0.004225   0.049683 -0.011722   
13447 -0.107850           tcp      nntp  RSTO  -0.004392  -0.047028 -0.011722   
13448 -0.107033           tcp      smtp    SF  -0.003734  -0.041519 -0.011722   
13449 -0.107850           tcp      nnsp   REJ  -0.004392  -0.047028 -0.011722   
13450 -0.107850           tcp      link    S0  -0.004392  -0.047028 -0.011722  

In [6]:
# labeling
data, _ = labelEncoding(model_name, data)

True
True
True
True
{'dos': 0, 'normal': 1, 'probe': 2, 'r2l': 3, 'u2r': 4}




In [7]:
# Preprocessing
x_train, x_test, y_train, y_test = Preprocessing(model_name, data)

In [8]:
# Train and Test
cm, cr = train_and_test(model_name, x_train, x_test, y_train, y_test)
print('\n-----Confusion Matrix-----\n')
print(cm)
print('\n-----Classification Report-----\n')
print(cr)

Epoch 1/15


 1/19 [>.............................] - ETA: 0s - loss: 7.8889 - accuracy: 0.3203

 2/19 [==>...........................] - ETA: 0s - loss: 49.7952 - accuracy: 0.4395

 3/19 [===>..........................] - ETA: 1s - loss: 35.7169 - accuracy: 0.4264

 4/19 [=====>........................] - ETA: 1s - loss: 28.5571 - accuracy: 0.4097

































Epoch 2/15
 1/19 [>.............................] - ETA: 0s - loss: 5.1772 - accuracy: 0.7129

 2/19 [==>...........................] - ETA: 0s - loss: 5.5001 - accuracy: 0.6436

 3/19 [===>..........................] - ETA: 1s - loss: 5.4403 - accuracy: 0.5410

 4/19 [=====>........................] - ETA: 1s - loss: 5.3651 - accuracy: 0.5015

































Epoch 3/15
 1/19 [>.............................] - ETA: 0s - loss: 3.8891 - accuracy: 0.7578

 2/19 [==>...........................] - ETA: 0s - loss: 3.9071 - accuracy: 0.7588

 3/19 [===>..........................] - ETA: 1s - loss: 3.8965 - accuracy: 0.7188

 4/19 [=====>........................] - ETA: 1s - loss: 3.8964 - accuracy: 0.7144

































Epoch 4/15
 1/19 [>.............................] - ETA: 0s - loss: 3.0777 - accuracy: 0.8223

 2/19 [==>...........................] - ETA: 0s - loss: 2.9834 - accuracy: 0.8408

 3/19 [===>..........................] - ETA: 1s - loss: 2.9157 - accuracy: 0.8555

 4/19 [=====>........................] - ETA: 1s - loss: 2.8544 - accuracy: 0.8657

































Epoch 5/15
 1/19 [>.............................] - ETA: 0s - loss: 2.3243 - accuracy: 0.9023

 2/19 [==>...........................] - ETA: 0s - loss: 2.2882 - accuracy: 0.9043

 3/19 [===>..........................] - ETA: 1s - loss: 2.3207 - accuracy: 0.9004

 4/19 [=====>........................] - ETA: 1s - loss: 2.2993 - accuracy: 0.9082

































Epoch 6/15
 1/19 [>.............................] - ETA: 0s - loss: 1.7609 - accuracy: 0.9141

 2/19 [==>...........................] - ETA: 0s - loss: 1.7121 - accuracy: 0.9199

 3/19 [===>..........................] - ETA: 1s - loss: 1.6783 - accuracy: 0.9258

 4/19 [=====>........................] - ETA: 1s - loss: 1.6783 - accuracy: 0.9243

































Epoch 7/15


 1/19 [>.............................] - ETA: 0s - loss: 1.3489 - accuracy: 0.9141

 2/19 [==>...........................] - ETA: 0s - loss: 1.2869 - accuracy: 0.9219

 3/19 [===>..........................] - ETA: 1s - loss: 1.2826 - accuracy: 0.9115

 4/19 [=====>........................] - ETA: 1s - loss: 1.2739 - accuracy: 0.9131

































Epoch 8/15


 1/19 [>.............................] - ETA: 0s - loss: 1.0073 - accuracy: 0.8984

 2/19 [==>...........................] - ETA: 0s - loss: 0.9566 - accuracy: 0.9111

 3/19 [===>..........................] - ETA: 1s - loss: 0.9214 - accuracy: 0.9193

 4/19 [=====>........................] - ETA: 1s - loss: 0.9118 - accuracy: 0.9219

































Epoch 9/15


 1/19 [>.............................] - ETA: 0s - loss: 0.8407 - accuracy: 0.9336

 2/19 [==>...........................] - ETA: 0s - loss: 0.8098 - accuracy: 0.9316

 3/19 [===>..........................] - ETA: 1s - loss: 0.7780 - accuracy: 0.9303

 4/19 [=====>........................] - ETA: 1s - loss: 0.7435 - accuracy: 0.9297

































Epoch 10/15


 1/19 [>.............................] - ETA: 0s - loss: 0.5499 - accuracy: 0.9316

 2/19 [==>...........................] - ETA: 1s - loss: 0.5572 - accuracy: 0.9268

 3/19 [===>..........................] - ETA: 1s - loss: 0.5814 - accuracy: 0.9258

 4/19 [=====>........................] - ETA: 1s - loss: 0.5930 - accuracy: 0.9292

































Epoch 11/15
 1/19 [>.............................] - ETA: 0s - loss: 0.7918 - accuracy: 0.8770

 2/19 [==>...........................] - ETA: 0s - loss: 0.7087 - accuracy: 0.8887

 3/19 [===>..........................] - ETA: 1s - loss: 0.6792 - accuracy: 0.8965

 4/19 [=====>........................] - ETA: 1s - loss: 0.6224 - accuracy: 0.9062

































Epoch 12/15
 1/19 [>.............................] - ETA: 0s - loss: 0.4820 - accuracy: 0.9453

 2/19 [==>...........................] - ETA: 0s - loss: 0.4474 - accuracy: 0.9443

 3/19 [===>..........................] - ETA: 1s - loss: 0.4385 - accuracy: 0.9434

 4/19 [=====>........................] - ETA: 1s - loss: 0.4294 - accuracy: 0.9448

































Epoch 13/15
 1/19 [>.............................] - ETA: 0s - loss: 0.6189 - accuracy: 0.8789

 2/19 [==>...........................] - ETA: 0s - loss: 0.8719 - accuracy: 0.8408

 3/19 [===>..........................] - ETA: 1s - loss: 0.8463 - accuracy: 0.8444

 4/19 [=====>........................] - ETA: 1s - loss: 0.7488 - accuracy: 0.8667































 - ETA: 0s - loss: 0.4649 - accuracy: 0.9248



Epoch 14/15


 1/19 [>.............................] - ETA: 0s - loss: 0.3570 - accuracy: 0.9355

 2/19 [==>...........................] - ETA: 0s - loss: 0.3602 - accuracy: 0.9346

 3/19 [===>..........................] - ETA: 1s - loss: 0.3699 - accuracy: 0.9355

 4/19 [=====>........................] - ETA: 1s - loss: 0.3886 - accuracy: 0.9326











KeyboardInterrupt: 

In [None]:
def production(model_name, data):
    real_data, le = labelEncoding(model_name, data)
    real_y = real_data.result
    real_x = real_data.drop('result', axis=1)
#     print(real_y)
#     print(real_x)

    clf = load('result/' + model_name + '/' + model_name + '_model.joblib')
    yy_pred = clf.predict(real_x)
    pred_label = le.inverse_transform(yy_pred)
    real_label = le.inverse_transform(real_y)

    return pred_label, real_label

In [None]:
# Production
real_data = pd.read_csv('./dataset/kdd_prediction.csv', delimiter=',', dtype={'protocol_type': str, 'service': str, 'flag': str, 'result': str})
real_data = real_data.head(1)

pred_label, real_label = production(model_name, real_data)
print(pred_label, real_label)