In [1]:
"""keras_model.py: 

This model is the implementation of Gaussian Naive Bayes Classification of KDD datasets.
"""

__author__ = 'Youngseok Joung'
__copyright__ = "Copyright 2007, The Cogent Project"
__credits__ = ["Youngseok Joung"]
__license__ = "GPL"
__version__ = "1.0.1"
__maintainer__ = "Youngseok Joung"
__email__ = "none"
__status__ = "Production"


import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split as splitter
from keras import regularizers
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
import cProfile
import pstats
import os
import sys
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import pickle
from joblib import dump, load

In [2]:
def labelEncoding(model_name, data):
    for column in data.columns:
        # If the data type of the cell is 'object'(Categorical), it will be transformed as a numerical 
        if data[column].dtype == type(object):
            le_file_path = 'result/' + model_name + '/' + model_name + '_' + column + '_encoder.pkl'
            print(os.path.exists(le_file_path))
            if os.path.exists(le_file_path):
                pkl_file = open(le_file_path, 'rb')
                le = pickle.load(pkl_file) 
                pkl_file.close()
                data[column] = le.transform(data[column])            
            else:
                le = LabelEncoder()
                data[column] = le.fit_transform(data[column])
                #exporting the departure encoder
                output = open(le_file_path, 'wb')
                pickle.dump(le, output)
                output.close()
            if column == 'result':
                le_name_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
                print(le_name_mapping)
                
    return data, le

In [3]:
def Preprocessing(model_name, data):
    y = data.result
    x = data.drop('result', axis=1)
    
    # Preprocessing: Split 7:3 Train: Test
    x_train, x_test, y_train, y_test = splitter(x, y, test_size=0.3)
    
    return x_train, x_test, y_train, y_test

In [4]:
def train_and_test(model_name, x_train, x_test, y_train, y_test):
    # Profile: Start 
    profile = cProfile.Profile()
    profile.enable()
    
    # train and test
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    val_indices = 200
    x_val = x_train[-val_indices:]
    y_val = y_train[-val_indices:]
    # train and test
    model = Sequential()
    model.add(Dense(1024, activation='relu', input_dim=x_train.shape[1], kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dropout(0.5))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dropout(0.5))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dropout(0.5))
    model.add(Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(5, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    model.fit(x_train, y_train, epochs=15, batch_size=512, validation_data=(x_val, y_val))
    y_pred = model.predict(x_test)

    y_pred = np.argmax(y_pred, axis=1)
    y_test = np.argmax(y_test, axis=1)

    # Profile: End 
    profile.disable()
    profile.dump_stats('output.prof')
    stream = open('result/' + model_name + '/' + model_name + '_profiling.txt', 'w')
    stats = pstats.Stats('output.prof', stream=stream)
    stats.sort_stats('cumtime')
    stats.print_stats()
    os.remove('output.prof')
    
    # Estimation: Confusion Matrix & classification-report 
    _confusion_matrix = confusion_matrix(y_test, y_pred)
    _classification_report = classification_report(y_test, y_pred)
    
    with open('result/' + model_name + '/' + model_name + '_output.txt', 'w') as f:
        f.write("\n---Confusion Matrix---\n")
        f.write(np.array2string(_confusion_matrix, separator=', '))
        f.write("\n---Classification Report---\n")
        f.write(_classification_report)

    # Freezing model for production 
    dump(model, 'result/' + model_name + '/' + model_name + '_model.joblib') 
    
    return _confusion_matrix, _classification_report

In [5]:
model_name = 'keras_kdd'
# model_name = 'keras_nsl_kdd'
dataset_name = 'kdd_prediction'
# dataset_name = 'kdd_prediction_NSL'

data = pd.read_csv('./dataset/' + dataset_name + '.csv', delimiter=',', dtype={'protocol_type': str, 'service': str, 'flag': str, 'result': str})
print(data.head)

<bound method NDFrame.head of        duration protocol_type   service  flag  src_bytes  dst_bytes      land  \
0     -0.106216           tcp      smtp    SF  -0.003736  -0.040352 -0.011722   
1     -0.107850           tcp      http    SF  -0.004276  -0.036652 -0.011722   
2     -0.107850           tcp      http    SF  -0.004262   0.005956 -0.011722   
3     -0.107033           tcp       ftp    SF  -0.003699  -0.006723 -0.011722   
4     -0.107850           udp  domain_u    SF  -0.004368  -0.044940 -0.011722   
...         ...           ...       ...   ...        ...        ...       ...   
13446 -0.107850           tcp      http    SF  -0.004225   0.049683 -0.011722   
13447 -0.107850           tcp      nntp  RSTO  -0.004392  -0.047028 -0.011722   
13448 -0.107033           tcp      smtp    SF  -0.003734  -0.041519 -0.011722   
13449 -0.107850           tcp      nnsp   REJ  -0.004392  -0.047028 -0.011722   
13450 -0.107850           tcp      link    S0  -0.004392  -0.047028 -0.011722  

In [6]:
# labeling
data, _ = labelEncoding(model_name, data)

True
True
True
True
{'dos': 0, 'normal': 1, 'probe': 2, 'r2l': 3, 'u2r': 4}


In [7]:
# Preprocessing
x_train, x_test, y_train, y_test = Preprocessing(model_name, data)

In [8]:
# Train and Test
cm, cr = train_and_test(model_name, x_train, x_test, y_train, y_test)
print('\n-----Confusion Matrix-----\n')
print(cm)
print('\n-----Classification Report-----\n')
print(cr)

Epoch 1/15


 1/19 [>.............................] - ETA: 22s - loss: 7.9794 - accuracy: 0.2051

 2/19 [==>...........................] - ETA: 1s - loss: 30.7762 - accuracy: 0.3750

 3/19 [===>..........................] - ETA: 1s - loss: 23.0001 - accuracy: 0.3796

 4/19 [=====>........................] - ETA: 1s - loss: 19.0111 - accuracy: 0.3818

































Epoch 2/15
 1/19 [>.............................] - ETA: 1s - loss: 5.2795 - accuracy: 0.3340

 2/19 [==>...........................] - ETA: 1s - loss: 5.1928 - accuracy: 0.3496

 3/19 [===>..........................] - ETA: 1s - loss: 5.1230 - accuracy: 0.4421

 4/19 [=====>........................] - ETA: 1s - loss: 5.0695 - accuracy: 0.4253

































Epoch 3/15
 1/19 [>.............................] - ETA: 1s - loss: 3.7448 - accuracy: 0.5859

 2/19 [==>...........................] - ETA: 1s - loss: 3.6815 - accuracy: 0.6318

 3/19 [===>..........................] - ETA: 1s - loss: 3.7616 - accuracy: 0.6751

 4/19 [=====>........................] - ETA: 1s - loss: 3.7502 - accuracy: 0.6533

































Epoch 4/15


 1/19 [>.............................] - ETA: 1s - loss: 3.3709 - accuracy: 0.8223

 2/19 [==>...........................] - ETA: 1s - loss: 3.2648 - accuracy: 0.8105

 3/19 [===>..........................] - ETA: 1s - loss: 3.1655 - accuracy: 0.8223

 4/19 [=====>........................] - ETA: 1s - loss: 3.0820 - accuracy: 0.7690

































Epoch 5/15
 1/19 [>.............................] - ETA: 1s - loss: 2.0837 - accuracy: 0.8750

 2/19 [==>...........................] - ETA: 1s - loss: 2.1413 - accuracy: 0.8359

 3/19 [===>..........................] - ETA: 1s - loss: 2.4106 - accuracy: 0.8294

 4/19 [=====>........................] - ETA: 1s - loss: 2.3826 - accuracy: 0.7676

































Epoch 6/15
 1/19 [>.............................] - ETA: 1s - loss: 1.5457 - accuracy: 0.9219

 2/19 [==>...........................] - ETA: 1s - loss: 1.4770 - accuracy: 0.9287

 3/19 [===>..........................] - ETA: 1s - loss: 1.4650 - accuracy: 0.9271

 4/19 [=====>........................] - ETA: 1s - loss: 1.4613 - accuracy: 0.9253

































Epoch 7/15
 1/19 [>.............................] - ETA: 1s - loss: 1.1255 - accuracy: 0.8906

 2/19 [==>...........................] - ETA: 1s - loss: 1.1083 - accuracy: 0.9082

 3/19 [===>..........................] - ETA: 1s - loss: 1.0892 - accuracy: 0.9167

 4/19 [=====>........................] - ETA: 1s - loss: 1.1017 - accuracy: 0.9155

































Epoch 8/15
 1/19 [>.............................] - ETA: 1s - loss: 0.8460 - accuracy: 0.9277

 2/19 [==>...........................] - ETA: 1s - loss: 0.8607 - accuracy: 0.9336

 3/19 [===>..........................] - ETA: 1s - loss: 0.8729 - accuracy: 0.9310

 4/19 [=====>........................] - ETA: 1s - loss: 0.8442 - accuracy: 0.9297

































Epoch 9/15
 1/19 [>.............................] - ETA: 1s - loss: 0.5594 - accuracy: 0.9355

 2/19 [==>...........................] - ETA: 1s - loss: 0.5726 - accuracy: 0.9365

 3/19 [===>..........................] - ETA: 1s - loss: 0.5647 - accuracy: 0.9375

 4/19 [=====>........................] - ETA: 1s - loss: 0.5567 - accuracy: 0.9385

































Epoch 10/15


 1/19 [>.............................] - ETA: 1s - loss: 0.5950 - accuracy: 0.9375

 2/19 [==>...........................] - ETA: 1s - loss: 0.5573 - accuracy: 0.9307

 3/19 [===>..........................] - ETA: 1s - loss: 0.5329 - accuracy: 0.9375

 4/19 [=====>........................] - ETA: 1s - loss: 0.5273 - accuracy: 0.9346

































Epoch 11/15


 1/19 [>.............................] - ETA: 1s - loss: 0.4860 - accuracy: 0.9355

 2/19 [==>...........................] - ETA: 1s - loss: 0.4604 - accuracy: 0.9404

 3/19 [===>..........................] - ETA: 1s - loss: 0.4350 - accuracy: 0.9447

 4/19 [=====>........................] - ETA: 1s - loss: 0.4243 - accuracy: 0.9458

































Epoch 12/15
 1/19 [>.............................] - ETA: 1s - loss: 0.5104 - accuracy: 0.9316

 2/19 [==>...........................] - ETA: 1s - loss: 0.4603 - accuracy: 0.9375

 3/19 [===>..........................] - ETA: 1s - loss: 0.4483 - accuracy: 0.9362

 4/19 [=====>........................] - ETA: 1s - loss: 0.4382 - accuracy: 0.9360

































Epoch 13/15
 1/19 [>.............................] - ETA: 1s - loss: 0.5129 - accuracy: 0.9336

 2/19 [==>...........................] - ETA: 1s - loss: 0.4369 - accuracy: 0.9385

 3/19 [===>..........................] - ETA: 1s - loss: 0.4064 - accuracy: 0.9427

 4/19 [=====>........................] - ETA: 1s - loss: 0.3935 - accuracy: 0.9419

































Epoch 14/15
 1/19 [>.............................] - ETA: 1s - loss: 0.3447 - accuracy: 0.9414

 2/19 [==>...........................] - ETA: 1s - loss: 0.3331 - accuracy: 0.9473

 3/19 [===>..........................] - ETA: 1s - loss: 0.3561 - accuracy: 0.9427

 4/19 [=====>........................] - ETA: 1s - loss: 0.3572 - accuracy: 0.9429

































Epoch 15/15


 1/19 [>.............................] - ETA: 1s - loss: 0.4065 - accuracy: 0.9160

 2/19 [==>...........................] - ETA: 1s - loss: 0.3869 - accuracy: 0.9307

KeyboardInterrupt: 

In [None]:
def production(model_name, data):
    real_data, le = labelEncoding(model_name, data)
    real_y = real_data.result
    real_x = real_data.drop('result', axis=1)
#     print(real_y)
#     print(real_x)

    clf = load('result/' + model_name + '/' + model_name + '_model.joblib')
    yy_pred = clf.predict(real_x)
    pred_label = le.inverse_transform(yy_pred)
    real_label = le.inverse_transform(real_y)

    return pred_label, real_label

In [None]:
# Production
real_data = pd.read_csv('./dataset/kdd_prediction.csv', delimiter=',', dtype={'protocol_type': str, 'service': str, 'flag': str, 'result': str})
real_data = real_data.head(1)

pred_label, real_label = production(model_name, real_data)
print(pred_label, real_label)