In [1]:
"""keras_model.py: 

This model is the implementation of Gaussian Naive Bayes Classification of KDD datasets.
"""

__author__ = 'Youngseok Joung'
__copyright__ = "Copyright 2007, The Cogent Project"
__credits__ = ["Youngseok Joung"]
__license__ = "GPL"
__version__ = "1.0.1"
__maintainer__ = "Youngseok Joung"
__email__ = "none"
__status__ = "Production"


import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split as splitter
from keras import regularizers
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
import cProfile
import pstats
import os
import sys
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import pickle
from joblib import dump, load

In [2]:
def labelEncoding(model_name, data):
    for column in data.columns:
        # If the data type of the cell is 'object'(Categorical), it will be transformed as a numerical 
        if data[column].dtype == type(object):
            le_file_path = 'result/' + model_name + '/' + model_name + '_' + column + '_encoder.pkl'
            print(os.path.exists(le_file_path))
            if os.path.exists(le_file_path):
                pkl_file = open(le_file_path, 'rb')
                le = pickle.load(pkl_file) 
                pkl_file.close()
                data[column] = le.transform(data[column])            
            else:
                le = LabelEncoder()
                data[column] = le.fit_transform(data[column])
                #exporting the departure encoder
                output = open(le_file_path, 'wb')
                pickle.dump(le, output)
                output.close()
            if column == 'result':
                le_name_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
                print(le_name_mapping)
                
    return data, le

In [3]:
def Preprocessing(model_name, data):
    y = data.result
    x = data.drop('result', axis=1)
    
    # Preprocessing: Split 7:3 Train: Test
    x_train, x_test, y_train, y_test = splitter(x, y, test_size=0.3)
    
    return x_train, x_test, y_train, y_test

In [4]:
def train_and_test(model_name, x_train, x_test, y_train, y_test):
    # Profile: Start 
    profile = cProfile.Profile()
    profile.enable()
    
    # train and test
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    val_indices = 200
    x_val = x_train[-val_indices:]
    y_val = y_train[-val_indices:]
    # train and test
    model = Sequential()
    model.add(Dense(1024, activation='relu', input_dim=x_train.shape[1], kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dropout(0.5))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dropout(0.5))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dropout(0.5))
    model.add(Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(5, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    model.fit(x_train, y_train, epochs=15, batch_size=512, validation_data=(x_val, y_val))
    y_pred = model.predict(x_test)

    y_pred = np.argmax(y_pred, axis=1)
    y_test = np.argmax(y_test, axis=1)

    # Profile: End 
    profile.disable()
    profile.dump_stats('output.prof')
    stream = open('result/' + model_name + '/' + model_name + '_profiling.txt', 'w')
    stats = pstats.Stats('output.prof', stream=stream)
    stats.sort_stats('cumtime')
    stats.print_stats()
    os.remove('output.prof')
    
    # Estimation: Confusion Matrix & classification-report 
    _confusion_matrix = confusion_matrix(y_test, y_pred)
    _classification_report = classification_report(y_test, y_pred)
    
    with open('result/' + model_name + '/' + model_name + '_output.txt', 'w') as f:
        f.write("\n---Confusion Matrix---\n")
        f.write(np.array2string(_confusion_matrix, separator=', '))
        f.write("\n---Classification Report---\n")
        f.write(_classification_report)

    # Freezing model for production 
    dump(model, 'result/' + model_name + '/' + model_name + '_model.joblib') 
    
    return _confusion_matrix, _classification_report

In [5]:
model_name = 'keras_kdd'
# model_name = 'keras_nsl_kdd'
dataset_name = 'kdd_prediction'
# dataset_name = 'kdd_prediction_NSL'

data = pd.read_csv('./dataset/' + dataset_name + '.csv', delimiter=',', dtype={'protocol_type': str, 'service': str, 'flag': str, 'result': str})
print(data.head)

<bound method NDFrame.head of        duration protocol_type   service  flag  src_bytes  dst_bytes      land  \
0     -0.106216           tcp      smtp    SF  -0.003736  -0.040352 -0.011722   
1     -0.107850           tcp      http    SF  -0.004276  -0.036652 -0.011722   
2     -0.107850           tcp      http    SF  -0.004262   0.005956 -0.011722   
3     -0.107033           tcp       ftp    SF  -0.003699  -0.006723 -0.011722   
4     -0.107850           udp  domain_u    SF  -0.004368  -0.044940 -0.011722   
...         ...           ...       ...   ...        ...        ...       ...   
13446 -0.107850           tcp      http    SF  -0.004225   0.049683 -0.011722   
13447 -0.107850           tcp      nntp  RSTO  -0.004392  -0.047028 -0.011722   
13448 -0.107033           tcp      smtp    SF  -0.003734  -0.041519 -0.011722   
13449 -0.107850           tcp      nnsp   REJ  -0.004392  -0.047028 -0.011722   
13450 -0.107850           tcp      link    S0  -0.004392  -0.047028 -0.011722  

In [6]:
# labeling
data, _ = labelEncoding(model_name, data)

True
True
True
True
{'dos': 0, 'normal': 1, 'probe': 2, 'r2l': 3, 'u2r': 4}


In [7]:
# Preprocessing
x_train, x_test, y_train, y_test = Preprocessing(model_name, data)

In [8]:
# Train and Test
cm, cr = train_and_test(model_name, x_train, x_test, y_train, y_test)
print('\n-----Confusion Matrix-----\n')
print(cm)
print('\n-----Classification Report-----\n')
print(cr)

Epoch 1/15


 1/19 [>.............................] - ETA: 21s - loss: 8.2621 - accuracy: 0.0234

 2/19 [==>...........................] - ETA: 1s - loss: 13.3132 - accuracy: 0.2793

 3/19 [===>..........................] - ETA: 1s - loss: 11.4331 - accuracy: 0.3086

 4/19 [=====>........................] - ETA: 1s - loss: 10.3450 - accuracy: 0.3247

































Epoch 2/15
 1/19 [>.............................] - ETA: 1s - loss: 5.0419 - accuracy: 0.3672

 2/19 [==>...........................] - ETA: 1s - loss: 4.9399 - accuracy: 0.3711

 3/19 [===>..........................] - ETA: 1s - loss: 4.8679 - accuracy: 0.4290

 4/19 [=====>........................] - ETA: 1s - loss: 4.7894 - accuracy: 0.4595

































Epoch 3/15


 1/19 [>.............................] - ETA: 2s - loss: 3.5328 - accuracy: 0.7520

 2/19 [==>...........................] - ETA: 2s - loss: 3.4128 - accuracy: 0.7441

 3/19 [===>..........................] - ETA: 2s - loss: 3.3364 - accuracy: 0.7220

 4/19 [=====>........................] - ETA: 1s - loss: 3.3178 - accuracy: 0.7271

































Epoch 4/15


 1/19 [>.............................] - ETA: 2s - loss: 2.0916 - accuracy: 0.8691

 2/19 [==>...........................] - ETA: 2s - loss: 2.1403 - accuracy: 0.8379

 3/19 [===>..........................] - ETA: 1s - loss: 2.3250 - accuracy: 0.8249

 4/19 [=====>........................] - ETA: 1s - loss: 2.3769 - accuracy: 0.7500

































Epoch 5/15


 1/19 [>.............................] - ETA: 2s - loss: 1.7098 - accuracy: 0.8477

 2/19 [==>...........................] - ETA: 1s - loss: 1.6105 - accuracy: 0.8740

 3/19 [===>..........................] - ETA: 1s - loss: 1.5705 - accuracy: 0.8789

 4/19 [=====>........................] - ETA: 1s - loss: 1.5664 - accuracy: 0.8857

































Epoch 6/15


 1/19 [>.............................] - ETA: 2s - loss: 1.0473 - accuracy: 0.9375

 2/19 [==>...........................] - ETA: 2s - loss: 1.0805 - accuracy: 0.9268

 3/19 [===>..........................] - ETA: 2s - loss: 1.1485 - accuracy: 0.8906

 4/19 [=====>........................] - ETA: 1s - loss: 1.2523 - accuracy: 0.8530

































Epoch 7/15


 1/19 [>.............................] - ETA: 1s - loss: 0.7542 - accuracy: 0.9258

 2/19 [==>...........................] - ETA: 1s - loss: 0.8039 - accuracy: 0.9209

 3/19 [===>..........................] - ETA: 1s - loss: 0.8686 - accuracy: 0.9036

 4/19 [=====>........................] - ETA: 1s - loss: 0.9131 - accuracy: 0.8838

































Epoch 8/15


 1/19 [>.............................] - ETA: 1s - loss: 0.5709 - accuracy: 0.9336

 2/19 [==>...........................] - ETA: 1s - loss: 0.5524 - accuracy: 0.9365

 3/19 [===>..........................] - ETA: 1s - loss: 0.5499 - accuracy: 0.9329

 4/19 [=====>........................] - ETA: 1s - loss: 0.5486 - accuracy: 0.9360

































Epoch 9/15


 1/19 [>.............................] - ETA: 1s - loss: 0.4383 - accuracy: 0.9492

 2/19 [==>...........................] - ETA: 2s - loss: 0.4541 - accuracy: 0.9453

 3/19 [===>..........................] - ETA: 1s - loss: 0.4611 - accuracy: 0.9434

 4/19 [=====>........................] - ETA: 1s - loss: 0.4720 - accuracy: 0.9399

































Epoch 10/15


 1/19 [>.............................] - ETA: 2s - loss: 0.6661 - accuracy: 0.9160

 2/19 [==>...........................] - ETA: 2s - loss: 0.5675 - accuracy: 0.9229

 3/19 [===>..........................] - ETA: 1s - loss: 0.5196 - accuracy: 0.9271

 4/19 [=====>........................] - ETA: 1s - loss: 0.4817 - accuracy: 0.9321

































Epoch 11/15


 1/19 [>.............................] - ETA: 2s - loss: 0.4025 - accuracy: 0.9316

 2/19 [==>...........................] - ETA: 1s - loss: 0.3964 - accuracy: 0.9385

 3/19 [===>..........................] - ETA: 1s - loss: 0.3890 - accuracy: 0.9408

 4/19 [=====>........................] - ETA: 1s - loss: 0.3859 - accuracy: 0.9409

































Epoch 12/15


 1/19 [>.............................] - ETA: 2s - loss: 0.3623 - accuracy: 0.9375

 2/19 [==>...........................] - ETA: 1s - loss: 0.3515 - accuracy: 0.9443

 3/19 [===>..........................] - ETA: 1s - loss: 0.3483 - accuracy: 0.9440

 4/19 [=====>........................] - ETA: 1s - loss: 0.3454 - accuracy: 0.9458

































Epoch 13/15


 1/19 [>.............................] - ETA: 2s - loss: 0.5021 - accuracy: 0.9375

 2/19 [==>...........................] - ETA: 1s - loss: 0.7714 - accuracy: 0.8408

 3/19 [===>..........................] - ETA: 1s - loss: 0.7653 - accuracy: 0.8366

KeyboardInterrupt: 

In [None]:
def production(model_name, data):
    real_data, le = labelEncoding(model_name, data)
    real_y = real_data.result
    real_x = real_data.drop('result', axis=1)
#     print(real_y)
#     print(real_x)

    clf = load('result/' + model_name + '/' + model_name + '_model.joblib')
    yy_pred = clf.predict(real_x)
    pred_label = le.inverse_transform(yy_pred)
    real_label = le.inverse_transform(real_y)

    return pred_label, real_label

In [None]:
# Production
real_data = pd.read_csv('./dataset/kdd_prediction.csv', delimiter=',', dtype={'protocol_type': str, 'service': str, 'flag': str, 'result': str})
real_data = real_data.head(1)

pred_label, real_label = production(model_name, real_data)
print(pred_label, real_label)