In [1]:
"""keras_model.py: 

This model is the implementation of Gaussian Naive Bayes Classification of KDD datasets.
"""

__author__ = 'Youngseok Joung'
__copyright__ = "Copyright 2007, The Cogent Project"
__credits__ = ["Youngseok Joung"]
__license__ = "GPL"
__version__ = "1.0.1"
__maintainer__ = "Youngseok Joung"
__email__ = "none"
__status__ = "Production"


import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split as splitter
from keras import regularizers
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
import cProfile
import pstats
import os
import sys
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import pickle
from joblib import dump, load

In [2]:
def labelEncoding(model_name, data):
    for column in data.columns:
        # If the data type of the cell is 'object'(Categorical), it will be transformed as a numerical 
        if data[column].dtype == type(object):
            le_file_path = 'result/' + model_name + '/' + model_name + '_' + column + '_encoder.pkl'
            print(os.path.exists(le_file_path))
            if os.path.exists(le_file_path):
                pkl_file = open(le_file_path, 'rb')
                le = pickle.load(pkl_file) 
                pkl_file.close()
                data[column] = le.transform(data[column])            
            else:
                le = LabelEncoder()
                data[column] = le.fit_transform(data[column])
                #exporting the departure encoder
                output = open(le_file_path, 'wb')
                pickle.dump(le, output)
                output.close()
            if column == 'result':
                le_name_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
                print(le_name_mapping)
                
    return data, le

In [3]:
def Preprocessing(model_name, data):
    y = data.result
    x = data.drop('result', axis=1)
    
    # Preprocessing: Split 7:3 Train: Test
    x_train, x_test, y_train, y_test = splitter(x, y, test_size=0.3)
    
    return x_train, x_test, y_train, y_test

In [4]:
def train_and_test(model_name, x_train, x_test, y_train, y_test):
    # Profile: Start 
    profile = cProfile.Profile()
    profile.enable()
    
    # train and test
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    val_indices = 200
    x_val = x_train[-val_indices:]
    y_val = y_train[-val_indices:]
    # train and test
    model = Sequential()
    model.add(Dense(1024, activation='relu', input_dim=x_train.shape[1], kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dropout(0.5))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dropout(0.5))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dropout(0.5))
    model.add(Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(5, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    model.fit(x_train, y_train, epochs=15, batch_size=512, validation_data=(x_val, y_val))
    y_pred = model.predict(x_test)

    y_pred = np.argmax(y_pred, axis=1)
    y_test = np.argmax(y_test, axis=1)

    # Profile: End 
    profile.disable()
    profile.dump_stats('output.prof')
    stream = open('result/' + model_name + '/' + model_name + '_profiling.txt', 'w')
    stats = pstats.Stats('output.prof', stream=stream)
    stats.sort_stats('cumtime')
    stats.print_stats()
    os.remove('output.prof')
    
    # Estimation: Confusion Matrix & classification-report 
    _confusion_matrix = confusion_matrix(y_test, y_pred)
    _classification_report = classification_report(y_test, y_pred)
    
    with open('result/' + model_name + '/' + model_name + '_output.txt', 'w') as f:
        f.write("\n---Confusion Matrix---\n")
        f.write(np.array2string(_confusion_matrix, separator=', '))
        f.write("\n---Classification Report---\n")
        f.write(_classification_report)

    # Freezing model for production 
    dump(model, 'result/' + model_name + '/' + model_name + '_model.joblib') 
    
    return _confusion_matrix, _classification_report

In [5]:
model_name = 'keras_kdd'
# model_name = 'keras_nsl_kdd'
dataset_name = 'kdd_prediction'
# dataset_name = 'kdd_prediction_NSL'

data = pd.read_csv('./dataset/' + dataset_name + '.csv', delimiter=',', dtype={'protocol_type': str, 'service': str, 'flag': str, 'result': str})
print(data.head)

<bound method NDFrame.head of        duration protocol_type   service  flag  src_bytes  dst_bytes      land  \
0     -0.106216           tcp      smtp    SF  -0.003736  -0.040352 -0.011722   
1     -0.107850           tcp      http    SF  -0.004276  -0.036652 -0.011722   
2     -0.107850           tcp      http    SF  -0.004262   0.005956 -0.011722   
3     -0.107033           tcp       ftp    SF  -0.003699  -0.006723 -0.011722   
4     -0.107850           udp  domain_u    SF  -0.004368  -0.044940 -0.011722   
...         ...           ...       ...   ...        ...        ...       ...   
13446 -0.107850           tcp      http    SF  -0.004225   0.049683 -0.011722   
13447 -0.107850           tcp      nntp  RSTO  -0.004392  -0.047028 -0.011722   
13448 -0.107033           tcp      smtp    SF  -0.003734  -0.041519 -0.011722   
13449 -0.107850           tcp      nnsp   REJ  -0.004392  -0.047028 -0.011722   
13450 -0.107850           tcp      link    S0  -0.004392  -0.047028 -0.011722  

In [6]:
# labeling
data, _ = labelEncoding(model_name, data)

True
True
True
True
{'dos': 0, 'normal': 1, 'probe': 2, 'r2l': 3, 'u2r': 4}




In [7]:
# Preprocessing
x_train, x_test, y_train, y_test = Preprocessing(model_name, data)

In [8]:
# Train and Test
cm, cr = train_and_test(model_name, x_train, x_test, y_train, y_test)
print('\n-----Confusion Matrix-----\n')
print(cm)
print('\n-----Classification Report-----\n')
print(cr)

Epoch 1/15


 1/19 [>.............................] - ETA: 0s - loss: 8.0970 - accuracy: 0.2461

 2/19 [==>...........................] - ETA: 0s - loss: 24.9630 - accuracy: 0.4014

 3/19 [===>..........................] - ETA: 1s - loss: 19.0564 - accuracy: 0.3848

 4/19 [=====>........................] - ETA: 1s - loss: 16.0372 - accuracy: 0.3833

































Epoch 2/15
 1/19 [>.............................] - ETA: 0s - loss: 5.0132 - accuracy: 0.5762

 2/19 [==>...........................] - ETA: 0s - loss: 4.9561 - accuracy: 0.5811

 3/19 [===>..........................] - ETA: 1s - loss: 4.8927 - accuracy: 0.5703

 4/19 [=====>........................] - ETA: 1s - loss: 4.8592 - accuracy: 0.5815





































Epoch 3/15
 1/19 [>.............................] - ETA: 0s - loss: 3.7520 - accuracy: 0.7148

 2/19 [==>...........................] - ETA: 0s - loss: 3.6509 - accuracy: 0.7432

 3/19 [===>..........................] - ETA: 1s - loss: 3.5861 - accuracy: 0.7533

 4/19 [=====>........................] - ETA: 1s - loss: 3.6422 - accuracy: 0.7471

































Epoch 4/15
 1/19 [>.............................] - ETA: 0s - loss: 2.7804 - accuracy: 0.7617

 2/19 [==>...........................] - ETA: 0s - loss: 2.7564 - accuracy: 0.8027

 3/19 [===>..........................] - ETA: 1s - loss: 2.7098 - accuracy: 0.7982

 4/19 [=====>........................] - ETA: 1s - loss: 2.6953 - accuracy: 0.8047

































Epoch 5/15
 1/19 [>.............................] - ETA: 0s - loss: 1.8266 - accuracy: 0.9062

 2/19 [==>...........................] - ETA: 0s - loss: 1.8465 - accuracy: 0.9033

 3/19 [===>..........................] - ETA: 1s - loss: 1.8472 - accuracy: 0.9004

 4/19 [=====>........................] - ETA: 1s - loss: 1.8498 - accuracy: 0.9009

































Epoch 6/15
 1/19 [>.............................] - ETA: 0s - loss: 2.1199 - accuracy: 0.8086

 2/19 [==>...........................] - ETA: 0s - loss: 2.1770 - accuracy: 0.6494

 3/19 [===>..........................] - ETA: 1s - loss: 1.9615 - accuracy: 0.7122

 4/19 [=====>........................] - ETA: 1s - loss: 1.8360 - accuracy: 0.7603

































Epoch 7/15
 1/19 [>.............................] - ETA: 0s - loss: 1.1415 - accuracy: 0.8809

 2/19 [==>...........................] - ETA: 0s - loss: 1.0748 - accuracy: 0.8955

 3/19 [===>..........................] - ETA: 1s - loss: 1.0258 - accuracy: 0.9056

 4/19 [=====>........................] - ETA: 1s - loss: 1.0080 - accuracy: 0.9077

































Epoch 8/15


 1/19 [>.............................] - ETA: 0s - loss: 0.7582 - accuracy: 0.9199

 2/19 [==>...........................] - ETA: 0s - loss: 0.7362 - accuracy: 0.9258

 3/19 [===>..........................] - ETA: 1s - loss: 0.7311 - accuracy: 0.9251

 4/19 [=====>........................] - ETA: 1s - loss: 0.7115 - accuracy: 0.9292

































Epoch 9/15
 1/19 [>.............................] - ETA: 0s - loss: 0.5485 - accuracy: 0.9355

 2/19 [==>...........................] - ETA: 0s - loss: 0.5583 - accuracy: 0.9355

 3/19 [===>..........................] - ETA: 1s - loss: 0.5601 - accuracy: 0.9342

 4/19 [=====>........................] - ETA: 1s - loss: 0.5527 - accuracy: 0.9370

































Epoch 10/15


 1/19 [>.............................] - ETA: 0s - loss: 0.7734 - accuracy: 0.8555

 2/19 [==>...........................] - ETA: 0s - loss: 0.7652 - accuracy: 0.8555

 3/19 [===>..........................] - ETA: 1s - loss: 0.7080 - accuracy: 0.8776

 4/19 [=====>........................] - ETA: 1s - loss: 0.6428 - accuracy: 0.8921

































Epoch 11/15


 1/19 [>.............................] - ETA: 0s - loss: 0.4522 - accuracy: 0.9316

 2/19 [==>...........................] - ETA: 0s - loss: 0.4365 - accuracy: 0.9385

 3/19 [===>..........................] - ETA: 1s - loss: 0.4130 - accuracy: 0.9447

 4/19 [=====>........................] - ETA: 1s - loss: 0.4186 - accuracy: 0.9409

































Epoch 12/15


 1/19 [>.............................] - ETA: 0s - loss: 0.3962 - accuracy: 0.9336

 2/19 [==>...........................] - ETA: 0s - loss: 0.3915 - accuracy: 0.9336

 3/19 [===>..........................] - ETA: 1s - loss: 0.3985 - accuracy: 0.9310

 4/19 [=====>........................] - ETA: 1s - loss: 0.3808 - accuracy: 0.9355

































Epoch 13/15
 1/19 [>.............................] - ETA: 0s - loss: 0.3209 - accuracy: 0.9473

 2/19 [==>...........................] - ETA: 0s - loss: 0.3306 - accuracy: 0.9443

 3/19 [===>..........................] - ETA: 1s - loss: 0.3379 - accuracy: 0.9453

 4/19 [=====>........................] - ETA: 1s - loss: 0.4066 - accuracy: 0.9248

































Epoch 14/15
 1/19 [>.............................] - ETA: 0s - loss: 0.4666 - accuracy: 0.9258

 2/19 [==>...........................] - ETA: 0s - loss: 0.7254 - accuracy: 0.8809

 3/19 [===>..........................] - ETA: 1s - loss: 0.7352 - accuracy: 0.8646

 4/19 [=====>........................] - ETA: 1s - loss: 0.6604 - accuracy: 0.8799



























KeyboardInterrupt: 

In [None]:
def production(model_name, data):
    real_data, le = labelEncoding(model_name, data)
    real_y = real_data.result
    real_x = real_data.drop('result', axis=1)
#     print(real_y)
#     print(real_x)

    clf = load('result/' + model_name + '/' + model_name + '_model.joblib')
    yy_pred = clf.predict(real_x)
    pred_label = le.inverse_transform(yy_pred)
    real_label = le.inverse_transform(real_y)

    return pred_label, real_label

In [None]:
# Production
real_data = pd.read_csv('./dataset/kdd_prediction.csv', delimiter=',', dtype={'protocol_type': str, 'service': str, 'flag': str, 'result': str})
real_data = real_data.head(1)

pred_label, real_label = production(model_name, real_data)
print(pred_label, real_label)