# 04_Autoencoder followed by FFNN

### In this section we apply a semi-supervised learning approach to the network intrusion detection task and search for the best hyper-parameters to improve performance. First we train an autoencoder on unlabeled data (unsupervised learning), and then we use a feed-forward neural network (FFNN) on the encoded features to do the classification.

### We first start by importing the required packages.

In [1]:
import csv
import pickle

import numpy
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import plot_model
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


### fix random seed for reproducibility

In [2]:
# Seed every random number generator the notebook relies on. Seeding NumPy
# alone is not enough: Keras draws its initial layer weights from TensorFlow's
# own generator, so without the second call each run starts from different
# weights and the grid search can pick a different winner.
seed = 7
numpy.random.seed(seed)
# The compat.v1 spelling exists on both TensorFlow 1.x and 2.x installations.
tf.compat.v1.set_random_seed(seed)

### load dataset

In [3]:
# Read the train and test archives with identical options; header=None tells
# pandas the files have no header row, so columns get integer labels.
train, test = (
    pd.read_csv(archive, header=None)
    for archive in ("CICIDS2017_Wed_train.zip", "CICIDS2017_Wed_test.zip")
)

### split the training dataset into an unlabeled part (75%, used to train the autoencoder) and a labeled part (25%, used to train the classifier)

In [4]:
# Hold out a quarter of the training rows; random_state pins the shuffle so
# the same rows land in each part on every run.
train75, train25 = train_test_split(
    train,
    test_size=0.25,
    random_state=42,
)

### split into input and output variables

In [5]:
# Features of the 75% part: everything except column 78 (the column the
# notebook uses as the label elsewhere).
train75x = train75.drop(columns=[78])

In [6]:
# Features of the 25% part: everything except column 78.
train25x = train25.drop(columns=[78])

In [7]:
# Target values of the 25% part, taken from column 78.
train25y = train25.loc[:, 78]

In [8]:
# Test-set features: everything except column 78.
testx = test.drop(columns=[78])

In [9]:
# Test-set target values, taken from column 78.
testy = test.loc[:, 78]

In [10]:
# Number of feature columns; every layer width below is derived from it.
n_inputs = len(train75x.columns)

### define encoder

In [11]:
# Symbolic input of the autoencoder: one vector of n_inputs values per sample.
input_shape = (n_inputs,)
ini = Input(shape=input_shape)

### encoder level 1

In [12]:
# First encoder stage: widen to twice the input size, normalise, then apply
# LeakyReLU. The layers are chained in the order listed.
e = ini
for stage in (Dense(n_inputs * 2), BatchNormalization(), LeakyReLU()):
    e = stage(e)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


### encoder level 2

In [13]:
# Second encoder stage: back down to the input width, normalise, LeakyReLU.
# Continues from the tensor `e` produced by the first encoder stage.
for stage in (Dense(n_inputs), BatchNormalization(), LeakyReLU()):
    e = stage(e)

### bottleneck

In [14]:
# The code layer is kept as wide as the input, so the encoding has the same
# number of values as a raw feature vector. No activation is passed to Dense.
n_bottleneck = n_inputs
bottleneck = Dense(units=n_bottleneck)(e)

### define decoder, level 1

In [15]:
# First decoder stage, fed by the bottleneck: Dense at input width,
# batch normalisation, LeakyReLU.
d = bottleneck
for stage in (Dense(n_inputs), BatchNormalization(), LeakyReLU()):
    d = stage(d)

### decoder level 2

In [16]:
# Second decoder stage: widen to twice the input size, normalise, LeakyReLU.
# Continues from the tensor `d` produced by the first decoder stage.
for stage in (Dense(n_inputs * 2), BatchNormalization(), LeakyReLU()):
    d = stage(d)

### output layer

In [17]:
# Reconstruction layer: one linear unit per input feature.
output = Dense(units=n_inputs, activation='linear')(d)

### define autoencoder model

In [18]:
# Full autoencoder: maps the input tensor to its reconstruction.
automodel = Model(
    inputs=ini,
    outputs=output,
)

### compile autoencoder model

In [19]:
# Mean squared error between input and reconstruction, minimised with Adam.
automodel.compile(loss='mse', optimizer='adam')

### fit the autoencoder model to reconstruct input

In [20]:
# Train the autoencoder to reproduce its own input: the features serve as both
# x and y. The test features are passed as validation data, so val_loss is
# reported on them after the single epoch.
history = automodel.fit(
    x=train75x,
    y=train75x,
    epochs=1,
    batch_size=64,
    verbose=2,
    validation_data=(testx, testx),
)

Train on 362988 samples, validate on 207422 samples
362988/362988 - 20s - loss: 0.1360 - val_loss: 0.2517


### define an encoder model (without the decoder)

In [21]:
# Encoder-only model: shares the trained layers from the input up to the
# bottleneck and stops there.
encoder = Model(
    inputs=ini,
    outputs=bottleneck,
)

### save the encoder to file

In [22]:
# Write the encoder to an HDF5 file in the working directory.
encoder.save(filepath='encoder.h5')

### encode the 25% of the train data

In [24]:
# Encoded features for the labeled 25% of the training data.
train25xen = encoder.predict(x=train25x)

### encode the test data

In [25]:
# Encoded features for the test data.
testxen = encoder.predict(x=testx)

### hyper-parameter tuning 

In [26]:
# Function to create model, required for KerasClassifier
def create_model(optimizer='rmsprop', input_dim=78, n_classes=6):
    """Build and compile the feed-forward classifier used by the grid search.

    Args:
        optimizer: Optimizer passed straight to ``model.compile``.
        input_dim: Width of each input vector. Defaults to 78, the value that
            used to be hard-coded in the first layer.
        n_classes: Number of units in the softmax output layer. Defaults to 6,
            the value that used to be hard-coded in the last layer.

    Returns:
        A compiled ``Sequential`` model: Dense(12, relu) -> Dense(8, relu) ->
        Dense(n_classes, softmax), with sparse categorical cross-entropy loss
        and accuracy as the reported metric.
    """
    model = Sequential()
    model.add(Dense(12, input_dim=input_dim, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(n_classes, activation='softmax'))
    # Sparse loss: labels are given as integer class ids, not one-hot vectors.
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model
# Target values of the labeled quarter of the training data (column 78).
train25y = train25.loc[:, 78]

# Wrap the Keras builder so scikit-learn can drive it as an estimator.
model = KerasClassifier(build_fn=create_model)

# Candidate values: optimizer goes to create_model, the other two to fit.
optimizers = ['rmsprop', 'adam']
epochs = numpy.array([1, 2, 3])
batches = numpy.array([64, 128])
param_grid = {
    'optimizer': optimizers,
    'epochs': epochs,
    'batch_size': batches,
}

# Exhaustive search over every combination, on the encoded labeled data.
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(train25xen, train25y)

# Report the best cross-validated score and the combination that reached it.
best_summary = (grid_result.best_score_, grid_result.best_params_)
print("Best: %f using %s" % best_summary)




Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Best: 0.980710 using {'batch_size': 64, 'epochs': 3, 'optimizer': 'adam'}


### From the grid search above, the best parameters are 'batch_size': 64, 'epochs': 3, 'optimizer': 'adam'. However, the previous run of the grid search gave 'batch_size': 64, 'epochs': 3, 'optimizer': 'rmsprop', and we finally chose that earlier result. Below, the classifier is trained on the labeled 25% of the training dataset with these parameters.

In [27]:
# Final classifier, trained on the encoded, labeled quarter of the training
# data. The input width is read from the encoded features rather than written
# as a literal, so this cell keeps working if the bottleneck size changes.
model = Sequential()
model.add(Dense(12, input_dim=train25xen.shape[1], activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(6, activation='softmax'))

# NOTE(review): the optimizer here is 'rmsprop', whereas the grid-search output
# shown in this notebook reports 'adam' as best; confirm which is intended.
model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
model.fit(train25xen, train25y, epochs=3, batch_size=64)

# Accuracy on the same data the model was just fitted on (not a held-out score).
trainscores = model.evaluate(train25xen, train25y)
print("\n%s: %.2f%%" % (model.metrics_names[1], trainscores[1]*100))

Epoch 1/3
Epoch 2/3
Epoch 3/3

acc: 98.26%


In [28]:
# Per-class scores for the encoded test set; argmax along axis 1 picks the
# index of the highest-scoring class for each row.
predtest = model.predict(testxen)
predtesty = predtest.argmax(axis=1)

# Persist the predicted class ids. The context manager guarantees the file is
# flushed and closed, even if pickling raises.
with open("predtest.p", "wb") as pred_file:
    pickle.dump(predtesty, pred_file)

### reference:
    Autoencoder:
        https://yishuihancheng.blog.csdn.net/article/details/112292291?utm_term=%E8%87%AA%E5%8A%A8%E7%BC%96%E7%A0%81%E5%99%A8%E7%89%B9%E5%BE%81%E6%8F%90%E5%8F%96&utm_medium=distribute.pc_aggpage_search_result.none-task-blog-2~all~sobaiduweb~default-1-112292291&spm=3001.4430
    Hyper-parameter tuning: 
        https://cnbeining.github.io/deep-learning-with-python-cn/3-multi-layer-perceptrons/ch9-use-keras-models-with-scikit-learn-for-general-machine-learning.html
    source code:
        https://github.com/sgamage2/dl_ids_survey
    NN:
        https://dsbristol.github.io/dst/coursebook/09.html