In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive so that
# files stored on Drive can be reached through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
cd "/content/drive/MyDrive/Thesis/NIDS_MODEL/vians_model"

/content/drive/MyDrive/Thesis/NIDS_MODEL/vians_model


In [None]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


In [None]:
import itertools
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """Draw a confusion matrix as an annotated heat map and show it.

    Parameters
    ----------
    cm : array-like of shape (n_classes, n_classes)
        Confusion matrix of raw counts. Rows are plotted as the true labels
        and columns as the predicted labels.
    target_names : sequence of str or None
        Tick labels for both axes, in the same order as the rows/columns of
        ``cm``. When ``None`` the default numeric ticks are kept.
    title : str
        Figure title.
    cmap : matplotlib colormap or None
        Colormap for the heat map; ``None`` selects ``'Blues'``.
    normalize : bool
        If True, every row is divided by its sum, so each cell shows the
        fraction of that true class; cells are printed with four decimals.
        If False, raw values are printed with thousands separators.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.

    Notes
    -----
    The accuracy / misclassification rate in the x-axis label are always
    computed from the raw (un-normalised) matrix.
    The font size of 16 is applied only while this figure is built; the
    global matplotlib rcParams are left untouched.
    """
    cm = np.asarray(cm)

    # Overall accuracy = correctly classified (diagonal) / all samples.
    total = cm.sum()
    accuracy = np.trace(cm) / float(total) if total else float('nan')
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    # Figure size grows with the number of classes. Fall back to the matrix
    # size when no names are given instead of calling len(None).
    n_classes = len(target_names) if target_names is not None else cm.shape[0]
    scale = max(1, int(np.sqrt(n_classes)))

    # Normalise BEFORE drawing so the colours, the colour bar, the text
    # threshold and the printed numbers all describe the same matrix.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True).astype('float')
        # Rows without any sample stay at 0 instead of becoming NaN.
        shown = np.divide(cm, row_totals,
                          out=np.zeros(cm.shape, dtype='float'),
                          where=row_totals != 0)
    else:
        shown = cm

    # Font size must be active before any text artist is created, and is
    # scoped to this figure so other plots are not affected.
    with plt.rc_context({'font.size': 16}):
        plt.figure(figsize=(8 * scale, 6 * scale))
        plt.imshow(shown, interpolation='nearest', cmap=cmap)
        plt.title(title)
        plt.colorbar()

        if target_names is not None:
            tick_marks = np.arange(len(target_names))
            plt.xticks(tick_marks, target_names, rotation=-90)
            plt.yticks(tick_marks, target_names)

        # Cells darker than the threshold get white text for readability.
        thresh = shown.max() / 1.5 if normalize else shown.max() / 2
        cell_format = "{:0.4f}" if normalize else "{:,}"
        for i, j in itertools.product(range(shown.shape[0]), range(shown.shape[1])):
            plt.text(j, i, cell_format.format(shown[i, j]),
                     horizontalalignment="center",
                     color="white" if shown[i, j] > thresh else "black")

        plt.ylabel('True label')
        plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
        # Run the layout pass after the labels exist so they are not clipped.
        plt.tight_layout()
        plt.show()

from gc import freeze


In [None]:
# Path of the CSV data set loaded by the next cell; it is relative, so it is
# resolved against the notebook's current working directory.
filename = './data/datasample_multiclass_v4.csv'

In [None]:
# Data preparation: load the CSV, bucket rare destination ports, encode the
# labels and create stratified train / validation / test splits.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load data
df = pd.read_csv(filename)

# Feature engineering
# Ports in `common_port` keep their value; every other destination port is
# collapsed into the single bucket 10000.
# (The most frequent ports can be inspected with
#  df['Dst Port'].value_counts()[:20] in a cell of its own.)
common_port = [8080, 80, 21, 53, 443, 3389, 445, 22, 500, 0]
df['Dst Port'] = df['Dst Port'].where(df['Dst Port'].isin(common_port), 10000)

# Label encode
# `encoder` is kept so later cells can map the integer codes back to names.
encoder = LabelEncoder()
df['Label'] = encoder.fit_transform(df['Label'])

# Check nan values
# Mid-cell expressions are not displayed by the notebook, so report the
# count explicitly when it is non-zero.
n_missing = int(df.isna().sum().sum())
if n_missing:
    print("Warning: {:,} missing values found in the data set".format(n_missing))

# Get features
# Keyword form: the positional `axis` argument of DataFrame.drop was removed
# in pandas 2.0.
X, y = df.drop(columns=['Label']), df['Label']

# Train test validation split
# 20 % of the data is held out for testing, then 20 % of the remainder for
# validation (64 % / 16 % / 20 % overall). Both splits are stratified on the
# label and use a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train)



# K-Nearest Neighbors (KNN)

In [None]:
%%time
from sklearn.neighbors import KNeighborsClassifier
# k-nearest-neighbours baseline built with the library's default settings
# (no arguments are passed) and fitted on the training split.
# NOTE(review): no feature scaling is visible before this point; a
# distance-based model may be dominated by large-valued columns - confirm
# whether scaling was intentionally skipped.
clf = KNeighborsClassifier()
# Last expression of the cell, so the notebook echoes the fitted estimator.
clf.fit(X_train, y_train)


CPU times: user 35.8 ms, sys: 926 µs, total: 36.7 ms
Wall time: 42 ms


KNeighborsClassifier()

In [None]:
%%time
y_pred = clf.predict(X_test)
from sklearn.metrics import classification_report
target_names = encoder.inverse_transform(np.arange(df['Label'].value_counts().shape[0]))
report = classification_report(y_test, y_pred,target_names=target_names)
print(report)

                        precision    recall  f1-score   support

                Benign       0.97      0.95      0.96      2000
                   Bot       1.00      1.00      1.00      2000
      Brute Force -Web       0.77      0.84      0.80       122
      Brute Force -XSS       0.74      0.70      0.72        46
      DDOS attack-HOIC       1.00      1.00      1.00      2000
  DDOS attack-LOIC-UDP       0.99      1.00      1.00       346
DDoS attacks-LOIC-HTTP       0.97      0.96      0.96      2000
 DoS attacks-GoldenEye       0.96      0.97      0.96      2000
      DoS attacks-Hulk       0.98      0.99      0.98      2000
 DoS attacks-Slowloris       0.99      0.99      0.99      2000
        FTP-BruteForce       1.00      1.00      1.00      2000
         SQL Injection       0.78      0.39      0.52        18
        SSH-Bruteforce       1.00      1.00      1.00      2000

              accuracy                           0.98     18532
             macro avg       0.93     

# Support Vector Machine (SVM)

In [None]:
%%time
from sklearn.svm import SVC
# Support-vector classifier with the library's default settings (no
# arguments are passed), fitted on the training split. Rebinds `clf`, so any
# previously fitted model under that name is replaced.
# NOTE(review): the features reach this model without any visible scaling
# step; the recorded run took ~11.5 minutes and scored 0.55 accuracy, which
# may be a symptom of that - consider standardising the inputs and confirm.
clf = SVC()
# Last expression of the cell, so the notebook echoes the fitted estimator.
clf.fit(X_train, y_train)


CPU times: user 11min 38s, sys: 1.8 s, total: 11min 40s
Wall time: 11min 39s


SVC()

In [None]:
%%time
y_pred = clf.predict(X_test)
from sklearn.metrics import classification_report
target_names = encoder.inverse_transform(np.arange(df['Label'].value_counts().shape[0]))
report = classification_report(y_test, y_pred,target_names=target_names)
print(report)

                        precision    recall  f1-score   support

                Benign       0.94      0.27      0.42      2000
                   Bot       0.00      0.00      0.00      2000
      Brute Force -Web       0.63      0.37      0.47       122
      Brute Force -XSS       0.00      0.00      0.00        46
      DDOS attack-HOIC       0.22      1.00      0.36      2000
  DDOS attack-LOIC-UDP       0.99      0.98      0.99       346
DDoS attacks-LOIC-HTTP       0.76      0.93      0.84      2000
 DoS attacks-GoldenEye       0.85      0.63      0.72      2000
      DoS attacks-Hulk       0.80      0.00      0.00      2000
 DoS attacks-Slowloris       0.95      0.93      0.94      2000
        FTP-BruteForce       0.99      0.78      0.87      2000
         SQL Injection       0.00      0.00      0.00        18
        SSH-Bruteforce       0.86      0.36      0.51      2000

              accuracy                           0.55     18532
             macro avg       0.62     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Multi-Layer Perceptron (MLP)

In [None]:
%%time
from sklearn.neural_network import MLPClassifier
clf = MLPClassifier()
clf.fit(X_train, y_train)


CPU times: user 1min 1s, sys: 44.9 s, total: 1min 46s
Wall time: 57 s


MLPClassifier()

In [None]:
%%time
y_pred = clf.predict(X_test)
from sklearn.metrics import classification_report
target_names = encoder.inverse_transform(np.arange(df['Label'].value_counts().shape[0]))
report = classification_report(y_test, y_pred,target_names=target_names)
print(report)

                        precision    recall  f1-score   support

                Benign       0.56      0.91      0.70      2000
                   Bot       0.99      1.00      1.00      2000
      Brute Force -Web       0.70      0.57      0.62       122
      Brute Force -XSS       0.27      0.54      0.36        46
      DDOS attack-HOIC       0.99      1.00      1.00      2000
  DDOS attack-LOIC-UDP       1.00      1.00      1.00       346
DDoS attacks-LOIC-HTTP       0.98      0.49      0.65      2000
 DoS attacks-GoldenEye       0.96      0.83      0.89      2000
      DoS attacks-Hulk       0.98      0.90      0.94      2000
 DoS attacks-Slowloris       0.91      0.98      0.94      2000
        FTP-BruteForce       1.00      0.99      1.00      2000
         SQL Injection       0.83      0.28      0.42        18
        SSH-Bruteforce       1.00      1.00      1.00      2000

              accuracy                           0.90     18532
             macro avg       0.86     

# Decision Tree (DT)

In [None]:
%%time
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)


CPU times: user 1.2 s, sys: 11.5 ms, total: 1.21 s
Wall time: 1.22 s


DecisionTreeClassifier()

In [None]:
%%time
y_pred = clf.predict(X_test)
from sklearn.metrics import classification_report
target_names = encoder.inverse_transform(np.arange(df['Label'].value_counts().shape[0]))
report = classification_report(y_test, y_pred,target_names=target_names)
print(report)

                        precision    recall  f1-score   support

                Benign       1.00      0.99      1.00      2000
                   Bot       1.00      1.00      1.00      2000
      Brute Force -Web       0.93      0.94      0.93       122
      Brute Force -XSS       0.90      0.98      0.94        46
      DDOS attack-HOIC       1.00      1.00      1.00      2000
  DDOS attack-LOIC-UDP       1.00      1.00      1.00       346
DDoS attacks-LOIC-HTTP       1.00      1.00      1.00      2000
 DoS attacks-GoldenEye       1.00      1.00      1.00      2000
      DoS attacks-Hulk       1.00      1.00      1.00      2000
 DoS attacks-Slowloris       1.00      1.00      1.00      2000
        FTP-BruteForce       1.00      1.00      1.00      2000
         SQL Injection       0.75      0.67      0.71        18
        SSH-Bruteforce       1.00      1.00      1.00      2000

              accuracy                           1.00     18532
             macro avg       0.97     