In [1]:
import numpy as np
import pandas as pd
from numpy.linalg import inv, pinv
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.svm import SVC
from sklearn.multiclass import OneVsOneClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.utils import resample
from sklearn.base import BaseEstimator, ClassifierMixin
from typing import Optional
from matplotlib import pyplot as plt

In [8]:
week1 = pd.read_csv("/home/vxofi/Datasets/Intrusions/week1/week1.csv", sep=";", header=None)
week2 = pd.read_csv("/home/vxofi/Datasets/Intrusions/week2/week2.csv", sep=";", header=None)
week3 = pd.read_csv("/home/vxofi/Datasets/Intrusions/week3/week3.csv", sep=";", header=None)
week4 = pd.read_csv("/home/vxofi/Datasets/Intrusions/week4/week4.csv", sep=";", header=None)
week5 = pd.read_csv("/home/vxofi/Datasets/Intrusions/week5/week5.csv", sep=";", header=None)
week6 = pd.read_csv("/home/vxofi/Datasets/Intrusions/week6/week6.csv", sep=";", header=None)
week7 = pd.read_csv("/home/vxofi/Datasets/Intrusions/week7/week7.csv", sep=";", header=None)
week8_true = pd.read_csv("/home/vxofi/Datasets/Intrusions/week8_true/week8_true.csv", sep=";", header=None)
week9_true = pd.read_csv("/home/vxofi/Datasets/Intrusions/week9_true/week9_true.csv", sep=";", header=None)

In [9]:
week1.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,1,06.02.1998,00:00:07,00:00:01,http,2127,80,172.016.114.207,152.163.214.011,0,-
1,2,06.02.1998,00:00:07,00:00:01,http,2139,80,172.016.114.207,152.163.212.172,0,-
2,3,06.02.1998,00:00:07,00:00:01,http,2128,80,172.016.114.207,152.163.214.011,0,-
3,4,06.02.1998,00:00:07,00:00:01,http,2129,80,172.016.114.207,152.163.214.011,0,-
4,5,06.02.1998,00:00:07,00:00:01,http,2130,80,172.016.114.207,152.163.214.011,0,-


In [2]:
train = pd.concat([week1, week2, week3, week4, week5, week6, week7], ignore_index=True)
test = pd.concat([week8_true, week9_true], ignore_index=True)

train.drop(columns=train.columns[0], axis=1,  inplace=True) #train is 1-7 weeks
test.drop(columns=test.columns[0], axis=1,  inplace=True) #test is 8 and 9 weeks as suggested by authors

NameError: name 'week1' is not defined

In [19]:
labels = ["Start Date", "Start Time", "Duration", "Serv", "Src Port",
           "Dest Port", "Src IP", "Dest IP", "Attack Score", "Name"] #from readme
train.columns = labels
test.columns = labels

In [20]:
test.head()

Unnamed: 0,Start Date,Start Time,Duration,Serv,Src Port,Dest Port,Src IP,Dest IP,Attack Score,Name
0,07/20/1998,08:00:01,00:00:01,domain/u,1114,53,192.168.001.010,172.016.112.020,0,-
1,07/20/1998,08:00:01,00:00:01,domain/u,1059,53,192.168.001.010,172.016.112.020,0,-
2,07/20/1998,08:00:31,00:00:01,snmp/u,1195,161,194.027.251.021,192.168.001.001,0,-
3,07/20/1998,08:00:31,00:00:01,urp/i,-,-,192.168.001.001,194.027.251.021,0,-
4,07/20/1998,08:00:36,00:00:01,snmp/u,1197,161,194.027.251.021,192.168.001.001,0,-


New Dataset

In [2]:
train = pd.read_csv("/home/vxofi/Datasets/KDD-CUP/kdd_train.csv", sep=",")
test = pd.read_csv("/home/vxofi/Datasets/KDD-CUP/kdd_test.csv", sep=",")

In [3]:
data = pd.concat([train, test], ignore_index=True)
X = pd.get_dummies(data.iloc[:, :-1], columns=['protocol_type', 'service', 'flag'], dtype=int)
y = pd.get_dummies(data['labels'], dtype=int)

# paper says, they use 2000/4000/8000 random connections from dataset

# Extract the sampled records from X and Y datasets
sampled_X = X.sample(n=8000, random_state=42)
sampled_y = y.loc[sampled_X.index]

sampled_y_bin = (sampled_y['normal'] == 1).astype(int) # 1 is attack, 0 is normal
# train test split
#scaling

In [4]:
scaler = StandardScaler()

# then they split train and test equally
X_train = sampled_X[:4000]
onhs_tr = X_train.loc[:, "protocol_type_icmp":]
X_train = scaler.fit_transform(X_train.loc[:, :"dst_host_srv_rerror_rate"])
X_train = np.concatenate([X_train, onhs_tr.to_numpy()], axis=1)

X_test = sampled_X[4000:]
onhs_te = X_test.loc[:, "protocol_type_icmp":]
X_test = scaler.fit_transform(X_test.loc[:, :"dst_host_srv_rerror_rate"])
X_test = np.concatenate([X_test, onhs_te.to_numpy()], axis=1)

y_train = sampled_y[:4000]
y_train_bin = sampled_y_bin[:4000]

y_test = sampled_y[4000:]
y_test_bin = sampled_y_bin[4000:]

In [57]:
class ELM(BaseEstimator, ClassifierMixin):
    def __init__(self, num_input_nodes : int=None, num_hidden_units : int=None, num_out_units : int=None,
                 param_C : float=None,
                 beta_init : np.ndarray = None,
                 w_init : np.ndarray = None,
                 bias_init : np.ndarray = None):
        self._num_input_nodes = num_input_nodes
        self._num_hidden_units = num_hidden_units
        self._num_out_units = num_out_units
        self.param_C = param_C

        if isinstance(beta_init, np.ndarray):
            self._beta = beta_init
        else:
            self._beta = np.random.uniform(-1., 1., size=(self._num_hidden_units, self._num_out_units))

        if isinstance(w_init, np.ndarray):
            self._w = w_init
        else:
            self._w = np.random.uniform(-1, 1, size=(self._num_input_nodes, self._num_hidden_units))

        if isinstance(bias_init, np.ndarray):
            self._bias = bias_init
        else:
            self._bias = np.zeros(shape=(self._num_hidden_units,))


    def _sigmoid(self, x : float) ->  float:
        return 1. / (1. + np.exp(-x))


    def fit(self, X : np.ndarray, Y : np.ndarray) -> None:
        m, n = X.shape
        
        H = self._sigmoid(X.dot(self._w) + self._bias)

        I = np.eye(m)

        self._beta = inv(I / self.param_C + H.T @ H) @ H.T @ Y


    def predict(self, X : np.ndarray) -> np.ndarray:
        H = self._activation(X.dot(self._w) + self._bias)

        return H.dot(self._beta)

In [6]:
Cs = [2**i for i in range(-24, 25 + 1)]
gs = [2**i for i in range(-24, 25 + 1)]

param_grid_SVC = [
  {'C': Cs, 'gamma': gs, 'kernel': ['rbf']},
 ]
param_grid_ELM_bin = [
  {'num_input_nodes': [41], 'num_out_units': [2], 'param_C': Cs, 'gamma': gs},
 ]
param_grid_ELM = [
  {'num_input_nodes': [41], 'num_out_units': [23], 'param_C': Cs, 'gamma': gs},
 ]

In [59]:
class KELM(BaseEstimator, ClassifierMixin):
    def __init__(self, num_input_nodes : int=None, num_out_units : int=None,
                 param_C : float=None, gamma : float=None):
        self.num_input_nodes = num_input_nodes
        self.num_out_units = num_out_units
        self.param_C = param_C
        self.gamma = gamma
        self.Wi = None
        self.Zt = None
        self.W = None


    def _sigmoid(self, x : float) ->  float:
        return 1. / (1. + np.exp(-x))
    
    def _g_rbf(self, u : float, v : float) -> float:
        return np.exp(-self.gamma * np.linalg.norm(u - v)**2)
    
    '''
    def _cal_K(self, X : np.ndarray, Y : np.ndarray) -> np.ndarray:
        n_samples, n_features = X.shape
        K = np.zeros((n_samples, n_samples)) # therefore shape 4000,4000
        for i in range(n_samples):
            for j in range(n_samples):
                K[i, j] = self._g_rbf(X[i], Y[j])
        return K
    '''
    def _cal_K(self, X: np.ndarray, Y: np.ndarray) -> np.ndarray:
        gamma = self.gamma
        dist_matrix = np.sum(X**2, axis=1)[:, np.newaxis] - 2 * np.dot(X, Y.T) + np.sum(Y**2, axis=1)
        K = np.exp(-gamma * dist_matrix)
        return K

    

    def fit(self, X : np.ndarray, Y : np.ndarray) -> None:
        n_samples, n_features = X.shape
        self.Wi = np.random.uniform(-1, 1, size=(n_features, 200)) # add h=200?
        self.Zt = self._sigmoid((X @ self.Wi))

        self.K = self._cal_K(self.Zt, self.Zt)
        #self.K += np.eye(m) / self.param_C # I / C
        self.W = Y @ np.linalg.inv(self.K.T @ self.K + np.multiply(np.eye(*self.K.shape), 1 / self.param_C)) @ self.K.T

    def predict(self, X : np.ndarray) -> np.ndarray:
        Z = self._sigmoid(X @ self.Wi)
        K = self._cal_K(self.Zt, Z)  # Kernel between new data and training data
        y_pred = self.W @ K
        return y_pred


In [28]:
#SVM for binary classification
grid_SVC_bin = GridSearchCV(SVC(), param_grid_SVC, cv=10, verbose=1, n_jobs=-1) # scoring?, cv=10 as in other* paper
grid_SVC_bin.fit(X_train, y_train_bin)
C, g, ker = grid_SVC_bin.best_params_.values()
SVM_classifier = SVC(kernel=ker, C=C, gamma=g)
print("best params: ", grid_SVC_bin.best_params_.values())

Fitting 10 folds for each of 2500 candidates, totalling 25000 fits


[CV] END C=5.960464477539063e-08, gamma=5.960464477539063e-08, kernel=rbf; total time=   4.9s
[CV] END C=5.960464477539063e-08, gamma=5.960464477539063e-08, kernel=rbf; total time=   5.1s
[CV] END C=5.960464477539063e-08, gamma=5.960464477539063e-08, kernel=rbf; total time=   4.9s
[CV] END C=5.960464477539063e-08, gamma=5.960464477539063e-08, kernel=rbf; total time=   4.9s
[CV] END C=5.960464477539063e-08, gamma=5.960464477539063e-08, kernel=rbf; total time=   4.6s
[CV] END C=5.960464477539063e-08, gamma=5.960464477539063e-08, kernel=rbf; total time=   4.9s
[CV] END C=5.960464477539063e-08, gamma=5.960464477539063e-08, kernel=rbf; total time=   4.3s
[CV] END C=5.960464477539063e-08, gamma=5.960464477539063e-08, kernel=rbf; total time=   5.3s
[CV] END C=5.960464477539063e-08, gamma=5.960464477539063e-08, kernel=rbf; total time=   4.3s
[CV] END C=5.960464477539063e-08, gamma=1.1920928955078125e-07, kernel=rbf; total time=   3.8s
[CV] END C=5.960464477539063e-08, gamma=5.960464477539063e-

KeyboardInterrupt: 

In [None]:
#SVM for multi label classification
grid_SVC = GridSearchCV(OneVsOneClassifier(SVC()), param_grid_SVC, cv=10, verbose=5, n_jobs=-1)
grid_SVC.fit(X_train, y_train)
C, g, ker = grid_SVC.best_params_.values()
SVM_multic = OneVsOneClassifier(SVC(kernel=ker, C=C, gamma=g))
print("best params: ", grid_SVC.best_params_.values())

In [60]:
test = KELM(122, 2, 2, 2)
test.fit(X_train, y_train_bin)

In [61]:
preds = test.predict(X_test)

In [62]:
preds_int = np.array([int(i >= 0.5) for i in preds])

In [63]:
accuracy_score(preds_int, y_test_bin)

0.8775

In [58]:
test2 = ELM(122, 200, 2, 2)
test2.fit(X_train, y_train_bin)

ValueError: operands could not be broadcast together with shapes (4000,4000) (200,200) 

In [19]:
#KELM for binary classification
grid_ELM_bin = GridSearchCV(KELM(), param_grid_ELM_bin,
                             cv=10, verbose=2, n_jobs=-1, scoring=accuracy_score)
grid_ELM_bin.fit(X_train, y_train_bin)
ni, no, C, g = grid_ELM_bin.best_params_.values()
ELM_kernel_bic = KELM(ni, no, param_C=C, gamma=g)
print("best params: ", grid_ELM_bin.best_params_.values())

Fitting 10 folds for each of 2500 candidates, totalling 25000 fits


: 

In [None]:
#KELM for multi class classification
grid_ELM = GridSearchCV(KELM(), param_grid_ELM, cv=10, verbose=5, n_jobs=-1)
grid_ELM.fit(X_train, y_train)
ni, no, C, g = grid_ELM.best_params_.values()
ELM_kernel_multic = KELM(ni, no, param_C=C, gamma=g)
print("best params: ", grid_ELM.best_params_.values())

In [2]:
ELM_basic_bic = ELM(41, 2, 400, param_C=2)
ELM_basic_multic = ELM(41, 23, 400, param_C=2) #paper uses C=2 for basic ELM

#ELM_kernel_bic = KELM(41, 2, param_C=C, gamma=g)
#ELM_kernel_multic = KELM(41, 23, param_C=C, gamma=g)

# SVM_classifier = SVC(kernel='rbf', C=C, gamma=g)
#SVM_multic = OneVsOneClassifier(SVC(kernel='rbf', C=C, gamma=g))

NameError: name 'ELM' is not defined