In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
from numpy.linalg import inv, pinv
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.svm import SVC
from typing import Optional

In [8]:
# Root folder of the intrusion dataset. Every capture is stored as
# <DATA_DIR>/<name>/<name>.csv, so relocating the data only requires editing this constant.
# NOTE: this is a machine-specific absolute path.
DATA_DIR = "/home/vxofi/Datasets/Intrusions"


def load_week(name: str, data_dir: str = DATA_DIR) -> pd.DataFrame:
    """Read one weekly capture into a DataFrame.

    Parameters
    ----------
    name : str
        Folder / file stem of the capture, e.g. ``"week1"`` or ``"week8_true"``.
    data_dir : str
        Dataset root; defaults to ``DATA_DIR``.

    Returns
    -------
    pd.DataFrame
        The ';'-separated file parsed without a header row, so columns are
        labelled with integers 0..k-1.
    """
    return pd.read_csv(f"{data_dir}/{name}/{name}.csv", sep=";", header=None)


week1 = load_week("week1")
week2 = load_week("week2")
week3 = load_week("week3")
week4 = load_week("week4")
week5 = load_week("week5")
week6 = load_week("week6")
week7 = load_week("week7")
week8_true = load_week("week8_true")
week9_true = load_week("week9_true")

In [9]:
# Show the first rows of week 1 to sanity-check the ';'-separated, header-less parse.
week1.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,1,06.02.1998,00:00:07,00:00:01,http,2127,80,172.016.114.207,152.163.214.011,0,-
1,2,06.02.1998,00:00:07,00:00:01,http,2139,80,172.016.114.207,152.163.212.172,0,-
2,3,06.02.1998,00:00:07,00:00:01,http,2128,80,172.016.114.207,152.163.214.011,0,-
3,4,06.02.1998,00:00:07,00:00:01,http,2129,80,172.016.114.207,152.163.214.011,0,-
4,5,06.02.1998,00:00:07,00:00:01,http,2130,80,172.016.114.207,152.163.214.011,0,-


In [15]:
def _without_first_column(frame):
    """Return a new frame equal to `frame` minus its leading column."""
    return frame.drop(columns=frame.columns[0])


# train is weeks 1-7
train = _without_first_column(
    pd.concat([week1, week2, week3, week4, week5, week6, week7], ignore_index=True)
)

# test is weeks 8 and 9, as suggested by the authors
test = _without_first_column(
    pd.concat([week8_true, week9_true], ignore_index=True)
)

In [19]:
# Column names for the ten remaining fields (taken from the dataset readme).
labels = [
    "Start Date",
    "Start Time",
    "Duration",
    "Serv",
    "Src Port",
    "Dest Port",
    "Src IP",
    "Dest IP",
    "Attack Score",
    "Name",
]
# Both splits share the same schema, so they receive the same header.
train.columns = test.columns = labels

In [20]:
# Preview the relabelled test split to confirm the column names line up with the data.
# NOTE(review): the Start Date values previewed here look slash-separated (e.g. 07/20/1998),
# whereas the week1 preview shows dot-separated dates (e.g. 06.02.1998) -- confirm both
# formats are handled before any date parsing.
test.head()

Unnamed: 0,Start Date,Start Time,Duration,Serv,Src Port,Dest Port,Src IP,Dest IP,Attack Score,Name
0,07/20/1998,08:00:01,00:00:01,domain/u,1114,53,192.168.001.010,172.016.112.020,0,-
1,07/20/1998,08:00:01,00:00:01,domain/u,1059,53,192.168.001.010,172.016.112.020,0,-
2,07/20/1998,08:00:31,00:00:01,snmp/u,1195,161,194.027.251.021,192.168.001.001,0,-
3,07/20/1998,08:00:31,00:00:01,urp/i,-,-,192.168.001.001,194.027.251.021,0,-
4,07/20/1998,08:00:36,00:00:01,snmp/u,1197,161,194.027.251.021,192.168.001.001,0,-


In [None]:
class ELM:
    """Single-hidden-layer Extreme Learning Machine with ridge-regularised output weights.

    The input-to-hidden weights and biases are fixed (random unless supplied);
    only the hidden-to-output weights ``beta`` are learned, in closed form, by
    solving ``(I / C + H^T H) beta = H^T Y`` where ``H`` is the sigmoid
    activation of the hidden layer.

    Parameters
    ----------
    num_input_nodes : int
        Number of input features.
    num_hidden_units : int
        Number of hidden neurons.
    num_out_units : int
        Number of output units.
    param_C : float
        Regularisation constant ``C``; larger values mean weaker regularisation.
    beta_init, w_init, bias_init : np.ndarray, optional
        Explicit initial values for the output weights (hidden x out), the
        input weights (input x hidden) and the hidden biases (hidden,).
        Anything that is not an ``np.ndarray`` triggers the default
        initialisation (uniform in [-1, 1] for weights, zeros for biases).
    """

    def __init__(self, num_input_nodes : int, num_hidden_units : int, num_out_units : int,
                 param_C : float = 1000.,
                 beta_init : Optional[np.ndarray] = None,
                 w_init : Optional[np.ndarray] = None,
                 bias_init : Optional[np.ndarray] = None):
        self._num_input_nodes = num_input_nodes
        self._num_hidden_units = num_hidden_units
        self._num_out_units = num_out_units
        self.param_C = param_C

        if isinstance(beta_init, np.ndarray):
            self._beta = beta_init
        else:
            self._beta = np.random.uniform(-1., 1., size=(self._num_hidden_units, self._num_out_units))

        if isinstance(w_init, np.ndarray):
            self._w = w_init
        else:
            self._w = np.random.uniform(-1, 1, size=(self._num_input_nodes, self._num_hidden_units))

        if isinstance(bias_init, np.ndarray):
            self._bias = bias_init
        else:
            self._bias = np.zeros(shape=(self._num_hidden_units,))


    @staticmethod
    def _sigmoid(x : np.ndarray) -> np.ndarray:
        """Element-wise logistic function 1 / (1 + exp(-x))."""
        # Declared static so that `self._sigmoid(z)` does not pass `self` as `x`.
        return 1. / (1. + np.exp(-x))


    def _hidden_output(self, X : np.ndarray) -> np.ndarray:
        """Return the hidden-layer activation matrix H (samples x hidden units)."""
        return self._sigmoid(np.asarray(X, dtype=float).dot(self._w) + self._bias)


    def fit(self, X : np.ndarray, Y : np.ndarray) -> None:
        """Learn the output weights from training data.

        Parameters
        ----------
        X : np.ndarray
            Training inputs, shape (samples, num_input_nodes).
        Y : np.ndarray
            Training targets, shape (samples, num_out_units) or (samples,).
        """
        H = self._hidden_output(X)

        # The regulariser lives in hidden-unit space: H^T H is (hidden x hidden),
        # so the identity must have that size, not the number of samples.
        I = np.eye(H.shape[1])

        # Solve the linear system directly instead of forming an explicit inverse;
        # this is both cheaper and numerically more stable.
        self._beta = np.linalg.solve(I / self.param_C + H.T @ H,
                                     H.T @ np.asarray(Y, dtype=float))


    def predict(self, X : np.ndarray) -> np.ndarray:
        """Return the network output ``H(X) @ beta`` for the given inputs."""
        H = self._hidden_output(X)

        return H.dot(self._beta)

In [None]:
class KELM:
    """Kernel Extreme Learning Machine with a Gaussian (RBF) kernel.

    Instead of an explicit random hidden layer, the model works with the kernel
    matrix ``K[i, j] = exp(-gamma * ||x_i - x_j||^2)`` of the training samples
    and learns ``beta`` by solving ``(K + I / C) beta = Y``. Predictions are
    ``k(X_new, X_train) @ beta``.

    Parameters
    ----------
    num_input_nodes : int
        Number of input features.
    num_out_units : int
        Number of output units.
    param_C : float
        Regularisation constant ``C``; larger values mean weaker regularisation.
    gamma : float
        Width parameter of the RBF kernel.
    beta_init : np.ndarray, optional
        Initial value stored as the output weights until ``fit`` overwrites it.
    w_init, bias_init : np.ndarray, optional
        Accepted and stored for signature parity with the explicit-hidden-layer
        model; the kernel formulation has no hidden layer, so they are not used.
    """

    def __init__(self, num_input_nodes : int, num_out_units : int,
                 param_C : float = 1000., gamma : float = 1000,
                 beta_init : Optional[np.ndarray] = None,
                 w_init : Optional[np.ndarray] = None,
                 bias_init : Optional[np.ndarray] = None):
        self._num_input_nodes = num_input_nodes
        self._num_out_units = num_out_units
        self.param_C = param_C
        self.param_gamma = gamma

        # There is no hidden layer here, hence nothing to size random weights by:
        # keep whatever the caller supplied and leave the rest unset.
        self._beta = beta_init if isinstance(beta_init, np.ndarray) else None
        self.beta = self._beta  # public alias, kept in sync by fit()
        self._w = w_init if isinstance(w_init, np.ndarray) else None
        self._bias = bias_init if isinstance(bias_init, np.ndarray) else None

        self._X_train = None  # training samples, needed to evaluate the kernel at predict time
        self.K = None         # regularised training kernel matrix (K + I / C), set by fit()


    @staticmethod
    def _sigmoid(x : np.ndarray) -> np.ndarray:
        """Element-wise logistic function 1 / (1 + exp(-x))."""
        return 1. / (1. + np.exp(-x))

    def _g_rbf(self, u : np.ndarray, v : np.ndarray) -> float:
        """RBF kernel value between two single samples ``u`` and ``v``."""
        return np.exp(-self.param_gamma * np.linalg.norm(u - v)**2)

    def _kernel_matrix(self, A : np.ndarray, B : np.ndarray) -> np.ndarray:
        """RBF kernel between every row of ``A`` and every row of ``B``.

        Returns an array of shape (len(A), len(B)).
        """
        A = np.atleast_2d(np.asarray(A, dtype=float))
        B = np.atleast_2d(np.asarray(B, dtype=float))
        # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, evaluated for all pairs at once.
        sq_dist = (A**2).sum(axis=1)[:, None] + (B**2).sum(axis=1)[None, :] - 2. * (A @ B.T)
        # Rounding can push tiny distances slightly below zero; clamp them.
        np.maximum(sq_dist, 0., out=sq_dist)
        return np.exp(-self.param_gamma * sq_dist)

    def _cal_K(self, X : np.ndarray) -> np.ndarray:
        """Symmetric (samples x samples) RBF kernel matrix of ``X`` with itself."""
        K = self._kernel_matrix(X, X)
        # A sample is at distance zero from itself, so the diagonal is exactly 1.
        np.fill_diagonal(K, 1.)
        return K


    def fit(self, X : np.ndarray, Y : np.ndarray) -> None:
        """Learn ``beta`` from training data.

        Parameters
        ----------
        X : np.ndarray
            Training inputs, shape (samples, num_input_nodes).
        Y : np.ndarray
            Training targets, shape (samples, num_out_units) or (samples,).
        """
        X = np.atleast_2d(np.asarray(X, dtype=float))
        m = X.shape[0]

        # Keep a private copy: predict() must evaluate the kernel against these samples.
        self._X_train = X.copy()

        self.K = self._cal_K(X) + np.eye(m) / self.param_C  # K + I / C

        # Solve the linear system rather than forming an explicit inverse.
        self._beta = np.linalg.solve(self.K, np.asarray(Y, dtype=float))
        self.beta = self._beta


    def predict(self, X : np.ndarray) -> np.ndarray:
        """Return predictions ``k(X, X_train) @ beta``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self._X_train is None or self._beta is None:
            raise RuntimeError("KELM.predict() called before fit()")

        K_new = self._kernel_matrix(X, self._X_train)  # Kernel between new data and training data
        y_pred = K_new @ self._beta
        return y_pred

In [None]:
# Keyword arguments make the layer sizes explicit: 41 input features, 400 hidden
# neurons and 2 output units. Passed positionally as (41, 2, 400) the last two
# values land on num_hidden_units and num_out_units respectively, i.e. a
# 2-neuron hidden layer with 400 outputs.
ELM_basic = ELM(num_input_nodes=41, num_hidden_units=400, num_out_units=2, param_C=2)
ELM_kernel = KELM(num_input_nodes=41, num_out_units=2) #find C and gamma as in SVM block