# 1. Loading libraries


In [None]:
!pip uninstall scikit-learn
!pip install scikit-learn==1.2.2

In [1]:
import pandas as pd
from sklearn import preprocessing
from sklearn.cluster import DBSCAN
from tabulate import tabulate
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split 
import seaborn as sns
import numpy as np
# importing required libraries for normalizing data
from sklearn.preprocessing import StandardScaler,LabelBinarizer,MinMaxScaler
from sklearn.neighbors import NearestNeighbors
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from keras.layers import Dense, MaxPool1D, Flatten, Dropout,Conv1D , BatchNormalization,Input
from keras.models import Sequential,Model # importing dense layer
# representation of model layers
from keras.utils import plot_model
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import xgboost
import os
import csv
import time

In [2]:
def change_label(df):
  df.label.replace(['DDoS-ICMP_Flood','DDoS-UDP_Flood','DDoS-TCP_Flood','DDoS-PSHACK_Flood','DDoS-SYN_Flood','DDoS-RSTFINFlood','DDoS-SynonymousIP_Flood','DDoS-ICMP_Fragmentation','DDoS-UDP_Fragmentation','DDoS-ACK_Fragmentation','DDoS-HTTP_Flood','DDoS-SlowLoris'],'DDos',inplace=True)
  df.label.replace(['DoS-UDP_Flood','DoS-TCP_Flood','DoS-SYN_Flood','DoS-HTTP_Flood'],'DoS',inplace=True)      
  df.label.replace(['Recon-HostDiscovery','Recon-OSScan','Recon-PortScan','Recon-PingSweep','VulnerabilityScan'],'Recon',inplace=True)
  df.label.replace(['MITM-ArpSpoofing','DNS_Spoofing'],'Spoofing',inplace=True)
  df.label.replace(['DictionaryBruteForce'],'BruteForce',inplace=True)
  df.label.replace(['BrowserHijacking','XSS','Uploading_Attack','SqlInjection','CommandInjection','Backdoor_Malware'],'Web-based',inplace=True)
  df.label.replace(['Mirai-greeth_flood','Mirai-udpplain','Mirai-greip_flood'],'Mirai',inplace=True)
  df.label.replace(['BenignTraffic'],'BENIGN',inplace=True)
    
def scaleStandardData(dataFrame, numeric_cols):
  scaler = preprocessing.StandardScaler()
  for col in numeric_cols:
    arr = dataFrame[col]
    arr = np.array(arr)
    dataFrame[col] = scaler.fit_transform(arr.reshape(len(arr),1))
  return dataFrame

def scaleMinMaxData(dataFrame, numeric_cols):
  scaler = preprocessing.MinMaxScaler()
  for col in numeric_cols:
    arr = dataFrame[col]
    arr = np.array(arr)
    dataFrame[col] = scaler.fit_transform(arr.reshape(len(arr),1))
  return dataFrame

def scaleData(dataFrame, numeric_cols):
  dataFrame = scaleStandardData(dataFrame, numeric_cols)
  dataFrame = scaleMinMaxData(dataFrame, numeric_cols)
  return dataFrame

# 2. Loading data

In [3]:
output_file = 'combined_file_test.csv'
df = pd.read_csv(output_file)
change_label(df)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df.label.replace(['DDoS-ICMP_Flood','DDoS-UDP_Flood','DDoS-TCP_Flood','DDoS-PSHACK_Flood','DDoS-SYN_Flood','DDoS-RSTFINFlood','DDoS-SynonymousIP_Flood','DDoS-ICMP_Fragmentation','DDoS-UDP_Fragmentation','DDoS-ACK_Fragmentation','DDoS-HTTP_Flood','DDoS-SlowLoris'],'DDos',inplace=True)


In [4]:
# Dividing the dataset into 2 parts for training and testing
from sklearn.model_selection import train_test_split

# Assuming 'df' is your pandas dataframe
# Separate features (X) and target variable (y)
features = df.drop('label', axis=1)  # Replace 'target_column_name' with your actual target column name
target = df['label']

# Split data into training and testing sets with 70% for training
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=42)

In [5]:
features

Unnamed: 0,flow_duration,Header_Length,Protocol Type,Duration,Rate,Srate,Drate,fin_flag_number,syn_flag_number,rst_flag_number,...,AVG,Std,Tot size,IAT,Number,Magnitue,Radius,Covariance,Variance,Weight
0,0.000000,54.00,6.00,64.00,0.329807,0.329807,0.0,1.0,0.0,1.0,...,54.000000,0.000000,54.00,8.334383e+07,9.5,10.392305,0.000000,0.000000,0.00,141.55
1,0.000000,57.04,6.33,64.00,4.290556,4.290556,0.0,0.0,0.0,0.0,...,54.796404,2.822973,57.04,8.292607e+07,9.5,10.464666,4.010353,160.987842,0.05,141.55
2,0.000000,0.00,1.00,64.00,33.396799,33.396799,0.0,0.0,0.0,0.0,...,42.000000,0.000000,42.00,8.312799e+07,9.5,9.165151,0.000000,0.000000,0.00,141.55
3,0.328175,76175.00,17.00,64.00,4642.133010,4642.133010,0.0,0.0,0.0,0.0,...,50.000000,0.000000,50.00,8.301570e+07,9.5,10.000000,0.000000,0.000000,0.00,141.55
4,0.117320,101.73,6.11,65.91,6.202211,6.202211,0.0,0.0,1.0,0.0,...,67.959230,23.113111,57.88,8.297300e+07,9.5,11.346876,32.716243,3016.808286,0.19,141.55
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
732745,0.000000,54.00,6.00,64.00,9.917136,9.917136,0.0,0.0,1.0,0.0,...,54.000000,0.000000,54.00,8.336134e+07,9.5,10.392305,0.000000,0.000000,0.00,141.55
732746,0.000000,54.00,6.00,64.00,132.393870,132.393870,0.0,0.0,0.0,0.0,...,54.000000,0.000000,54.00,8.294316e+07,9.5,10.392305,0.000000,0.000000,0.00,141.55
732747,0.000000,54.00,6.00,64.00,0.589620,0.589620,0.0,0.0,1.0,0.0,...,54.000000,0.000000,54.00,8.308924e+07,9.5,10.392305,0.000000,0.000000,0.00,141.55
732748,0.067048,25725.00,17.00,64.00,9488.812264,9488.812264,0.0,0.0,0.0,0.0,...,50.000000,0.000000,50.00,8.348741e+07,9.5,10.000000,0.000000,0.000000,0.00,141.55


In [6]:
numeric_features = X_train.select_dtypes(include='number').columns
# Scaling data
X_train = scaleData(X_train, numeric_features)
X_test = scaleData(X_test, numeric_features)
# Setting the label to number to get through the feature selection. They only allow numeric data
y_train = y_train.replace({'BENIGN': 0, 'DDos': 1,'DoS':2,'Mirai':3,'Spoofing':4,'Recon':5,'Web-based':6,'BruteForce':7})
y_test = y_test.replace({'BENIGN': 0, 'DDos': 1,'DoS':2,'Mirai':3,'Spoofing':4,'Recon':5,'Web-based':6,'BruteForce':7})

  y_train = y_train.replace({'BENIGN': 0, 'DDos': 1,'DoS':2,'Mirai':3,'Spoofing':4,'Recon':5,'Web-based':6,'BruteForce':7})
  y_test = y_test.replace({'BENIGN': 0, 'DDos': 1,'DoS':2,'Mirai':3,'Spoofing':4,'Recon':5,'Web-based':6,'BruteForce':7})


In [7]:
# Dividing data due to feature elimination with each methods
# Random Forest Regression
rf_features_5 = ['IAT', 'rst_count', 'flow_duration', 'Srate', 'Rate']
rf_features_15 = ['IAT', 'rst_count', 'flow_duration', 'Srate', 'Rate', 'Weight', 'Header_Length', 'urg_count', 'Number', 'Duration', 'Protocol Type', 'syn_count', 'Min', 'Tot size', 'fin_count']
rf_features_30 = ['IAT', 'rst_count', 'flow_duration', 'Srate', 'Rate', 'Weight', 'Header_Length', 'urg_count', 'Number', 'Duration', 'Protocol Type', 'syn_count', 'Min', 'Tot size', 'fin_count', 'Tot sum', 'HTTPS', 'Max', 'ack_count', 'Covariance', 'Magnitue', 'AVG', 'Std', 'Radius', 'HTTP', 'Variance', 'SSH', 'syn_flag_number', 'TCP', 'ack_flag_number']

# Recursive Feature Elimination
rfe_features_5 = ['flow_duration', 'Header_Length', 'Srate', 'rst_count', 'IAT']
rfe_features_15 = ['flow_duration', 'Header_Length', 'Protocol Type', 'Duration', 'Srate', 'syn_count', 'fin_count', 'urg_count', 'rst_count', 'HTTPS', 'Min', 'Tot size', 'IAT', 'Covariance', 'Weight']
rfe_features_30 = ['flow_duration', 'Header_Length', 'Protocol Type', 'Duration', 'Rate', 'Srate', 'syn_flag_number', 'psh_flag_number', 'ack_count', 'syn_count', 'fin_count', 'urg_count', 'rst_count', 'HTTP', 'HTTPS', 'SSH', 'UDP', 'Tot sum', 'Min', 'Max', 'AVG', 'Std', 'Tot size', 'IAT', 'Number', 'Magnitue', 'Radius', 'Covariance', 'Variance', 'Weight']

# Logistic Regression
lg_features_5 = ['Header_Length', 'Magnitue', 'Variance', 'Duration', 'Min']
lg_features_15 = ['Header_Length', 'Magnitue', 'Variance', 'Duration', 'Min', 'Protocol Type', 'Tot sum', 'rst_count', 'TCP', 'AVG', 'syn_flag_number', 'fin_flag_number', 'ICMP', 'ack_flag_number', 'psh_flag_number']
lg_features_30 = ['Header_Length', 'Magnitue', 'Variance', 'Duration', 'Min', 'Protocol Type', 'Tot sum', 'rst_count', 'TCP', 'AVG', 'syn_flag_number', 'fin_flag_number', 'ICMP', 'ack_flag_number', 'psh_flag_number', 'IPv', 'LLC', 'UDP', 'urg_count', 'HTTP', 'syn_count', 'rst_flag_number', 'Tot size', 'Radius', 'Std', 'HTTPS', 'Max', 'IAT', 'ack_count', 'Weight']

# XGBoost Regression
xg_features_5 = ['IAT', 'rst_count', 'Number', 'HTTPS', 'SSH']
xg_features_15 = ['IAT', 'rst_count', 'Number', 'HTTPS', 'SSH', 'flow_duration', 'Weight', 'fin_count', 'Rate', 'HTTP', 'Tot sum', 'Header_Length', 'syn_count', 'Magnitue', 'UDP']
xg_features_30 = ['IAT', 'rst_count', 'Number', 'HTTPS', 'SSH', 'flow_duration', 'Weight', 'fin_count', 'Rate', 'HTTP', 'Tot sum', 'Header_Length', 'syn_count', 'Magnitue', 'UDP', 'Min', 'urg_count', 'TCP', 'Protocol Type', 'Duration', 'ack_count', 'DNS', 'Tot size', 'Variance', 'AVG', 'Max', 'ack_flag_number', 'Covariance', 'Std', 'Radius']

# Information Gain
ig_features_5 = ['IAT', 'Tot size', 'Max', 'Magnitue', 'AVG']
ig_features_15 = ['IAT', 'Tot size', 'Max', 'Magnitue', 'AVG', 'Tot sum', 'Min', 'Header_Length', 'Protocol Type', 'Number', 'Weight', 'Duration', 'flow_duration', 'Std', 'Radius']
ig_features_30 = ['IAT', 'Tot size', 'Max', 'Magnitue', 'AVG', 'Tot sum', 'Min', 'Header_Length', 'Protocol Type', 'Number', 'Weight', 'Duration', 'flow_duration', 'Std', 'Radius', 'Covariance', 'rst_count', 'Variance', 'urg_count', 'Rate', 'Srate', 'TCP', 'syn_count', 'ack_flag_number', 'IPv', 'LLC', 'ICMP', 'HTTPS', 'ack_count', 'fin_count']

In [8]:
# Getting new dataset after deleting features
# Random Forest
X_train_rf_5 = X_train.loc[:, rf_features_5]
X_train_rf_15 = X_train.loc[:, rf_features_15]
X_train_rf_30 = X_train.loc[:, rf_features_30]


# RFE
X_train_rfe_5 = X_train.loc[:, rfe_features_5]
X_train_rfe_15 = X_train.loc[:, rfe_features_15]
X_train_rfe_30 = X_train.loc[:, rfe_features_30]


# Logistic Regression
X_train_lg_5 = X_train.loc[:, lg_features_5]
X_train_lg_15 = X_train.loc[:, lg_features_15]
X_train_lg_30 = X_train.loc[:, lg_features_30]


# XGBoost
X_train_xg_5 = X_train.loc[:, xg_features_5]
X_train_xg_15 = X_train.loc[:, xg_features_15]
X_train_xg_30 = X_train.loc[:, xg_features_30]


# Information Gain
X_train_ig_5 = X_train.loc[:, ig_features_5]
X_train_ig_15 = X_train.loc[:, ig_features_15]
X_train_ig_30 = X_train.loc[:, ig_features_30]

In [9]:
# Getting new dataset after deleting features
# Random Forest
X_test_rf_5 = X_test.loc[:, rf_features_5]
X_test_rf_15 = X_test.loc[:, rf_features_15]
X_test_rf_30 = X_test.loc[:, rf_features_30]

# RFE
X_test_rfe_5 = X_test.loc[:, rfe_features_5]
X_test_rfe_5 = X_test.loc[:, rfe_features_15]
X_test_rfe_5 = X_test.loc[:, rfe_features_30]

# Logistic Regression
X_test_lg_5 = X_test.loc[:, lg_features_5]
X_test_lg_15 = X_test.loc[:, lg_features_15]
X_test_lg_30 = X_test.loc[:, lg_features_30]

# XGBoost
X_test_xg_5 = X_test.loc[:, xg_features_5]
X_test_xg_15 = X_test.loc[:, xg_features_15]
X_test_xg_30 = X_test.loc[:, xg_features_30]

# Information Gain
X_test_ig_5 = X_test.loc[:, ig_features_5]
X_test_ig_15 = X_test.loc[:, ig_features_15]
X_test_ig_30 = X_test.loc[:, ig_features_30]

In [34]:
# Compute how much data is left after feature selection, for 5 selected features only
import sys
def compute_percentage_size(df1, df2):
    """
    Compute the percentage that the size of df1 makes up of the size of df2.
    
    Parameters:
    df1 (pandas.DataFrame): The first DataFrame.
    df2 (pandas.DataFrame): The second DataFrame.
    
    Returns:
    float: The percentage that the size of df1 makes up of the size of df2.
    """
    size_1_bytes = df1.memory_usage(deep=True).sum()
    size_2_bytes = df2.memory_usage(deep=True).sum()
    
    size_1_gb = size_1_bytes / (1024 ** 3)
    size_2_gb = size_2_bytes / (1024 ** 3)
    
    if size_2_gb == 0:
        raise ValueError("The size of the second dataset cannot be zero.")
    
    percentage = (size_1_gb / size_2_gb) * 100
    print(percentage)

In [37]:
compute_percentage_size(X_train_ig_5, X_train)
compute_percentage_size(X_train_rfe_15, X_train)
compute_percentage_size(X_train_rf_30, X_train)
compute_percentage_size(X_train_lg_5, X_train)
compute_percentage_size(X_train_xg_5, X_train)

12.76595744680851
34.04255319148936
65.95744680851064
12.76595744680851
12.76595744680851


# 3. Machine Learning algorithms

In [10]:
def DT(X_train, y_train, X_test, y_test):
    
    # Loading library
    from sklearn.tree import DecisionTreeClassifier
    
    # Define the classifier with desired parameters (optional)
    clf = DecisionTreeClassifier(max_depth=None, random_state=42)  # Adjust parameters as needed
    
    # Train the model on the training data
    start_time = time.time()
    clf.fit(X_train, y_train)
    end_time = time.time()

    # Calculate training time
    training_time = end_time - start_time
    
    # Predicting the Test set results
    y_pred = clf.predict(X_test)
    
    print(pd.crosstab(y_test, y_pred, rownames=['Actual attack'], colnames=['Predicted attack']))
    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')  # Weighted average for multi-class
    f1 = f1_score(y_test, y_pred, average='weighted')  # Weighted average for multi-class
    recall = recall_score(y_test, y_pred, average='weighted')

    print("Training Time:", training_time, "seconds")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall: ", recall)
    print("F1-score:", f1)

In [11]:
def RF(X_train, y_train, X_test, y_test):
    # Loading library
    from sklearn.ensemble import RandomForestClassifier
    
    # Define the classifier with desired parameters (optional)
    clf = RandomForestClassifier(n_estimators = 100, criterion = 'gini', random_state = 42)  # Adjust parameters as needed
    
    # Train the model on the training data
    start_time = time.time()
    clf.fit(X_train, y_train)
    end_time = time.time()
    
    # Calculate training time
    training_time = end_time - start_time
    
    # Predicting the Test set results
    y_pred = clf.predict(X_test)
    
    print(pd.crosstab(y_test, y_pred, rownames=['Actual attack'], colnames=['Predicted attack']))
    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')  # Weighted average for multi-class
    f1 = f1_score(y_test, y_pred, average='weighted')  # Weighted average for multi-class
    recall = recall_score(y_test, y_pred, average='weighted')
    
    print("Training Time:", training_time, "seconds")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall: ", recall)
    print("F1-score:", f1)

In [12]:
def KNN(X_train, y_train, X_test, y_test):
    # Loading library
    from sklearn.neighbors import KNeighborsClassifier
    
    knn = KNeighborsClassifier(n_neighbors=5)
    # Train the model on the training data
    start_time = time.time()
    knn.fit(X_train, y_train)
    end_time = time.time()

    # Calculate training time
    training_time = end_time - start_time
    y_pred = knn.predict(X_test.values)
    
    print(pd.crosstab(y_test, y_pred, rownames=['Actual attack'], colnames=['Predicted attack']))
    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')  # Weighted average for multi-class
    f1 = f1_score(y_test, y_pred, average='weighted')  # Weighted average for multi-class
    recall = recall_score(y_test, y_pred, average='weighted')
    
    print("Training Time:", training_time, "seconds")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall: ", recall)
    print("F1-score:", f1)

In [13]:
def GB(X_train, y_train, X_test, y_test):
    # Loading library
    from sklearn.ensemble import GradientBoostingClassifier
    
    # Create the Gradient Boosting Classifier
    clf = GradientBoostingClassifier(n_estimators=100,  # Number of boosting stages
                                learning_rate=0.1,  # Learning rate
                                max_depth=3,        # Maximum depth of individual trees
                                random_state=42)   # Set random seed for reproducibility
    
    # Train the model on the training data
    start_time = time.time()
    clf.fit(X_train, y_train)
    end_time = time.time()
    
    # Calculate training time
    training_time = end_time - start_time
    # Predicting the Test set results
    y_pred = clf.predict(X_test)
    
    
    print(pd.crosstab(y_test, y_pred, rownames=['Actual attack'], colnames=['Predicted attack']))
    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')  # Weighted average for multi-class
    f1 = f1_score(y_test, y_pred, average='weighted')  # Weighted average for multi-class
    recall = recall_score(y_test, y_pred, average='weighted')
    
    print("Training Time:", training_time, "seconds")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall: ", recall)
    print("F1-score:", f1)

In [14]:
def QDA(X_train, y_train, X_test, y_test):
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

    # Create the Gradient Boosting Classifier
    qda = QuadraticDiscriminantAnalysis()
    
    # Train the model on the training data
    start_time = time.time()
    qda.fit(X_train, y_train)
    end_time = time.time()
    
    # Calculate training time
    training_time = end_time - start_time
    # Predicting the Test set results
    y_pred = qda.predict(X_test)
    
    print(pd.crosstab(y_test, y_pred, rownames=['Actual attack'], colnames=['Predicted attack']))
    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')  # Weighted average for multi-class
    f1 = f1_score(y_test, y_pred, average='weighted')  # Weighted average for multi-class
    recall = recall_score(y_test, y_pred, average='weighted')
    
    print("Training Time:", training_time, "seconds")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall: ", recall)
    print("F1-score:", f1)

In [15]:
def MLP(X_train, y_train, X_test, y_test):
    from sklearn.neural_network import MLPClassifier
    from sklearn.model_selection import train_test_split  # Optional for data splitting

    # Hyperparameter tuning (adjust as needed)
    solver = 'lbfgs'  # Solver for weight optimization (options: 'lbfgs', 'sgd', 'adam')
    hidden_layer_sizes = (100, 50)  # Tuple representing the structure of hidden layers (number of neurons)
    activation = 'relu'  # Activation function for hidden layers ('relu', 'tanh', 'logistic')
    max_iter = 200  # Maximum number of training iterations

    # Create the MLP classifier (adjust hyperparameters)
    mlp = MLPClassifier(solver=solver, hidden_layer_sizes=hidden_layer_sizes, activation=activation, max_iter=max_iter)

    # Train the model on the training data
    start_time = time.time()
    mlp.fit(X_train, y_train)
    end_time = time.time()

    # Calculate training time
    training_time = end_time - start_time
    # Make predictions on the test set
    y_pred = mlp.predict(X_test)
    
    print(pd.crosstab(y_test, y_pred, rownames=['Actual attack'], colnames=['Predicted attack']))
    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')  # Weighted average for multi-class
    f1 = f1_score(y_test, y_pred, average='weighted')  # Weighted average for multi-class
    recall = recall_score(y_test, y_pred, average='weighted')
    
    print("Training Time:", training_time, "seconds")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall: ", recall)
    print("F1-score:", f1)

In [16]:
def SVM(X_train, y_train, X_test, y_test):
    from sklearn.svm import SVC

    # Choose a multi-class strategy (e.g., 'ovr' or 'ovo')
    multi_class = 'ovr'  # One-vs-Rest strategy

    # Create the SVM classifier (adjust hyperparameters as needed)
    svm_clf = SVC(kernel='linear', C=1.0, decision_function_shape='ovo')  # Adjust kernel and other parameters

    # Train the model on the training data
    start_time = time.time()
    svm_clf.fit(X_train, y_train)
    end_time = time.time()

    # Calculate training time
    training_time = end_time - start_time
    # Make predictions on the test set
    y_pred = svm_clf.predict(X_test)
    
    print(pd.crosstab(y_test, y_pred, rownames=['Actual attack'], colnames=['Predicted attack']))
    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')  # Weighted average for multi-class
    f1 = f1_score(y_test, y_pred, average='weighted')  # Weighted average for multi-class
    recall = recall_score(y_test, y_pred, average='weighted')
    
    print("Training Time:", training_time, "seconds")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall: ", recall)
    print("F1-score:", f1)

# 4. Training without selecting features

In [17]:
# Decision Tree without feature selection
DT(X_train, y_train, X_test, y_test)

Predicted attack     0       1      2      3     4    5   6   7
Actual attack                                                  
0                 4329     138     76     81   260  243   6   7
1                    0  159884      6      5     1    9   0   0
2                    0       8  38032      1     2    0   0   0
3                    1       0      0  12560     1    1   0   0
4                  156      95     70    295  1663   65   9   0
5                   99     326     64    101    68  985  10   3
6                    4      21      5      9     7    7  56   0
7                    5       8      8      3     8    1   0  23
Accuracy: 0.9895689753212783
Precision: 0.9887955608440084
Recall:  0.9895689753212783
F1-score: 0.9889569895318405


In [18]:
# Random Forest without feature selection
RF(X_train, y_train, X_test, y_test)

Predicted attack     0       1      2      3     4     5  6   7
Actual attack                                                  
0                 4938       0      0      0   123    79  0   0
1                    0  159872     20      0     1    12  0   0
2                    0      21  38019      1     2     0  0   0
3                    0       9      2  12552     0     0  0   0
4                  266       1      0      0  2000    86  0   0
5                  208      30      0      0   132  1286  0   0
6                   24       0      0      0    54    26  5   0
7                   11       0      0      0    12    19  0  14
Accuracy: 0.9948186057090868
Precision: 0.9948316424434516
Recall:  0.9948186057090868
F1-score: 0.9945198526065532


In [19]:
# KNN without feature selection
KNN(X_train, y_train, X_test, y_test)



Predicted attack     0       1      2      3     4    5   6  7
Actual attack                                                 
0                 4584       1      0      0   347  196  10  2
1                    2  156352   3520     12     1   18   0  0
2                    1   11129  26890      6     1   16   0  0
3                    0      62      8  12488     0    5   0  0
4                  906       4      2      0  1346   95   0  0
5                  607      41     24      2   126  852   4  0
6                   64       0      0      0    19   24   2  0
7                   30       0      0      0    11    6   0  9
Accuracy: 0.9212919367678836
Precision: 0.9192157446493204
Recall:  0.9212919367678836
F1-score: 0.9177271284380387


In [20]:
# GB without feature selection
GB(X_train, y_train, X_test, y_test)

In [None]:
# QDA without feature selection
QDA(X_train, y_train, X_test, y_test)



Predicted attack      0    1   2   3      4      5  7
Actual attack                                        
0                  4995    0   0   0    126     19  0
1                 45120  433  21   3  58998  55330  0
2                 15745   34  13  12  12632   9607  0
3                  1701    0   0   5  10857      0  0
4                  1564    0   0   0    769     20  0
5                   825    0   0   0    340    491  0
6                    81    0   0   0     25      3  0
7                    41    0   0   0      6      0  9
Accuracy: 0.03054702604344365
Precision: 0.7569936088113675
Recall:  0.03054702604344365
F1-score: 0.007569291692961416


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# MLP without feature selection
MLP(X_train, y_train, X_test, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Predicted attack     0       1     2      3    4    5
Actual attack                                        
0                 4513      13     1      1  494  118
1                   24  156831  3034      2    9    5
2                   12   30257  7750      3    9   12
3                   15     101    15  12426    6    0
4                 1272      10    28     46  959   38
5                  718     127   103      0  116  592
6                   75       0     0      2   22   10
7                   39       0     0      0   13    4
Accuracy: 0.8328033663141134
Precision: 0.8164168418262238
Recall:  0.8328033663141134
F1-score: 0.7952508636017652


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# SVM without feature selection
SVM(X_train, y_train, X_test, y_test)

# 5. 5 selected features

## 5.1 Decision Tree

In [17]:
# Decision Tree with 5 selected features
X_test_ig_5 = X_test.loc[:, ig_features_5]
DT(X_train_ig_5, y_train, X_test_ig_5, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 3716      14      1      2   887   467  39  14
1                    1  159830     50      0     1    22   1   0
2                    0       6  38032      0     1     4   0   0
3                    0      53      1  12489    20     0   0   0
4                  459       4      0      0  1689   164  26  11
5                  270      13      1      5   233  1104  19  11
6                   22       1      0      0    19     8  59   0
7                    8       1      0      0    14     6   1  26
Training Time: 0.4009745121002197 seconds
Accuracy: 0.9868986693961106
Precision: 0.9876572442749527
Recall:  0.9868986693961106
F1-score: 0.9871129238422957


In [18]:
# Decision Tree with 5 selected features
X_test_rfe_5 = X_test.loc[:, rfe_features_5]
DT(X_train_rfe_5, y_train, X_test_rfe_5, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 4557       2      0      0   290   264  15  12
1                    0  159865      9     13     4    14   0   0
2                    2       9  38030      1     1     0   0   0
3                    2      25      1  12528     7     0   0   0
4                  271       4      0      0  1923   119  29   7
5                  172      12      0      0   125  1319  18  10
6                   10       1      0      0    15    17  63   3
7                   15       0      0      0     7     5   3  26
Training Time: 0.8076374530792236 seconds
Accuracy: 0.9931127032867053
Precision: 0.9932204218413225
Recall:  0.9931127032867053
F1-score: 0.9931587222434798


In [19]:
# Decision Tree with 5 selected features
X_test_rf_5 = X_test.loc[:, rf_features_5]
DT(X_train_rf_5, y_train, X_test_rf_5, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 4517       1      0      0   305   283  14  20
1                    0  159854     12     22     4    13   0   0
2                    1       7  38029      2     2     1   1   0
3                    2      29      0  12522     7     3   0   0
4                  314       4      1      0  1832   167  28   7
5                  184      23      0      2   143  1268  29   7
6                    7       0      0      0    16    20  64   2
7                   12       0      0      1     7     9   1  26
Training Time: 1.0136735439300537 seconds
Accuracy: 0.9922074377345615
Precision: 0.9923456388236475
Recall:  0.9922074377345615
F1-score: 0.9922646119626849


In [20]:
# Decision Tree with 5 selected features
X_test_lg_5 = X_test.loc[:, lg_features_5]
DT(X_train_lg_5, y_train, X_test_lg_5, y_test)

Predicted attack     0       1      2     3     4    5   6   7
Actual attack                                                 
0                 3514      43     26     3   985  455  76  38
1                   58  122004  37693    43    38   68   0   1
2                   27   16468  21415    31    23   79   0   0
3                    9      61   3930  8540     9   13   1   0
4                  855      32     31     9  1152  227  34  13
5                  471      75    106    22   221  720  26  15
6                   37       3      2     0    32   28   6   1
7                   18       0      4     0    11   16   5   2
Training Time: 1.099799394607544 seconds
Accuracy: 0.7158103036506311
Precision: 0.7798557313202943
Recall:  0.7158103036506311
F1-score: 0.7383650370421545


In [21]:
# Decision Tree with 5 selected features
X_test_xg_5 = X_test.loc[:, xg_features_5]
DT(X_train_xg_5, y_train, X_test_xg_5, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 4288      29     98      0   372   336  13   4
1                    0  159845     10     50     0     0   0   0
2                    1       7  38022     11     1     1   0   0
3                    2      28      3  12530     0     0   0   0
4                  506      56    267      0  1347   175   1   1
5                  199     222    147      0    33  1052   3   0
6                   13       8     13      0     0    17  58   0
7                   10       0      8      0     1    11   0  26
Training Time: 0.23443365097045898 seconds
Accuracy: 0.9879131127032867
Precision: 0.9870858105085002
Recall:  0.9879131127032867
F1-score: 0.9873086740447191


## 5.2 Random Forest

In [38]:
# RF with 5 selected features
RF(X_train_ig_5, y_train, X_test_ig_5, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 4350       0      0      0   717    71   2   0
1                    1  159806     82      0     2    14   0   0
2                    0       2  38037      0     2     2   0   0
3                    0      53      3  12487    20     0   0   0
4                  659       0      1      0  1661    32   0   0
5                  352       6      0      3   169  1126   0   0
6                   30       0      0      0    24     8  47   0
7                   16       0      0      0    12     2   0  26
Training Time: 19.49922776222229 seconds
Accuracy: 0.9896053679062891
Precision: 0.9900651899391
Recall:  0.9896053679062891
F1-score: 0.9896158839275475


In [39]:
# RF with 5 selected features
X_test_rfe_5 = X_test.loc[:, rfe_features_5]
RF(X_train_rfe_5, y_train, X_test_rfe_5, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 4840       1      0      0   154   145   0   0
1                    0  159870      9     19     2     5   0   0
2                    0       7  38034      0     2     0   0   0
3                    0      25      3  12528     7     0   0   0
4                  288       1      0      0  1940   122   0   2
5                  195      13      1      0    96  1345   6   0
6                   12       0      0      0    23    29  45   0
7                   14       0      0      0     6    10   0  26
Training Time: 25.503950357437134 seconds
Accuracy: 0.9945547594677584
Precision: 0.9945322398999464
Recall:  0.9945547594677584
F1-score: 0.9944664634641049


In [40]:
# RF with 5 selected features
X_test_rf_5 = X_test.loc[:, rf_features_5]
RF(X_train_rf_5, y_train, X_test_rf_5, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 4808       1      0      1   176   154   0   0
1                    0  159858     16     14     4    13   0   0
2                    0      10  38028      2     3     0   0   0
3                    1      38      5  12511     5     3   0   0
4                  317       3      1      0  1896   132   3   1
5                  202      28      1      1   134  1286   4   0
6                   12       0      0      0    18    30  48   1
7                    9       0      0      1    11     9   0  26
Training Time: 28.179960012435913 seconds
Accuracy: 0.9937950642556579
Precision: 0.9937477574581537
Recall:  0.9937950642556579
F1-score: 0.9937026321600602


In [41]:
# RF with 5 selected features
X_test_lg_5 = X_test.loc[:, lg_features_5]
RF(X_train_lg_5, y_train, X_test_lg_5, y_test)

Predicted attack     0       1      2      3     4    5  6
Actual attack                                             
0                 4464      13     13      0   427  217  6
1                   56  142020  17745     25    15   44  0
2                   22   21041  16925      8    12   35  0
3                    9      62     21  12452     9   10  0
4                 1038      14     24      3  1137  137  0
5                  618      73     71      9   139  742  4
6                   64       2      0      0    24   19  0
7                   31       1      4      0     5   15  0
Training Time: 39.203415632247925 seconds
Accuracy: 0.808552257477539
Precision: 0.8020515168005034
Recall:  0.808552257477539
F1-score: 0.8046709784176413


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [42]:
# RF with 5 selected features
X_test_xg_5 = X_test.loc[:, xg_features_5]
RF(X_train_xg_5, y_train, X_test_xg_5, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 4338      28     20     14   447   267  21   5
1                    0  159836      2     67     0     0   0   0
2                    1       8  38022     12     0     0   0   0
3                    0      34      4  12525     0     0   0   0
4                  557      42     18    194  1418   116   7   1
5                  257     146     28     47   100  1074   4   0
6                   18       6      3      4     7    15  56   0
7                   16       0      1      1     5     3   0  30
Training Time: 12.527245044708252 seconds
Accuracy: 0.9885090412828387
Precision: 0.9878246992636307
Recall:  0.9885090412828387
F1-score: 0.9880761358319811


## 5.3 K-NN

In [43]:
# KNN with 5 selected features
KNN(X_train_ig_5, y_train, X_test_ig_5, y_test)



Predicted attack     0       1      2   3     4    5  6  7
Actual attack                                             
0                 3764       0      0   0  1235  132  6  3
1                   13  154717   5156  12     2    5  0  0
2                    2     664  37362  12     1    2  0  0
3                   15   12247    210  85     6    0  0  0
4                 1413       0      2   0   869   66  2  1
5                  835      98     48   9   233  431  2  0
6                   80       0      0   0    19    5  5  0
7                   41       0      0   0    12    3  0  0
Training Time: 0.41422152519226074 seconds
Accuracy: 0.8972273399294893
Precision: 0.8867179100001988
Recall:  0.8972273399294893
F1-score: 0.8701583752122565


In [44]:
# KNN with 5 selected features
X_test_rfe_5 = X_test.loc[:, rfe_features_5]
KNN(X_train_rfe_5, y_train, X_test_rfe_5, y_test)



Predicted attack     0       1      2      3     4     5   6  7
Actual attack                                                  
0                 4789       1      2      0   226   119   2  1
1                    1  159777     81     22     5    19   0  0
2                    1     122  37899     13     0     8   0  0
3                    0      76      7  12469     7     4   0  0
4                  844       5      0      1  1349   146   6  2
5                  412      47      1      2   179  1011   4  0
6                   27       0      0      0    28    30  24  0
7                   24       0      0      0    19    11   1  1
Training Time: 0.4629685878753662 seconds
Accuracy: 0.9886000227453656
Precision: 0.9882635799032875
Recall:  0.9886000227453656
F1-score: 0.9880351031712151


In [45]:
# KNN with 5 selected features
X_test_rf_5 = X_test.loc[:, rf_features_5]
KNN(X_train_rf_5, y_train, X_test_rf_5, y_test)



Predicted attack     0       1      2      3     4    5   6  7
Actual attack                                                 
0                 4788       1      0      0   246  103   0  2
1                    1  159786     64     18     6   30   0  0
2                    0      87  37944      3     2    7   0  0
3                    0      78      3  12470     3    9   0  0
4                  899       4      2      5  1261  172   4  6
5                  364      56      1      1   250  978   5  1
6                   23       0      0      0    36   26  24  0
7                   17       0      0      0    25   12   2  0
Training Time: 0.4237635135650635 seconds
Accuracy: 0.9882906857727738
Precision: 0.987767952624124
Recall:  0.9882906857727738
F1-score: 0.9876657314149838


In [46]:
# KNN with 5 selected features
X_test_lg_5 = X_test.loc[:, lg_features_5]
KNN(X_train_lg_5, y_train, X_test_lg_5, y_test)



Predicted attack     0      1      2      3    4    5   6  7
Actual attack                                               
0                 4348     35     23      3  470  242  12  7
1                  120  77565  82107     23   22   68   0  0
2                   81  14649  23243     12    6   52   0  0
3                   33     58     24  12426   12   10   0  0
4                 1298     41     34      6  845  127   1  1
5                  683     71     82      7  137  674   1  1
6                   76      1      0      0   13   19   0  0
7                   31      0      4      0    6   15   0  0
Training Time: 0.40781235694885254 seconds
Accuracy: 0.5417991584214716
Precision: 0.7309713744632257
Recall:  0.5417991584214716
F1-score: 0.5854240974685558


In [47]:
# KNN with 5 selected features
X_test_xg_5 = X_test.loc[:, xg_features_5]
KNN(X_train_xg_5, y_train, X_test_xg_5, y_test)



Predicted attack     0       1      2      3    4    5   6  7
Actual attack                                                
0                 4412       0    446      1  259   22   0  0
1                    0  159788     59     27    3   28   0  0
2                    0      51  37988      3    0    1   0  0
3                    0      65     24  12473    0    1   0  0
4                  872       0    442      1  970   63   0  5
5                  301      29    510      1  126  689   0  0
6                   19       0     41      0   11    9  29  0
7                   15       0     13      0   19    0   0  9
Training Time: 0.3515951633453369 seconds
Accuracy: 0.9842283634709428
Precision: 0.9829821899334528
Recall:  0.9842283634709428
F1-score: 0.9824123184537709


## 5.4 Gradient Boosting

In [48]:
# GB with 5 selected features
GB(X_train_ig_5, y_train, X_test_ig_5, y_test)

Predicted attack     0       1      2      3     4    5   6   7
Actual attack                                                  
0                 4773      15      0      1   318    2  26   5
1                    3  159874     10      0    15    2   0   1
2                    1      12  38025      0     3    2   0   0
3                   20     139      1  12393     0    3   0   7
4                  835       9      0      0  1482    8  13   6
5                  621      13      0      1   116  894   5   6
6                   48       0      0      0     2    0  59   0
7                   25       0      0      0     5    0   0  26
Training Time: 216.53049612045288 seconds
Accuracy: 0.9895416808825201
Precision: 0.9903376211186882
Recall:  0.9895416808825201
F1-score: 0.9891746173796421


In [49]:
# GB with 5 selected features
X_test_rfe_5 = X_test.loc[:, rfe_features_5]
GB(X_train_rfe_5, y_train, X_test_rfe_5, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 4839       1      0      0   165   127   6   2
1                    0  159872      8      8     4    11   0   2
2                    1      11  38026      0     1     4   0   0
3                    0      28      0  12525     7     3   0   0
4                  326       1      0      0  1910   111   3   2
5                  201      13      0      0    96  1339   5   2
6                    8       0      0      0    22    29  50   0
7                    8       0      0      0     4    17   1  26
Training Time: 302.74483847618103 seconds
Accuracy: 0.9943682474695781
Precision: 0.9943089337301935
Recall:  0.9943682474695781
F1-score: 0.9942891429592887


In [50]:
# GB with 5 selected features
X_test_rf_5 = X_test.loc[:, rf_features_5]
GB(X_train_rf_5, y_train, X_test_rf_5, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 4828       0      0      0   145   155   8   4
1                    0  159873      2      4     5    19   0   2
2                    0      12  38021      5     3     2   0   0
3                    0      28      0  12513     8    14   0   0
4                  488       0      0      0  1742   119   2   2
5                  327       9      0      0    61  1252   5   2
6                   28       0      0      0     6    19  56   0
7                   15       0      0      0     3    12   0  26
Training Time: 361.9152476787567 seconds
Accuracy: 0.9931127032867053
Precision: 0.9931610223046667
Recall:  0.9931127032867053
F1-score: 0.9929926898917534


In [51]:
# GB with 5 selected features
X_test_lg_5 = X_test.loc[:, lg_features_5]
GB(X_train_lg_5, y_train, X_test_lg_5, y_test)

Predicted attack     0      1      2      3    4    5  6   7
Actual attack                                               
0                 4495     32     38      0  379  185  5   6
1                   81  73765  85957     22   26   51  2   1
2                   29  12721  25208     15   16   54  0   0
3                   16     73     18  12423    9   10  0  14
4                 1140     54     63      4  946  137  3   6
5                  612    100    108      0  102  730  2   2
6                   68      2      0      0   16   23  0   0
7                   32      1      3      0    8   12  0   0
Training Time: 160.9734263420105 seconds
Accuracy: 0.53482088024565
Precision: 0.7422298916041148
Recall:  0.53482088024565
F1-score: 0.5774146924989445


In [52]:
# GB with 5 selected features
X_test_xg_5 = X_test.loc[:, xg_features_5]
GB(X_train_xg_5, y_train, X_test_xg_5, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 4275      17      0     14     0   829   1   4
1                    0  159802      1    100     0     2   0   0
2                    1       5  38015     21     0     1   0   0
3                    0      26      2  12534     0     1   0   0
4                  526      44      0    193  1178   409   1   2
5                  119     209      0     47     0  1279   1   1
6                    1       7      0      4     0    39  58   0
7                    3       0      0      1     0    25   0  27
Training Time: 131.40112590789795 seconds
Accuracy: 0.9879131127032867
Precision: 0.9899365706769488
Recall:  0.9879131127032867
F1-score: 0.9877473952154756


## 5.5 MLP

In [53]:
# MLP with 5 selected features
MLP(X_train_ig_5, y_train, X_test_ig_5, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Predicted attack     0       1     2      3    5
Actual attack                                   
0                 5121       7     0      0   12
1                    3  158354  1352    177   19
2                    2   33827  4120     91    3
3                   20      76    64  12403    0
4                 2339       1     0      0   13
5                 1214      26   120     12  284
6                  109       0     0      0    0
7                   55       0     0      0    1
Training Time: 316.496488571167 seconds
Accuracy: 0.820116001364722
Precision: 0.8009438814650042
Recall:  0.820116001364722
F1-score: 0.7621644032930971


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [54]:
# MLP with 5 selected features
X_test_rfe_5 = X_test.loc[:, rfe_features_5]
MLP(X_train_rfe_5, y_train, X_test_rfe_5, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Predicted attack     0       1   2     3   4    5
Actual attack                                    
0                 4885       0   0     0  49  206
1                    1  159378  44   461   0   21
2                    0   37949   7    82   0    5
3                    6    9060  10  3473   0   14
4                 2099       0   0     0  85  169
5                 1057     130   0    28   0  441
6                   79       0   0     0   1   29
7                   42       0   0     0   0   14
Training Time: 364.2382080554962 seconds
Accuracy: 0.7654679858978733
Precision: 0.654738099905723
Recall:  0.7654679858978733
F1-score: 0.6772567386829914


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [55]:
# MLP with 5 selected features
X_test_rf_5 = X_test.loc[:, rf_features_5]
MLP(X_train_rf_5, y_train, X_test_rf_5, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Predicted attack     0       1    2    3     4    5
Actual attack                                      
0                 3675       0    0    0  1189  276
1                    2  159350  346  178     7   22
2                    0   37928   73   37     0    5
3                    1   12398   60   80     4   20
4                  961       0    0    0   949  443
5                  344     149    0    1   694  468
6                   13       0    0    0    62   34
7                   13       0    0    0    25   18
Training Time: 418.85092782974243 seconds
Accuracy: 0.7487546912316615
Precision: 0.617617210213671
Recall:  0.7487546912316615
F1-score: 0.6515642769055039


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [56]:
# MLP with 5 selected features
X_test_lg_5 = X_test.loc[:, lg_features_5]
MLP(X_train_lg_5, y_train, X_test_lg_5, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Predicted attack     0       1     2      3    4    5
Actual attack                                        
0                 4294     241   109     22  189  285
1                  353  158457  1006     11    9   69
2                  132   33307  4535     19    2   48
3                   90      70     6  12382    9    6
4                 1533     184   113     51  370  102
5                  976     268    72      5    2  333
6                   79       8     1      4    1   16
7                   37       8     1      0    0   10
Training Time: 290.60850405693054 seconds
Accuracy: 0.8205208688729672
Precision: 0.8126907284072288
Recall:  0.8205208688729672
F1-score: 0.7669090657261998


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [57]:
# MLP with 5 selected features
X_test_xg_5 = X_test.loc[:, xg_features_5]
MLP(X_train_xg_5, y_train, X_test_xg_5, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Predicted attack     0       1    4    5
Actual attack                           
0                 4015       1  687  437
1                   16  159879    0   10
2                    1   38041    0    1
3                   11   12546    0    6
4                 1375       1  538  439
5                  433     149  566  508
6                   27       0   42   40
7                   21       0   13   22
Training Time: 362.35487842559814 seconds
Accuracy: 0.7503241214602525
Precision: 0.5738327824675065
Recall:  0.7503241214602525
F1-score: 0.6499637441429954


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# 6. 15 selected features

## 6.1 Decision Tree

In [16]:
# Decision Tree with 15 selected features
X_test_ig_15 = X_test.loc[:, ig_features_15]
DT(X_train_ig_15, y_train, X_test_ig_15, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 4342       0      0      0   438   316  30  14
1                    0  159572     20      0     2   311   0   0
2                    3      13  38026      0     1     0   0   0
3                    0       1      0  12561     1     0   0   0
4                  304       1      0      0  1876   134  30   8
5                  167       4      0      0   260  1178  38   9
6                   10       0      0      0    15    20  62   2
7                    9       0      0      0     7    12   2  26
Accuracy: 0.9900739224383032
Precision: 0.9909781603237482
Recall:  0.9900739224383032
F1-score: 0.990439783302328


In [17]:
# Decision Tree with 15 selected features
X_test_rfe_15 = X_test.loc[:, rfe_features_15]
DT(X_train_rfe_15, y_train, X_test_rfe_15, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 4445       1      0      0   335   329  18  12
1                    0  159882      5      4     5     9   0   0
2                    3       5  38035      0     0     0   0   0
3                    2       0      0  12559     1     1   0   0
4                  218       2      0      0  1987   108  29   9
5                  156      17      0      0   143  1299  25  16
6                   11       0      0      0    20    13  63   2
7                    8       1      0      0     7     9   4  27
Accuracy: 0.9930490162629364
Precision: 0.9933095717051883
Recall:  0.9930490162629364
F1-score: 0.9931423797946999


In [18]:
# Decision Tree with 15 selected features
X_test_rf_15 = X_test.loc[:, rf_features_15]
DT(X_train_rf_15, y_train, X_test_rf_15, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 4305      63    155     19   271   309   9   9
1                    0  159880      8      5     1    11   0   0
2                    1       3  38038      0     1     0   0   0
3                    2       0      1  12558     2     0   0   0
4                  174      47    145    229  1641    92  22   3
5                   92     322    118     10    75  1022  12   5
6                    4       6     20      3     8    12  56   0
7                    4       4     10      1     5     6   0  26
Accuracy: 0.9895416808825201
Precision: 0.9888372100220354
Recall:  0.9895416808825201
F1-score: 0.9890312621232434


In [19]:
# Decision Tree with 15 selected features
X_test_lg_15 = X_test.loc[:, lg_features_15]
DT(X_train_lg_15, y_train, X_test_lg_15, y_test)

Predicted attack     0       1      2      3     4    5   6   7
Actual attack                                                  
0                 3729      51     24      5   741  507  46  37
1                   54  142905  16834     42    25   41   3   1
2                   20   18545  19418      9    22   28   1   0
3                    9      32     22  12476    17    6   0   1
4                  636      26     44      9  1380  198  42  18
5                  444      64     38      7   220  832  36  15
6                   45       3      1      0    36   20   4   0
7                   22       2      2      0    11   14   4   1
Accuracy: 0.8222222222222222
Precision: 0.8197742844546795
Recall:  0.8222222222222222
F1-score: 0.8209262557493892


In [20]:
# Decision Tree with 15 selected features
X_test_xg_15 = X_test.loc[:, xg_features_15]
DT(X_train_xg_15, y_train, X_test_xg_15, y_test)

Predicted attack     0       1      2      3     4    5   6   7
Actual attack                                                  
0                 4370     125    147     19   207  257   4  11
1                    0  159586     27      2     1  289   0   0
2                    1       4  38036      1     1    0   0   0
3                    1       0      2  12559     1    0   0   0
4                  167     118    202    139  1644   70  11   2
5                  115     308    166      7    56  994   6   4
6                    5      10     24      0     8    3  57   2
7                    3       9     10      0     4    3   0  27
Accuracy: 0.9883907653815535
Precision: 0.9878970720870439
Recall:  0.9883907653815535
F1-score: 0.9879867689597405


## 6.2 Random Forest

In [21]:
# Decision Tree with 15 selected features
X_test_ig_15 = X_test.loc[:, ig_features_15]
RF(X_train_ig_15, y_train, X_test_ig_15, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 4908       0      0      0   154    77   1   0
1                    0  159861     26      0     1    17   0   0
2                    0      12  38029      0     2     0   0   0
3                    0       5      1  12557     0     0   0   0
4                  350       0      0      1  1920    81   1   0
5                  225      30      0      0   139  1262   0   0
6                   31       0      0      0    44    22  12   0
7                   12       0      0      0     7    15   0  22
Accuracy: 0.9942954622995565
Precision: 0.9942511840347397
Recall:  0.9942954622995565
F1-score: 0.9940446429962595


In [22]:
# RF with 15 selected features
X_test_rfe_15 = X_test.loc[:, rfe_features_15]
RF(X_train_rfe_15, y_train, X_test_rfe_15, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 4930       0      0      0   137    73   0   0
1                    0  159884     14      0     2     5   0   0
2                    0       3  38038      0     2     0   0   0
3                    0       4      2  12554     3     0   0   0
4                  248       1      0      0  2036    68   0   0
5                  189      24      0      0   144  1298   1   0
6                   27       0      0      0    42    21  19   0
7                   11       0      0      0     9    17   0  19
Accuracy: 0.995237120436711
Precision: 0.9952388600908392
Recall:  0.995237120436711
F1-score: 0.9950305438195913


In [23]:
# RF with 15 selected features
X_test_rf_15 = X_test.loc[:, rf_features_15]
RF(X_train_rf_15, y_train, X_test_rf_15, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 4907       0      0      0   160    73   0   0
1                    0  159873     18      0     2    12   0   0
2                    1       4  38037      0     1     0   0   0
3                    0      20      1  12541     1     0   0   0
4                  250       1      0      0  2035    67   0   0
5                  188      22      0      0   136  1310   0   0
6                   26       0      0      0    38    23  22   0
7                   11       0      0      0    10    11   0  24
Accuracy: 0.9951051973160469
Precision: 0.9951324304759109
Recall:  0.9951051973160469
F1-score: 0.9949300492808474


In [24]:
# RF with 15 selected features
X_test_lg_15 = X_test.loc[:, lg_features_15]
RF(X_train_lg_15, y_train, X_test_lg_15, y_test)

Predicted attack     0       1      2      3     4    5  6  7
Actual attack                                                
0                 4720       2      4      0   241  173  0  0
1                   39  133595  26225      9    10   27  0  0
2                   20   14380  23609      5    12   17  0  0
3                    5      50     11  12484     9    4  0  0
4                  873       6     10      0  1339  125  0  0
5                  605      39      9      0   117  886  0  0
6                   70       0      0      0    22   17  0  0
7                   32       0      1      0     9   11  1  2
Accuracy: 0.8035255316729216
Precision: 0.826358083414293
Recall:  0.8035255316729216
F1-score: 0.8117267555094585


In [25]:
# RF with 15 selected features
X_test_xg_15 = X_test.loc[:, xg_features_15]
RF(X_train_xg_15, y_train, X_test_xg_15, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 4878       1      0      0   155   105   1   0
1                    1  159889      8      0     1     6   0   0
2                    1       7  38034      0     1     0   0   0
3                    0       5      3  12555     0     0   0   0
4                  273       1      0      0  1993    86   0   0
5                  186      28      0      0   124  1316   2   0
6                   15       0      0      0    41    28  25   0
7                    7       0      0      0    11    16   0  22
Accuracy: 0.9949368816103719
Precision: 0.9949007197779937
Recall:  0.9949368816103719
F1-score: 0.9947720284644582


## 6.3 KNN

In [26]:
# KNN with 15 selected features
X_test_ig_15 = X_test.loc[:, ig_features_15]
KNN(X_train_ig_15, y_train, X_test_ig_15, y_test)



Predicted attack     0       1      2      3     4    5  6  7
Actual attack                                                
0                 4464       0      0      0   450  219  4  3
1                    0  157144   2723     12     1   25  0  0
2                    1    1423  36597      6     0   16  0  0
3                    0      60     27  12469     0    7  0  0
4                 1040       1      0      0  1199  112  1  0
5                  690      47     22      1   134  759  2  1
6                   74       0      0      0    25   10  0  0
7                   40       0      0      0     9    6  0  1
Accuracy: 0.9672830660752871
Precision: 0.9669485131893317
Recall:  0.9672830660752871
F1-score: 0.9666079625764684


In [27]:
# KNN with 15 selected features
X_test_rfe_15 = X_test.loc[:, rfe_features_15]
KNN(X_train_rfe_15, y_train, X_test_rfe_15, y_test)



Predicted attack     0       1      2      3     4    5  6  7
Actual attack                                                
0                 4505       1      0      0   386  243  0  5
1                    1  154500   5369     13     4   18  0  0
2                    1    6158  31867      5     1   11  0  0
3                    0    1090     15  11452     1    5  0  0
4                  917       1      0      0  1327  107  0  1
5                  601      55     26      5   144  808  8  9
6                   62       0      0      0    23   21  0  3
7                   33       0      0      0     4   15  0  4
Accuracy: 0.9301171386330035
Precision: 0.9292888322201532
Recall:  0.9301171386330035
F1-score: 0.9292270016739272


In [28]:
# KNN with 15 selected features
X_test_rf_15 = X_test.loc[:, rf_features_15]
KNN(X_train_rf_15, y_train, X_test_rf_15, y_test)



Predicted attack     0       1      2      3     4    5  6  7
Actual attack                                                
0                 4524       0      0      0   385  228  1  2
1                    1  154578   5295      9     1   21  0  0
2                    2    5926  32099      5     0   11  0  0
3                    0    1082     20  11454     0    7  0  0
4                  919       1      0      0  1306  126  0  1
5                  619      64     20      3   166  771  7  6
6                   62       0      0      0    31   15  1  0
7                   37       0      0      0     6   13  0  0
Accuracy: 0.9313453883771181
Precision: 0.9305188584912817
Recall:  0.9313453883771181
F1-score: 0.9304243413124563


In [29]:
# KNN with 15 selected features
X_test_lg_15 = X_test.loc[:, lg_features_15]
KNN(X_train_lg_15, y_train, X_test_lg_15, y_test)



Predicted attack     0       1      2      3     4    5  6  7
Actual attack                                                
0                 4524      15     15      0   371  205  5  5
1                   64  124912  34868      6    21   34  0  0
2                   30   20775  17188     10    14   26  0  0
3                   18      42     13  12467    15    8  0  0
4                 1032      24     24      5  1175   91  2  0
5                  663      56     25      3   136  768  4  1
6                   70       1      0      0    25   12  1  0
7                   34       0      2      0    14    5  0  1
Accuracy: 0.7325645399749801
Precision: 0.7658474641655447
Recall:  0.7325645399749801
F1-score: 0.7457687580936297


In [30]:
# KNN with 15 selected features
X_test_xg_15 = X_test.loc[:, xg_features_15]
KNN(X_train_xg_15, y_train, X_test_xg_15, y_test)



Predicted attack     0       1      2      3     4    5  6  7
Actual attack                                                
0                 4392       1      0      0   503  240  3  1
1                    3  154557   5287     38     3   17  0  0
2                    2    4901  33109     15     0   16  0  0
3                    2     136     25  12395     0    5  0  0
4                  986       0      0      1  1264  100  2  0
5                  552      70     21      2   156  853  1  1
6                   53       0      0      0    27   24  5  0
7                   22       0      0      0    14   11  0  9
Accuracy: 0.939765722733993
Precision: 0.93956247832054
Recall:  0.939765722733993
F1-score: 0.9392760186566436


## 6.4 Gradient Boosting

In [31]:
# GB with 15 selected features
X_test_ig_15 = X_test.loc[:, ig_features_15]
GB(X_train_ig_15, y_train, X_test_ig_15, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 4408       1      0      0   351   376   2   2
1                    1  159879      3      0     1    19   0   2
2                    1      13  38027      0     1     1   0   0
3                    0       1      0  12558     0     4   0   0
4                  231       1      1      0  1880   236   2   2
5                  119      10      0      0    72  1446   7   2
6                   10       1      0      0    10    37  51   0
7                    2       0      0      0     5    23   0  26
Accuracy: 0.9929489366541567
Precision: 0.9934496561420354
Recall:  0.9929489366541567
F1-score: 0.9930411407254375


In [32]:
# GB with 15 selected features
X_test_rfe_15 = X_test.loc[:, rfe_features_15]
GB(X_train_rfe_15, y_train, X_test_rfe_15, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 4878       0      0      0   133   118   9   2
1                    0  159886      2      0     1    12   2   2
2                    0      17  38024      0     1     1   0   0
3                    1      27      0  12533     1     1   0   0
4                  316       0      0      0  1958    70   7   2
5                  189      13      0      0   104  1343   5   2
6                   15       0      0      0    27    20  47   0
7                   10       0      0      0     8    12   0  26
Accuracy: 0.9948595473672239
Precision: 0.9947773408995016
Recall:  0.9948595473672239
F1-score: 0.9947719406988008


In [16]:
# GB with 15 selected features
X_test_rf_15 = X_test.loc[:, rf_features_15]
GB(X_train_rf_15, y_train, X_test_rf_15, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 7483       0      0      1   404   627   1   2
1                    0  262290      3      0     3    15   0   0
2                    0      40  62616      0     0     3   0   0
3                    0       1      0  20319     0     0   0   0
4                  290       0      0      2  3256   324   1   2
5                  184      20      0      0   126  2409   0   6
6                   12       0      0      0    42    67  80   0
7                   10       0      0      0     8    36   0  55
Accuracy: 0.9938182281877707
Precision: 0.9943117157483381
Recall:  0.9938182281877707
F1-score: 0.9938776909170954


In [17]:
# GB with 15 selected features
X_test_lg_15 = X_test.loc[:, lg_features_15]
GB(X_train_lg_15, y_train, X_test_lg_15, y_test)

Predicted attack     0       1      2      3     4     5   6  7
Actual attack                                                  
0                 7840      39     29      1   370   225   7  7
1                   87  230447  31629     22    37    87   2  0
2                   54   34433  28093      8    13    53   4  1
3                    9      73     17  20192    14    14   0  1
4                 1594      75     91      5  1885   159  64  2
5                 1032     169     99      1   166  1266   4  8
6                  106      11      9      0    38    29   5  3
7                   62       3      2      0    14    25   1  2
Accuracy: 0.803159079442698
Precision: 0.7997282677627551
Recall:  0.803159079442698
F1-score: 0.8006584728074604


In [18]:
# GB with 15 selected features
X_test_xg_15 = X_test.loc[:, xg_features_15]
GB(X_train_xg_15, y_train, X_test_xg_15, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 7793       0      0      0   253   470   2   0
1                    0  262290      4      3     3     6   5   0
2                    0      39  62614      3     2     1   0   0
3                    0      12      0  20308     0     0   0   0
4                  416       0      0      0  3086   368   2   3
5                  415      11      0      9    74  2227   3   6
6                   40       0      0      0    19    76  66   0
7                   15       0      0      0     2    30   0  62
Accuracy: 0.9936463582988208
Precision: 0.9938612632021844
Recall:  0.9936463582988208
F1-score: 0.9936155333805287


## 6.5 MLP

In [19]:
# MLP with 15 selected features
X_test_ig_15 = X_test.loc[:, ig_features_15]
MLP(X_train_ig_15, y_train, X_test_ig_15, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Predicted attack     0       1     2      3    4     5
Actual attack                                         
0                 7885       5     0      1   93   534
1                   31  261053  1192     16    4    15
2                    6   53918  8607    126    0     2
3                    3     102    43  20169    3     0
4                 2916      14     0      0  701   244
5                 1400      72   187      8   23  1055
6                  157       0     0      0    5    39
7                   80       0     0      0    2    27


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.8301592845777268
Precision: 0.8354566713221392
Recall:  0.8301592845777268
F1-score: 0.7788564988686082


In [20]:
# MLP with 15 selected features
X_test_rfe_15 = X_test.loc[:, rfe_features_15]
MLP(X_train_rfe_15, y_train, X_test_rfe_15, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Predicted attack     0       1     2      3     4    5
Actual attack                                         
0                 7752      15     2      1   502  246
1                   27  260819  1050    369     4   42
2                    4   60487  2032     30     2  104
3                    0      60   102  20137    14    7
4                 2572      13     2      1  1164  123
5                 1341      66   214     16   120  988
6                  139       3     0      0    27   32
7                   77       0     0      0     4   28
Accuracy: 0.8119244437791416
Precision: 0.775870000900404
Recall:  0.8119244437791416
F1-score: 0.7417238345097295


  _warn_prf(average, modifier, msg_start, len(result))


In [21]:
# MLP with 15 selected features
X_test_rf_15 = X_test.loc[:, rf_features_15]
MLP(X_train_rf_15, y_train, X_test_rf_15, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Predicted attack     0       1     2      3     4    5
Actual attack                                         
0                 7103       5     1      0   683  726
1                   26  261429   656    159     8   33
2                    2   61428  1200      6     3   20
3                    0     244    99  19966    10    1
4                 2254       3     0      0  1261  357
5                 1576      57   192      2    66  852
6                  141       0     0      0    19   41
7                   84       0     0      0     3   22
Accuracy: 0.8089278091024511
Precision: 0.7658876924844034
Recall:  0.8089278091024511
F1-score: 0.7357125410570154


  _warn_prf(average, modifier, msg_start, len(result))


In [22]:
# MLP with 15 selected features
X_test_lg_15 = X_test.loc[:, lg_features_15]
MLP(X_train_lg_15, y_train, X_test_lg_15, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Predicted attack     0       1      2      3     4    5
Actual attack                                          
0                 7757     201     26      1   360  173
1                  455  259584   2072      5    44  151
2                  246   52045  10269     17    37   45
3                  101       3      6  20159    48    3
4                 2213     322    125     61  1079   75
5                 1605     292    152      0    52  644
6                  152      13      5      1    13   17
7                   81      10      1      0     9    8


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.8302202706673542
Precision: 0.8270836501758967
Recall:  0.8302202706673542
F1-score: 0.7842276136220641


In [23]:
# MLP with 15 selected features
X_test_xg_15 = X_test.loc[:, xg_features_15]
MLP(X_train_xg_15, y_train, X_test_xg_15, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Predicted attack     0       1     2      3     4     5
Actual attack                                          
0                 7160      70     0      0   622   666
1                    5  260389  1302    577    15    23
2                    1   54895  7668     88     4     3
3                    1     127    35  20140    16     1
4                 2576      68     0      1  1038   192
5                 1338     226    23     20   124  1014
6                  116       1     0      0    33    51
7                   82       0     0      0     5    22


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.824445996817635
Precision: 0.8265197148797239
Recall:  0.824445996817635
F1-score: 0.7716088688120296


# 7. 30 selected features

## 7.1 Decision Tree

In [24]:
# DT with 30 selected features
X_test_ig_30 = X_test.loc[:, ig_features_30]
DT(X_train_ig_30, y_train, X_test_ig_30, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 7208     425     79      1   411   344  22  28
1                    0  262263      9      2     3    34   0   0
2                    0      15  62640      1     2     1   0   0
3                    0       0      0  20319     1     0   0   0
4                  260     515     76    190  2740    74  17   3
5                  144     655    162      0   111  1654  11   8
6                    9      61      7      0    16     8  97   3
7                    3      33      2      0     6     8   0  57
Accuracy: 0.989576922863685
Precision: 0.9887663654200576
Recall:  0.989576922863685
F1-score: 0.9889482340111935


In [25]:
# DT with 30 selected features
X_test_rfe_30 = X_test.loc[:, rfe_features_30]
DT(X_train_rfe_30, y_train, X_test_rfe_30, y_test)

Predicted attack     0       1      2      3     4     5    6   7
Actual attack                                                    
0                 7527       0      0      1   505   421   37  27
1                    1  262264     10      2     3    31    0   0
2                    1      20  62633      1     2     2    0   0
3                    0       0      0  20320     0     0    0   0
4                  358       0      1      0  3310   169   25  12
5                  222       8      0      0   199  2284   19  13
6                   18       0      0      0    39    31  111   2
7                   12       0      0      0    14    17    0  66
Accuracy: 0.993837632852652
Precision: 0.9939961997880659
Recall:  0.993837632852652
F1-score: 0.9938914999267838


In [26]:
# DT with 30 selected features
X_test_rf_30 = X_test.loc[:, rf_features_30]
DT(X_train_rf_30, y_train, X_test_rf_30, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 7271     338     81     85   389   306  25  23
1                    0  262274     15      2     2    18   0   0
2                    1      15  62640      1     2     0   0   0
3                    0       0      0  20319     1     0   0   0
4                  234     420     79    283  2774    63  15   7
5                  154     550    205     63   101  1647  14  11
6                   10      47      7     14    17     8  98   0
7                    4      31      2      2     4     7   0  59
Accuracy: 0.9898652207419235
Precision: 0.9891043849012198
Recall:  0.9898652207419235
F1-score: 0.9892392066938294


In [27]:
# DT with 30 selected features
X_test_lg_30 = X_test.loc[:, lg_features_30]
DT(X_train_lg_30, y_train, X_test_lg_30, y_test)

Predicted attack     0       1      2      3     4     5    6   7
Actual attack                                                    
0                 7346       1      0      0   657   433   45  36
1                    3  262280     11      1     2    14    0   0
2                    0      24  62630      0     3     2    0   0
3                    0       1      0  20319     0     0    0   0
4                  397       0      0      1  3292   133   42  10
5                  274       3      0      0   199  2222   24  23
6                   26       0      0      0    30    24  120   1
7                   19       0      0      0    14    14    2  60
Accuracy: 0.9931556974868186
Precision: 0.9933692390587188
Recall:  0.9931556974868186
F1-score: 0.9932281875724204


In [28]:
# DT with 30 selected features
X_test_xg_30 = X_test.loc[:, xg_features_30]
DT(X_train_xg_30, y_train, X_test_xg_30, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 7261     324    179      1   381   339  17  16
1                    0  262276     15      2     2    16   0   0
2                    0      13  62642      1     2     1   0   0
3                    0       0      0  20319     1     0   0   0
4                  224     348    242    191  2771    76  15   8
5                  155     608    211      0    98  1661   6   6
6                    9      37     31      0    16     9  97   2
7                    4      19     16      0     5     6   0  59
Accuracy: 0.9898763091218558
Precision: 0.989098347850299
Recall:  0.9898763091218558
F1-score: 0.9892511954380758


## 7.2 Random Forest

In [29]:
# RF with 30 selected features
X_test_ig_30 = X_test.loc[:, ig_features_30]
RF(X_train_ig_30, y_train, X_test_ig_30, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 8198       0      0      0   169   151   0   0
1                    0  262294      8      2     2     5   0   0
2                    0      23  62634      0     1     1   0   0
3                    0       1      0  20319     0     0   0   0
4                  401       0      0      0  3352   122   0   0
5                  271      23      0      0   149  2302   0   0
6                   32       0      0      0    71    50  48   0
7                   28       0      0      0    24    23   0  34
Accuracy: 0.9956838481113717
Precision: 0.9957112719306539
Recall:  0.9956838481113717
F1-score: 0.995496658217841


In [30]:
# RF with 30 selected features
X_test_rfe_30 = X_test.loc[:, rfe_features_30]
RF(X_train_rfe_30, y_train, X_test_rfe_30, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 8202       0      0      0   154   162   0   0
1                    0  262297      7      2     2     3   0   0
2                    2      22  62635      0     0     0   0   0
3                    0       0      0  20320     0     0   0   0
4                  383       1      0      0  3341   150   0   0
5                  279      25      0      0   139  2301   1   0
6                   28       0      0      0    66    52  55   0
7                   23       0      0      0    22    19   0  45
Accuracy: 0.9957254295361176
Precision: 0.9957408059042185
Recall:  0.9957254295361176
F1-score: 0.9955687717455746


In [31]:
# RF with 30 selected features
X_test_rf_30 = X_test.loc[:, rf_features_30]
RF(X_train_rf_30, y_train, X_test_rf_30, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 8208       0      0      0   163   147   0   0
1                    0  262291     10      1     4     5   0   0
2                    1      28  62629      0     1     0   0   0
3                    0       0      0  20320     0     0   0   0
4                  386       0      0      0  3355   134   0   0
5                  280      27      0      0   162  2276   0   0
6                   28       0      0      0    73    54  46   0
7                   16       0      0      0    27    23   0  43
Accuracy: 0.9956478108765918
Precision: 0.9956676751566313
Recall:  0.9956478108765918
F1-score: 0.9954676403503647


In [32]:
# RF with 30 selected features
X_test_lg_30 = X_test.loc[:, lg_features_30]
RF(X_train_lg_30, y_train, X_test_lg_30, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 8240       0      0      0   134   144   0   0
1                    0  262293     12      1     1     4   0   0
2                    1      30  62627      0     0     1   0   0
3                    0       0      0  20320     0     0   0   0
4                  564       0      0      0  3233    78   0   0
5                  429      34      0      0   145  2137   0   0
6                   70       0      0      0    62    24  45   0
7                   46       0      0      0    15    12   0  36
Accuracy: 0.994990824365606
Precision: 0.9950812838014546
Recall:  0.994990824365606
F1-score: 0.9947585322360688


In [33]:
# RF with 30 selected features
X_test_xg_30 = X_test.loc[:, xg_features_30]
RF(X_train_xg_30, y_train, X_test_xg_30, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 8199       0      0      0   164   155   0   0
1                    0  262298      8      1     2     2   0   0
2                    1      22  62635      0     1     0   0   0
3                    0       0      0  20320     0     0   0   0
4                  386       0      0      0  3360   129   0   0
5                  263      31      0      0   153  2298   0   0
6                   33       0      0      0    71    50  47   0
7                   29       0      0      0    14    20   0  46
Accuracy: 0.995744834200999
Precision: 0.9957652593463877
Recall:  0.995744834200999
F1-score: 0.9955719005246749


## 7.3 KNN

In [34]:
# KNN with 30 selected features
X_test_ig_30 = X_test.loc[:, ig_features_30]
KNN(X_train_ig_30, y_train, X_test_ig_30, y_test)



Predicted attack     0       1      2      3     4     5   6  7
Actual attack                                                  
0                 7574       0      1      0   556   363  15  9
1                    0  254286   7956     19     4    46   0  0
2                    0   11966  50649     15     2    27   0  0
3                    0      70     19  20223     1     7   0  0
4                 1368       5      0      0  2335   156   5  6
5                  860      51     12      4   223  1585   3  7
6                  106       0      0      0    42    40   8  5
7                   61       0      0      0    16    25   3  4
Accuracy: 0.9332645853777534
Precision: 0.9317630228138398
Recall:  0.9332645853777534
F1-score: 0.9320281857429669


In [35]:
# KNN with 30 selected features
X_test_rfe_30 = X_test.loc[:, rfe_features_30]
KNN(X_train_rfe_30, y_train, X_test_rfe_30, y_test)



Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 7572       2      0      0   569   360  14   1
1                    1  255714   6527     21     2    46   0   0
2                    1   12381  50231     16     1    29   0   0
3                    0      70     16  20225     0     9   0   0
4                 1333       5      0      0  2373   157   5   2
5                  868      44     19      3   219  1581   7   4
6                  108       0      0      0    37    45   9   2
7                   58       0      0      0    15    16   2  18
Accuracy: 0.9362002339648166
Precision: 0.9347147447114397
Recall:  0.9362002339648166
F1-score: 0.934705801281766


In [36]:
# KNN with 30 selected features
X_test_rf_30 = X_test.loc[:, rf_features_30]
KNN(X_train_rf_30, y_train, X_test_rf_30, y_test)



Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 7596       2      0      0   553   349  16   2
1                    3  255145   7092     20     3    48   0   0
2                    1   12414  50200     16     1    27   0   0
3                    0      72     16  20224     1     7   0   0
4                 1354       4      0      0  2353   157   5   2
5                  852      47     14      4   221  1600   5   2
6                  111       1      0      0    39    42   8   0
7                   58       0      0      0    14    16   3  18
Accuracy: 0.9345951909696234
Precision: 0.9331051129613489
Recall:  0.9345951909696234
F1-score: 0.9331600777934632


In [37]:
# KNN with 30 selected features
X_test_lg_30 = X_test.loc[:, lg_features_30]
KNN(X_train_lg_30, y_train, X_test_lg_30, y_test)



Predicted attack     0       1      2      3     4     5   6  7
Actual attack                                                  
0                 7575       1      0      0   580   344  14  4
1                    3  254538   7701     22     2    45   0  0
2                    1    8751  53858     17     1    31   0  0
3                    0      63     21  20229     0     7   0  0
4                 1342       5      1      0  2370   149   5  3
5                  867      47     18      3   223  1576   6  5
6                  111       1      0      0    37    41   8  3
7                   65       0      0      0    16    22   2  4
Accuracy: 0.9429502852485737
Precision: 0.9421918175983961
Recall:  0.9429502852485737
F1-score: 0.9422666765531574


In [38]:
# KNN with 30 selected features
X_test_xg_30 = X_test.loc[:, xg_features_30]
KNN(X_train_xg_30, y_train, X_test_xg_30, y_test)



Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 7586       2      0      0   563   349  17   1
1                    3  255201   7040     19     2    46   0   0
2                    1   12332  50280     16     1    29   0   0
3                    0      67     16  20231     0     6   0   0
4                 1353       6      0      0  2347   161   6   2
5                  859      48     15      3   218  1594   6   2
6                  106       1      0      0    39    45   9   1
7                   58       0      0      0    14    16   3  18
Accuracy: 0.934933386557557
Precision: 0.9334570182028552
Recall:  0.934933386557557
F1-score: 0.9335120445774006


## 7.4 Gradient Boosting

In [39]:
# GB with 30 selected features
X_test_ig_30 = X_test.loc[:, ig_features_30]
GB(X_train_ig_30, y_train, X_test_ig_30, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 8018       0      0      1     2   480  10   7
1                    0  262281     10      1     2    15   1   1
2                    1      46  62606      0     0     5   0   1
3                    0       0      0  20320     0     0   0   0
4                 1079       0      0      5  2369   391  25   6
5                  319      17      0      1     0  2390  13   5
6                   26       0      0      0     0    78  96   1
7                   20       0      0      0     0    33   0  56
Accuracy: 0.9927870088540713
Precision: 0.9936331821967421
Recall:  0.9927870088540713
F1-score: 0.9925716365117158


In [40]:
# GB with 30 selected features
X_test_rfe_30 = X_test.loc[:, rfe_features_30]
GB(X_train_rfe_30, y_train, X_test_rfe_30, y_test)

Predicted attack     0       1      2      3     4     5    6   7
Actual attack                                                    
0                 7840       1      0      0   415   250   11   1
1                    0  262280      6      0     5    13    7   0
2                    0      41  62613      2     2     1    0   0
3                    0       1      0  20319     0     0    0   0
4                  464       0      0      2  3188   212    8   1
5                  570     109      0      1   476  1409  175   5
6                   13       0      0      0    30    56  102   0
7                   23       0      0      0     6    16    2  62
Accuracy: 0.9918916221745422
Precision: 0.9917464074120872
Recall:  0.9918916221745422
F1-score: 0.9916471377134687


In [41]:
# GB with 30 selected features
X_test_rf_30 = X_test.loc[:, rf_features_30]
GB(X_train_rf_30, y_train, X_test_rf_30, y_test)

Predicted attack     0       1      2      3     4     5    6   7
Actual attack                                                    
0                 7844       0      0      0   464   203    1   6
1                    0  262288      5      0     3    12    3   0
2                    0      41  62612      2     2     1    1   0
3                    0       1      0  20319     0     0    0   0
4                  427       0      0      0  3276   163    3   6
5                  696     346      0      1   426  1116  149  11
6                   11       0      0      0    44    48   98   0
7                   21       0      0      0    12    12    0  64
Accuracy: 0.991348291557862
Precision: 0.9908851881836279
Recall:  0.991348291557862
F1-score: 0.9907504647945424


In [42]:
# GB with 30 selected features
X_test_lg_30 = X_test.loc[:, lg_features_30]
GB(X_train_lg_30, y_train, X_test_lg_30, y_test)

Predicted attack     0       1      2      3     4     5   6   7
Actual attack                                                   
0                 8002       0      0      0   261   250   0   5
1                    0  262295      4      0     4     6   2   0
2                    0      27  62625      0     2     4   1   0
3                    0       2      0  20318     0     0   0   0
4                  680       0      1      0  3095    94   2   3
5                  556      12      0      0    99  2072   1   5
6                   48       0      0      0    31    38  84   0
7                   34       0      0      0     8    12   0  55
Accuracy: 0.993923567797127
Precision: 0.9939537687144279
Recall:  0.993923567797127
F1-score: 0.9937908802384708


In [43]:
# GB with 30 selected features
X_test_xg_30 = X_test.loc[:, xg_features_30]
GB(X_train_xg_30, y_train, X_test_xg_30, y_test)

Predicted attack     0       1      2      3     4     5    6   7
Actual attack                                                    
0                 7869       1      0      0   301   339    1   7
1                    0  262280      4      0     2    18    7   0
2                    0      45  62609      2     1     2    0   0
3                    0       2      0  20318     0     0    0   0
4                  461       0      0      2  3152   251    1   8
5                  702     304     14      1   317  1243  156   8
6                   17       0      0      0    22    65   97   0
7                   21       0      0      0     2    22    0  64
Accuracy: 0.9913898729826078
Precision: 0.9908455597578111
Recall:  0.9913898729826078
F1-score: 0.9909662085278406


## 7.5 MLP

In [44]:
# MLP with 30 selected features
X_test_ig_30 = X_test.loc[:, ig_features_30]
MLP(X_train_ig_30, y_train, X_test_ig_30, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Predicted attack     0       1      2      3     4    5
Actual attack                                          
0                 6865     332     10      1   963  347
1                  271  257085   4673      2    49  231
2                  106   50985  11469      1    15   83
3                   52      57      9  20102    97    3
4                 2016     385     66     65  1209  134
5                 1199     371    164      0   238  773
6                   96      14      4      1    50   36
7                   74       6      0      0     9   20
Accuracy: 0.8247065737460428
Precision: 0.8058972723215976
Recall:  0.8247065737460428
F1-score: 0.784053591099855


  _warn_prf(average, modifier, msg_start, len(result))


In [45]:
# MLP with 30 selected features
X_test_rfe_30 = X_test.loc[:, rfe_features_30]
MLP(X_train_rfe_30, y_train, X_test_rfe_30, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Predicted attack     0       1      2      3     4     5
Actual attack                                           
0                 7429       6      1      6   708   368
1                   19  255162   7085      8    13    24
2                    7   48372  14178      8     6    88
3                    7      74     60  20172     3     4
4                 2102       6     14     35  1559   159
5                 1186     109    156      0   176  1118
6                  119       0      0      3    35    44
7                   74       0      0      0    12    23


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.83056955463522
Precision: 0.8089653231341136
Recall:  0.83056955463522
F1-score: 0.7971374911043384


In [46]:
# MLP with 30 selected features
X_test_rf_30 = X_test.loc[:, rf_features_30]
MLP(X_train_rf_30, y_train, X_test_rf_30, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Predicted attack     0       1      2      3     4     5
Actual attack                                           
0                 7487       7      0      1   785   238
1                   12  254601   7651     13    10    24
2                    4   47792  14805     24     1    33
3                    5      69     45  20195     3     3
4                 2391      16     35     32  1285   116
5                 1296     245     18      3   125  1058
6                  138       0      0      2    26    35
7                   77       0      0      0     6    26


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.8300511728733874
Precision: 0.8085907306793844
Recall:  0.8300511728733874
F1-score: 0.7977200805605644


In [47]:
# MLP with 30 selected features
X_test_lg_30 = X_test.loc[:, lg_features_30]
MLP(X_train_lg_30, y_train, X_test_lg_30, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Predicted attack     0       1      2      3     4     5
Actual attack                                           
0                 7297     223     46      5   437   510
1                  451  258328   3367      5    87    73
2                  123   50539  11897      3    66    31
3                   74      52      1  20124    69     0
4                 2154     227    143     98  1122   131
5                 1183     135    223      0   126  1078
6                  120      14     12      2    17    36
7                   74       0      3      0     4    28
Accuracy: 0.8312015922913583
Precision: 0.8202065077631491
Recall:  0.8312015922913583
F1-score: 0.7905716356601495


  _warn_prf(average, modifier, msg_start, len(result))


In [48]:
# MLP with 30 selected features
X_test_xg_30 = X_test.loc[:, xg_features_30]
MLP(X_train_xg_30, y_train, X_test_xg_30, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Predicted attack     0       1      2      3     4     5
Actual attack                                           
0                 6906       5      0      5   788   814
1                    5  254903   7329      8    24    42
2                    3   49135  13478      6     7    30
3                    1     115     41  20132    26     5
4                 1836       9     51     56  1606   317
5                  930     242     14      0   167  1392
6                   82       0      0      2    40    77
7                   74       0      0      0     6    29


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.827240268560562
Precision: 0.8039201117831736
Recall:  0.827240268560562
F1-score: 0.793042846960013
