In [1]:
import time
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from memory_profiler import memory_usage

In [2]:
# Load the dataset
df = pd.read_csv("/Users/promisea/ALMA/EdgeIIoT/EdgeIIoT.csv")

# Preprocessing
X = df.drop(['Attack_type'], axis=1)
y = df['Attack_type']


# Scale the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# Initialize classifiers
classifiers = {
    "Random Forest": RandomForestClassifier(),
    "KNN": KNeighborsClassifier(),
    "SVM": SVC(),
    "Decision Tree": DecisionTreeClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000)
}

In [3]:
# Evaluate classifiers
results = {}
start_overall = time.perf_counter()  # Start measuring total execution time
for name, clf in classifiers.items():
    start_time = time.perf_counter()  # Real time measurement starts
    mem_usage = max(memory_usage((clf.fit, (X_train, y_train)), interval=1))  # Less frequent checks to reduce overhead
    train_time = time.perf_counter() - start_time  # Real time measurement ends
    
    start_time = time.perf_counter()  # Real time measurement starts
    y_pred = clf.predict(X_test)
    test_time = time.perf_counter() - start_time  # Real time measurement ends
    
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='macro')
    
    results[name] = {
        "Accuracy": accuracy,
        "F1 Score": f1,
        "Training Time": train_time,
        "Testing Time": test_time,
        "Memory Usage": mem_usage
    }

In [4]:
# Display results
print("The results of the EdgeIIoT dataset..." )
for classifier, metrics in results.items():
    print(f"{classifier}:")
    for metric, value in metrics.items():
        print(f"    {metric}: {value:.4f}")

The results of the EdgeIIoT dataset...
Random Forest:
    Accuracy: 0.9913
    F1 Score: 0.9832
    Training Time: 4.9541
    Testing Time: 0.1909
    Memory Usage: 548.1719
KNN:
    Accuracy: 0.9616
    F1 Score: 0.9251
    Training Time: 0.7304
    Testing Time: 3.7649
    Memory Usage: 548.2812
SVM:
    Accuracy: 0.9113
    F1 Score: 0.7830
    Training Time: 486.5974
    Testing Time: 64.2922
    Memory Usage: 1060.2812
Decision Tree:
    Accuracy: 0.9928
    F1 Score: 0.9862
    Training Time: 1.0978
    Testing Time: 0.0050
    Memory Usage: 663.0156
Logistic Regression:
    Accuracy: 0.8951
    F1 Score: 0.7171
    Training Time: 2.0722
    Testing Time: 0.0037
    Memory Usage: 682.7969


In [5]:
# Load the dataset
df = pd.read_csv("/Users/promisea/ALMA/CIC/CIC_IoT_2023.csv")

# Preprocessing
X = df.drop(['label'], axis=1)
y = df['label']


# Scale the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# Initialize classifiers
classifiers = {
    "Random Forest": RandomForestClassifier(),
    "KNN": KNeighborsClassifier(),
    "SVM": SVC(),
    "Decision Tree": DecisionTreeClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000)
}

In [6]:
# Evaluate classifiers
results = {}
start_overall = time.perf_counter()  # Start measuring total execution time
for name, clf in classifiers.items():
    start_time = time.perf_counter()  # Real time measurement starts
    mem_usage = max(memory_usage((clf.fit, (X_train, y_train)), interval=1))  # Less frequent checks to reduce overhead
    train_time = time.perf_counter() - start_time  # Real time measurement ends
    
    start_time = time.perf_counter()  # Real time measurement starts
    y_pred = clf.predict(X_test)
    test_time = time.perf_counter() - start_time  # Real time measurement ends
    
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='macro')
    
    results[name] = {
        "Accuracy": accuracy,
        "F1 Score": f1,
        "Training Time": train_time,
        "Testing Time": test_time,
        "Memory Usage": mem_usage
    }

In [7]:
# Display results
print("The results of the CIC IoT dataset..." )
for classifier, metrics in results.items():
    print(f"{classifier}:")
    for metric, value in metrics.items():
        print(f"    {metric}: {value:.4f}")

The results of the CIC IoT dataset...
Random Forest:
    Accuracy: 0.9968
    F1 Score: 0.9653
    Training Time: 17.1536
    Testing Time: 0.2161
    Memory Usage: 1179.4219
KNN:
    Accuracy: 0.9912
    F1 Score: 0.9084
    Training Time: 0.8447
    Testing Time: 31.8610
    Memory Usage: 1147.2188
SVM:
    Accuracy: 0.9928
    F1 Score: 0.9256
    Training Time: 172.9238
    Testing Time: 63.4193
    Memory Usage: 1367.0781
Decision Tree:
    Accuracy: 0.9957
    F1 Score: 0.9532
    Training Time: 2.4182
    Testing Time: 0.0074
    Memory Usage: 376.7812
Logistic Regression:
    Accuracy: 0.9889
    F1 Score: 0.8733
    Training Time: 4.1608
    Testing Time: 0.0095
    Memory Usage: 435.6094


In [8]:
# Load the dataset
df = pd.read_csv("/Users/promisea/ALMA/EHMS/ehms.csv")

# Preprocessing
X = df.drop(['Label'], axis=1)
y = df['Label']

# Scale the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# Initialize classifiers
classifiers = {
    "Random Forest": RandomForestClassifier(),
    "KNN": KNeighborsClassifier(),
    "SVM": SVC(),
    "Decision Tree": DecisionTreeClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000)
}

In [9]:
# Evaluate classifiers
results = {}
start_overall = time.perf_counter()  # Start measuring total execution time
for name, clf in classifiers.items():
    start_time = time.perf_counter()  # Real time measurement starts
    mem_usage = max(memory_usage((clf.fit, (X_train, y_train)), interval=1))  # Less frequent checks to reduce overhead
    train_time = time.perf_counter() - start_time  # Real time measurement ends
    
    start_time = time.perf_counter()  # Real time measurement starts
    y_pred = clf.predict(X_test)
    test_time = time.perf_counter() - start_time  # Real time measurement ends
    
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='macro')
    
    results[name] = {
        "Accuracy": accuracy,
        "F1 Score": f1,
        "Training Time": train_time,
        "Testing Time": test_time,
        "Memory Usage": mem_usage
    }

In [10]:
# Display results
print("The results of the WUSTL-EHMS dataset..." )
for classifier, metrics in results.items():
    print(f"{classifier}:")
    for metric, value in metrics.items():
        print(f"    {metric}: {value:.4f}")

The results of the WUSTL-EHMS dataset...
Random Forest:
    Accuracy: 0.9367
    F1 Score: 0.8203
    Training Time: 3.5744
    Testing Time: 0.0345
    Memory Usage: 483.6094
KNN:
    Accuracy: 0.9387
    F1 Score: 0.8490
    Training Time: 1.3883
    Testing Time: 0.3568
    Memory Usage: 483.7188
SVM:
    Accuracy: 0.9307
    F1 Score: 0.7936
    Training Time: 2.0528
    Testing Time: 0.7612
    Memory Usage: 692.6562
Decision Tree:
    Accuracy: 0.9749
    F1 Score: 0.9429
    Training Time: 0.9394
    Testing Time: 0.0006
    Memory Usage: 692.6562
Logistic Regression:
    Accuracy: 0.9297
    F1 Score: 0.7920
    Training Time: 0.8302
    Testing Time: 0.0007
    Memory Usage: 693.5625


In [11]:
# Load the dataset
df = pd.read_csv("scada_modified.csv")

# Preprocessing
X = df.drop(['Target'], axis=1)
y = df['Target']


# Scale the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# Initialize classifiers
classifiers = {
    "Random Forest": RandomForestClassifier(),
    "KNN": KNeighborsClassifier(),
    "SVM": SVC(),
    "Decision Tree": DecisionTreeClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000)
}

In [12]:
# Evaluate classifiers
results = {}
start_overall = time.perf_counter()  # Start measuring total execution time
for name, clf in classifiers.items():
    start_time = time.perf_counter()  # Real time measurement starts
    mem_usage = max(memory_usage((clf.fit, (X_train, y_train)), interval=1))  # Less frequent checks to reduce overhead
    train_time = time.perf_counter() - start_time  # Real time measurement ends
    
    start_time = time.perf_counter()  # Real time measurement starts
    y_pred = clf.predict(X_test)
    test_time = time.perf_counter() - start_time  # Real time measurement ends
    
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='macro')
    
    results[name] = {
        "Accuracy": accuracy,
        "F1 Score": f1,
        "Training Time": train_time,
        "Testing Time": test_time,
        "Memory Usage": mem_usage
    }

In [13]:
# Display results
print("The results of  the WUSTL-SCADA dataset..." )
for classifier, metrics in results.items():
    print(f"{classifier}:")
    for metric, value in metrics.items():
        print(f"    {metric}: {value:.4f}")

The results of  the WUSTL-SCADA dataset...
Random Forest:
    Accuracy: 1.0000
    F1 Score: 1.0000
    Training Time: 2.7049
    Testing Time: 0.0418
    Memory Usage: 713.0938
KNN:
    Accuracy: 0.9997
    F1 Score: 0.9997
    Training Time: 0.8123
    Testing Time: 0.4177
    Memory Usage: 713.2656
SVM:
    Accuracy: 0.9887
    F1 Score: 0.9887
    Training Time: 6.3741
    Testing Time: 2.6045
    Memory Usage: 1039.4531
Decision Tree:
    Accuracy: 1.0000
    F1 Score: 1.0000
    Training Time: 0.6592
    Testing Time: 0.0010
    Memory Usage: 1027.8281
Logistic Regression:
    Accuracy: 0.9821
    F1 Score: 0.9821
    Training Time: 0.9978
    Testing Time: 0.0026
    Memory Usage: 1029.1719
