In [1]:
#ID3
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score
import time  


file_path = 'arpspoof4.0.csv'
data = pd.read_csv(file_path)


for col in data.columns:
    data[col] = data[col].astype('category').cat.codes


X = data.drop(columns=['state'])
y = data['state']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


print(f'Training data size: {X_train.shape[0]}')


nb_classifier = GaussianNB()


start_time = time.time()


nb_classifier.fit(X_train, y_train)


end_time = time.time()


training_time = end_time - start_time
print(f'Training time: {training_time:.4f} seconds')


y_pred = nb_classifier.predict(X_test)


accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')


f1_scores = f1_score(y_test, y_pred, average=None)
precisions = precision_score(y_test, y_pred, average=None)
recalls = recall_score(y_test, y_pred, average=None)

for i, (f1, precision, recall) in enumerate(zip(f1_scores, precisions, recalls)):
    print(f'Class {i} F1 score: {f1:.4f}')
    print(f'Class {i} Precision: {precision:.4f}')
    print(f'Class {i} Recall: {recall:.4f}')


Training data size: 7964
Training time: 0.0101 seconds
Accuracy: 0.9859
Class 0 F1 score: 0.8557
Class 0 Precision: 0.7477
Class 0 Recall: 1.0000
Class 1 F1 score: 0.9926
Class 1 Precision: 1.0000
Class 1 Recall: 0.9853


In [2]:
#ANN
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.base import BaseEstimator, ClassifierMixin
from skopt import BayesSearchCV
import joblib
import time
import warnings


warnings.filterwarnings("ignore")


class ANNModel(nn.Module):
    def __init__(self, input_dim, hidden_units=64, activation='ReLU'):
        super(ANNModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.output = nn.Linear(hidden_units, 1)
        self.activation = getattr(nn, activation)()
        nn.init.xavier_uniform_(self.fc1.weight)
        nn.init.xavier_uniform_(self.fc2.weight)
        nn.init.xavier_uniform_(self.output.weight)

    def forward(self, x):
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = torch.sigmoid(self.output(x))
        return x


class PyTorchClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, input_dim, hidden_units=64, activation='ReLU', learning_rate=0.001, epochs=50, batch_size=32):
        self.input_dim = input_dim
        self.hidden_units = hidden_units
        self.activation = activation
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.model = ANNModel(input_dim, hidden_units, activation)
        self.criterion = nn.BCELoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        self.classes_ = None

    def fit(self, X, y):
        self.classes_ = np.unique(y)
        X, y = torch.FloatTensor(X), torch.FloatTensor(y).unsqueeze(1)
        for epoch in range(self.epochs):
            self.model.train()
            permutation = torch.randperm(X.size()[0])
            for i in range(0, X.size()[0], self.batch_size):
                indices = permutation[i:i + self.batch_size]
                batch_x, batch_y = X[indices], y[indices]
                self.optimizer.zero_grad()
                outputs = self.model(batch_x)
                loss = self.criterion(outputs, batch_y)
                loss.backward()
                self.optimizer.step()
        return self

    def predict(self, X):
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(torch.FloatTensor(X))
            return (outputs > 0.5).numpy().astype(int).ravel()

    def score(self, X, y):
        y_pred = self.predict(X)
        return accuracy_score(y, y_pred)


def load_data(csv_file_path):
    data = pd.read_csv(csv_file_path)
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values
    y = np.where(y == 1, 1, 0)
    return X, y


def main():
    csv_file_path = 'arpspoof4.0.csv'
    test_size = 0.4
    random_state = 42

    X, y = load_data(csv_file_path)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, shuffle=True)

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    param_dist = {
        'hidden_units': [32, 64],
        'activation': ['ReLU', 'Tanh'],
        'learning_rate': [0.001, 0.01],
        'epochs': [50, 100],
        'batch_size': [32]
    }

    bayes_search = BayesSearchCV(
        PyTorchClassifier(input_dim=X_train.shape[1]), 
        search_spaces=param_dist, 
        n_iter=10, 
        scoring='accuracy', 
        n_jobs=1, 
        cv=3, 
        random_state=random_state, 
        verbose=0
    )

    start_time = time.time()
    bayes_search.fit(X_train, y_train)
    end_time = time.time()

    training_time = end_time - start_time
    best_params = bayes_search.best_params_
    best_score = bayes_search.best_score_

    print(f"Training data size: {X_train.shape[0]}")
    print(f"Training time: {training_time:.4f} seconds")
    print(f"Best parameters found: {best_params}")
    print(f"Best cross-validation accuracy: {best_score:.4f}")

    best_model = PyTorchClassifier(input_dim=X_train.shape[1], **best_params)
    best_model.fit(X_train, y_train)


    y_pred = best_model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy on test set: {test_accuracy:.4f}")


    f1_scores = f1_score(y_test, y_pred, average=None)
    precisions = precision_score(y_test, y_pred, average=None)
    recalls = recall_score(y_test, y_pred, average=None)

    for i, (f1, precision, recall) in enumerate(zip(f1_scores, precisions, recalls)):
        print(f"Class {i} F1 score: {f1:.4f}")
        print(f"Class {i} Precision: {precision:.4f}")
        print(f"Class {i} Recall: {recall:.4f}")


    model_filename = 'ann_model.pkl'
    joblib.dump(best_model, model_filename)
    print(f"Model saved as {model_filename}")

if __name__ == "__main__":
    main()


Training data size: 5973
Training time: 487.1100 seconds
Best parameters found: OrderedDict([('activation', 'ReLU'), ('batch_size', 32), ('epochs', 97), ('hidden_units', 42), ('learning_rate', 0.0070313315344204125)])
Best cross-validation accuracy: 1.0000
Accuracy on test set: 0.9867
Class 0 F1 score: 0.8251
Class 0 Precision: 1.0000
Class 0 Recall: 0.7022
Class 1 F1 score: 0.9931
Class 1 Precision: 0.9863
Class 1 Recall: 1.0000
Model saved as ann_model.pkl


In [3]:
#KNN
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.neighbors import KNeighborsClassifier
from skopt import BayesSearchCV
import joblib  
import time  
import warnings


warnings.filterwarnings("ignore")


def load_data(csv_file_path):
    data = pd.read_csv(csv_file_path)
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values
    # 将目标变量转换为0和1
    y = np.where(y == 1, 1, 0)
    return X, y



def main():
    csv_file_path = 'arpspoof4.0.csv'
    test_size = 0.4 
    random_state = 42 


    X, y = load_data(csv_file_path)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, shuffle=True)


    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)


    param_dist = {
        'n_neighbors': [3, 5, 7, 9, 11],
        'weights': ['uniform', 'distance'],
        'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
        'leaf_size': [20, 30, 40, 50],
        'p': [1, 2]
    }


    bayes_search = BayesSearchCV(
        KNeighborsClassifier(), 
        search_spaces=param_dist, 
        n_iter=10, 
        scoring='accuracy', 
        n_jobs=1, 
        cv=3, 
        random_state=random_state, 
        verbose=0
    )


    start_time = time.time()


    bayes_search.fit(X_train, y_train)


    end_time = time.time()


    training_time = end_time - start_time

    best_params = bayes_search.best_params_
    best_score = bayes_search.best_score_

    print(f"Training data size: {X_train.shape[0]}")
    print(f"Training time: {training_time:.4f} seconds")
    print(f"Best parameters found: {best_params}")
    print(f"Best cross-validation accuracy: {best_score:.4f}")


    best_model = KNeighborsClassifier(**best_params)
    best_model.fit(X_train, y_train)


    y_pred = best_model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy on test set: {test_accuracy:.4f}")


    f1_scores = f1_score(y_test, y_pred, average=None)
    precisions = precision_score(y_test, y_pred, average=None)
    recalls = recall_score(y_test, y_pred, average=None)

    for i, (f1, precision, recall) in enumerate(zip(f1_scores, precisions, recalls)):
        print(f"Class {i} F1 score: {f1:.4f}")
        print(f"Class {i} Precision: {precision:.4f}")
        print(f"Class {i} Recall: {recall:.4f}")


    model_filename = 'knn_model.pkl'
    joblib.dump(best_model, model_filename)
    print(f"Model saved as {model_filename}")

if __name__ == "__main__":
    main()



Training data size: 5973
Training time: 3.4380 seconds
Best parameters found: OrderedDict([('algorithm', 'ball_tree'), ('leaf_size', 50), ('n_neighbors', 3), ('p', 1), ('weights', 'uniform')])
Best cross-validation accuracy: 1.0000
Accuracy on test set: 1.0000
Class 0 F1 score: 1.0000
Class 0 Precision: 1.0000
Class 0 Recall: 1.0000
Class 1 F1 score: 1.0000
Class 1 Precision: 1.0000
Class 1 Recall: 1.0000
Model saved as knn_model.pkl


In [4]:
#random forest
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score
from sklearn.ensemble import RandomForestClassifier
from skopt import BayesSearchCV
import joblib
import time


def load_data(csv_file_path):
    data = pd.read_csv(csv_file_path)
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values
    return data, X, y

def main():
    csv_file_path = 'arpspoof4.0.csv'  # CSV文件路径
    test_size = 0.4
    random_state = 42


    data, X, y = load_data(csv_file_path)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, shuffle=True)


    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)


    param_dist = {
        'n_estimators': [100, 200, 300],
        'max_features': ['sqrt', 'log2'],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'bootstrap': [True, False]
    }


    bayes_search = BayesSearchCV(
        RandomForestClassifier(random_state=random_state),
        search_spaces=param_dist,
        n_iter=10,
        scoring='accuracy',
        n_jobs=1,
        cv=3,
        random_state=random_state,
        verbose=0
    )


    start_time = time.time()


    bayes_search.fit(X_train, y_train)


    end_time = time.time()


    training_time = end_time - start_time

    best_params = bayes_search.best_params_
    best_model = RandomForestClassifier(random_state=random_state, **best_params)
    best_model.fit(X_train, y_train)


    y_pred = best_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)


    print(f"Training data size: {len(X_train)}")
    print(f"Training time: {training_time:.4f} seconds")
    print(f"Accuracy: {accuracy:.4f}")


    unique_classes = np.unique(y_test)
    for cls in unique_classes:
        cls_f1 = f1_score(y_test, y_pred, average=None, labels=[cls])[0]
        cls_precision = precision_score(y_test, y_pred, average=None, labels=[cls])[0]
        cls_recall = recall_score(y_test, y_pred, average=None, labels=[cls])[0]
        print(f"Class {cls} F1 score: {cls_f1:.4f}")
        print(f"Class {cls} Precision: {cls_precision:.4f}")
        print(f"Class {cls} Recall: {cls_recall:.4f}")

    print(f"Best parameters: {best_params}")


    feature_importances = best_model.feature_importances_
    feature_names = data.columns[:-1]
    non_zero_importances = [(name, importance) for name, importance in zip(feature_names, feature_importances) if importance > 0]

    print("\nFeatures with non-zero importance:")
    for name, importance in non_zero_importances:
        print(f"{name}: {importance:.4f}")

if __name__ == "__main__":
    main()


Training data size: 5973
Training time: 9.6879 seconds
Accuracy: 0.9995
Class 0 F1 score: 0.9944
Class 0 Precision: 0.9889
Class 0 Recall: 1.0000
Class 1 F1 score: 0.9997
Class 1 Precision: 1.0000
Class 1 Recall: 0.9995
Best parameters: OrderedDict([('bootstrap', True), ('max_depth', 20), ('max_features', 'log2'), ('min_samples_leaf', 1), ('min_samples_split', 10), ('n_estimators', 200)])

Features with non-zero importance:
start: 0.0006
end: 0.0005
startOffset: 0.0003
endOffset: 0.0008
duration: 0.0285
sPackets: 0.0050
rPackets: 0.0534
sBytesSum: 0.0474
rBytesSum: 0.0903
sBytesMax: 0.0067
rBytesMax: 0.0398
sBytesMin: 0.0404
rBytesMin: 0.0463
sBytesAvg: 0.0158
rBytesAvg: 0.0383
sLoad: 0.0355
rLoad: 0.0405
sPayloadSum: 0.0047
rPayloadSum: 0.0197
sPayloadMax: 0.0559
rPayloadMax: 0.0202
sPayloadMin: 0.0249
rPayloadMin: 0.0148
sPayloadAvg: 0.0553
rPayloadAvg: 0.0242
sInterPacketAvg: 0.0035
rInterPacketAvg: 0.0207
rttl: 0.0131
sPshRate: 0.0162
sWinTCP: 0.0166
rWinTCP: 0.0106
sAckDelayMax: 0

In [5]:
# GBDT
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score
from sklearn.ensemble import GradientBoostingClassifier
from skopt import BayesSearchCV
import joblib
import time


def load_data(csv_file_path):
    data = pd.read_csv(csv_file_path)
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values
    return data, X, y



def main():
    csv_file_path = 'arpspoof4.0.csv'  # CSV文件路径
    test_size = 0.4
    random_state = 42


    data, X, y = load_data(csv_file_path)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, shuffle=True)


    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)


    param_dist = {
        'n_estimators': [100, 200, 300],
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'subsample': [0.8, 0.9, 1.0]
    }


    bayes_search = BayesSearchCV(
        GradientBoostingClassifier(random_state=random_state),
        search_spaces=param_dist,
        n_iter=10,
        scoring='accuracy',
        n_jobs=1,
        cv=3,
        random_state=random_state,
        verbose=0
    )


    start_time = time.time()


    bayes_search.fit(X_train, y_train)


    end_time = time.time()


    training_time = end_time - start_time

    best_params = bayes_search.best_params_
    best_model = GradientBoostingClassifier(random_state=random_state, **best_params)
    best_model.fit(X_train, y_train)


    y_pred = best_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)


    print(f"Training data size: {len(X_train)}")
    print(f"Training time: {training_time:.4f} seconds")
    print(f"Accuracy: {accuracy:.4f}")


    unique_classes = np.unique(y_test)
    for cls in unique_classes:
        cls_f1 = f1_score(y_test, y_pred, average=None, labels=[cls])[0]
        cls_precision = precision_score(y_test, y_pred, average=None, labels=[cls])[0]
        cls_recall = recall_score(y_test, y_pred, average=None, labels=[cls])[0]
        print(f"Class {cls} F1 score: {cls_f1:.4f}")
        print(f"Class {cls} Precision: {cls_precision:.4f}")
        print(f"Class {cls} Recall: {cls_recall:.4f}")

    print(f"Best parameters: {best_params}")


    feature_importances = best_model.feature_importances_
    feature_names = data.columns[:-1]
    non_zero_importances = [(name, importance) for name, importance in zip(feature_names, feature_importances) if importance > 0]

    print("\nFeatures with non-zero importance:")
    for name, importance in non_zero_importances:
        print(f"{name}: {importance:.4f}")

if __name__ == "__main__":
    main()



Training data size: 5973
Training time: 50.1976 seconds
Accuracy: 0.9995
Class 0 F1 score: 0.9944
Class 0 Precision: 0.9889
Class 0 Recall: 1.0000
Class 1 F1 score: 0.9997
Class 1 Precision: 1.0000
Class 1 Recall: 0.9995
Best parameters: OrderedDict([('learning_rate', 0.1), ('max_depth', 7), ('min_samples_leaf', 4), ('min_samples_split', 2), ('n_estimators', 300), ('subsample', 0.9)])

Features with non-zero importance:
start: 0.0004
end: 0.0003
startOffset: 0.0002
endOffset: 0.0003
duration: 0.0000
sPackets: 0.0000
rPackets: 0.0000
sBytesSum: 0.0000
rBytesSum: 0.0000
sBytesMax: 0.0000
rBytesMax: 0.0000
sBytesMin: 0.0001
rBytesMin: 0.0000
sBytesAvg: 0.0000
rBytesAvg: 0.0000
sLoad: 0.0003
rLoad: 0.0002
sPayloadSum: 0.0000
rPayloadSum: 0.0000
sPayloadMax: 0.0001
rPayloadMax: 0.0000
sPayloadMin: 0.0002
rPayloadMin: 0.0000
sPayloadAvg: 0.0002
rPayloadAvg: 0.0000
sInterPacketAvg: 0.0000
rInterPacketAvg: 0.0000
totalBytes: 0.9977


In [6]:
#xgboost
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from xgboost import XGBClassifier
from skopt import BayesSearchCV
import joblib
import time
import warnings


warnings.filterwarnings("ignore")


def load_data(csv_file_path):
    data = pd.read_csv(csv_file_path)
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values
    return X, y



def main():
    csv_file_path = 'arpspoof4.0.csv' 
    test_size = 0.4
    random_state = 42


    X, y = load_data(csv_file_path)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, shuffle=True)


    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)


    param_dist = {
        'n_estimators': [50, 100, 200],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.01, 0.1, 0.2],
        'subsample': [0.6, 0.8, 1.0],
        'colsample_bytree': [0.6, 0.8, 1.0],
        'gamma': [0, 0.1, 0.2]
    }


    bayes_search = BayesSearchCV(
        XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
        search_spaces=param_dist,
        n_iter=10,
        scoring='accuracy',
        n_jobs=-1,
        cv=3,
        random_state=random_state,
        verbose=0
    )


    start_time = time.time()


    bayes_search.fit(X_train, y_train)


    end_time = time.time()


    training_time = end_time - start_time


    best_params = bayes_search.best_params_
    best_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', **best_params)
    best_model.fit(X_train, y_train)


    y_pred = best_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)


    print(f"Training data size: {len(X_train)}")
    print(f"Training time: {training_time:.4f} seconds")
    print(f"Accuracy: {accuracy:.4f}")


    unique_classes = np.unique(y_test)
    for cls in unique_classes:
        cls_f1 = f1_score(y_test, y_pred, average=None, labels=[cls])[0]
        cls_precision = precision_score(y_test, y_pred, average=None, labels=[cls])[0]
        cls_recall = recall_score(y_test, y_pred, average=None, labels=[cls])[0]
        print(f"Class {cls} F1 score: {cls_f1:.4f}")
        print(f"Class {cls} Precision: {cls_precision:.4f}")
        print(f"Class {cls} Recall: {cls_recall:.4f}")

    print(f"Best parameters: {best_params}")


    feature_importances = best_model.feature_importances_
    non_zero_importances = [(i, importance) for i, importance in enumerate(feature_importances) if importance > 0]

    print("\nFeatures with non-zero importance:")
    for feature_idx, importance in non_zero_importances:
        print(f"Feature {feature_idx}: {importance:.4f}")


    model_filename = 'xgboost_model.pkl'
    joblib.dump(best_model, model_filename)
    print(f'Model saved as {model_filename}')

if __name__ == "__main__":
    main()



Training data size: 5973
Training time: 8.6290 seconds
Accuracy: 0.9995
Class 0 F1 score: 0.9944
Class 0 Precision: 0.9889
Class 0 Recall: 1.0000
Class 1 F1 score: 0.9997
Class 1 Precision: 1.0000
Class 1 Recall: 0.9995
Best parameters: OrderedDict([('colsample_bytree', 0.8), ('gamma', 0.2), ('learning_rate', 0.2), ('max_depth', 3), ('n_estimators', 200), ('subsample', 0.8)])

Features with non-zero importance:
Feature 5: 0.0010
Feature 7: 0.0295
Feature 8: 0.2541
Feature 15: 0.0009
Feature 16: 0.0011
Feature 19: 0.0020
Feature 21: 0.0014
Feature 23: 0.0030
Feature 38: 0.7070
Model saved as xgboost_model.pkl


In [7]:
#lightgbm
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from lightgbm import LGBMClassifier
from skopt import BayesSearchCV
import joblib
import time
import warnings


warnings.filterwarnings("ignore")


def load_data(csv_file_path):
    data = pd.read_csv(csv_file_path)
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values

    y = np.where(y == 1, 1, 0)
    return X, y


def main():
    csv_file_path = 'arpspoof4.0.csv' 
    test_size = 0.4 
    random_state = 42  


    X, y = load_data(csv_file_path)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, shuffle=True)


    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)


    param_dist = {
        'num_leaves': [15, 31, 63],        
        'max_depth': [3, 5, -1],            
        'learning_rate': [0.01, 0.1, 0.2],  
        'n_estimators': [50, 100, 200],     
        'subsample': [0.6, 0.8, 1.0],       
        'colsample_bytree': [0.6, 0.8, 1.0] 
    }


    bayes_search = BayesSearchCV(
        LGBMClassifier(verbose=-1), 
        search_spaces=param_dist,
        n_iter=10,
        scoring='accuracy',
        n_jobs=-1,
        cv=3,
        random_state=random_state,
        verbose=0
    )


    start_time = time.time()


    bayes_search.fit(X_train, y_train)


    end_time = time.time()


    training_time = end_time - start_time

    best_params = bayes_search.best_params_
    best_score = bayes_search.best_score_

    print(f"Best parameters found: {best_params}")
    print(f"Best cross-validation accuracy: {best_score:.4f}")
    print(f"Training time: {training_time:.4f} seconds")


    best_model = LGBMClassifier(verbose=-1, **best_params)
    best_model.fit(X_train, y_train)


    y_pred = best_model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy on test set: {test_accuracy:.4f}')


    unique_classes = np.unique(y_test)
    for cls in unique_classes:
        cls_f1 = f1_score(y_test, y_pred, average=None, labels=[cls])[0]
        cls_precision = precision_score(y_test, y_pred, average=None, labels=[cls])[0]
        cls_recall = recall_score(y_test, y_pred, average=None, labels=[cls])[0]
        print(f"Class {cls} F1 score: {cls_f1:.4f}")
        print(f"Class {cls} Precision: {cls_precision:.4f}")
        print(f"Class {cls} Recall: {cls_recall:.4f}")


    feature_importances = best_model.feature_importances_
    non_zero_importances = [(i, importance) for i, importance in enumerate(feature_importances) if importance > 0]

    print("\nFeatures with non-zero importance:")
    for feature_idx, importance in non_zero_importances:
        print(f"Feature {feature_idx}: {importance:.4f}")


    model_filename = 'lightgbm_model.pkl'
    joblib.dump(best_model, model_filename)
    print(f'Model saved as {model_filename}')

if __name__ == "__main__":
    main()



Best parameters found: OrderedDict([('colsample_bytree', 0.8), ('learning_rate', 0.2), ('max_depth', -1), ('n_estimators', 50), ('num_leaves', 63), ('subsample', 0.8)])
Best cross-validation accuracy: 0.9998
Training time: 13.1785 seconds
Accuracy on test set: 0.9995
Class 0 F1 score: 0.9944
Class 0 Precision: 0.9889
Class 0 Recall: 1.0000
Class 1 F1 score: 0.9997
Class 1 Precision: 1.0000
Class 1 Recall: 0.9995

Features with non-zero importance:
Feature 0: 523.0000
Feature 1: 80.0000
Feature 2: 23.0000
Feature 4: 41.0000
Feature 5: 37.0000
Feature 6: 31.0000
Feature 7: 10.0000
Feature 8: 41.0000
Feature 9: 6.0000
Feature 10: 7.0000
Feature 11: 4.0000
Feature 13: 22.0000
Feature 14: 3.0000
Feature 15: 45.0000
Feature 16: 26.0000
Feature 17: 3.0000
Feature 18: 1.0000
Feature 19: 8.0000
Feature 21: 1.0000
Feature 23: 1.0000
Feature 25: 13.0000
Feature 26: 14.0000
Feature 28: 1.0000
Feature 29: 8.0000
Feature 30: 9.0000
Feature 31: 2.0000
Feature 34: 10.0000
Feature 36: 7.0000
Feature 37

In [8]:
#SVC
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score
from sklearn.svm import SVC
from skopt import BayesSearchCV
import joblib  
import time 

def load_data(csv_file_path):
    data = pd.read_csv(csv_file_path)
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values

    y = np.where(y == 1, 1, 0)
    return X, y




def main():
    csv_file_path = 'arpspoof4.0.csv' 
    test_size = 0.4   
    random_state = 42  


    X, y = load_data(csv_file_path)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, shuffle=True)


    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)


    param_dist = {
        'C': [0.1, 1, 10],
        'kernel': ['linear', 'rbf', 'poly'],
        'gamma': ['scale', 'auto']
    }


    bayes_search = BayesSearchCV(
        SVC(), 
        search_spaces=param_dist, 
        n_iter=10, 
        scoring='accuracy', 
        n_jobs=1, 
        cv=3, 
        random_state=random_state, 
        verbose=0
    )


    start_time = time.time()


    bayes_search.fit(X_train, y_train)
    

    end_time = time.time()


    training_time = end_time - start_time

    best_params = bayes_search.best_params_
    best_score = bayes_search.best_score_

    print(f"Best parameters found: {best_params}")
    print(f"Best cross-validation accuracy: {best_score:.4f}")
    print(f"Training time: {training_time:.4f} seconds")  

  
    best_model = SVC(**best_params)
    best_model.fit(X_train, y_train)

 
    y_pred = best_model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy on test set: {test_accuracy:.4f}')


    unique_classes = np.unique(y_test)
    for cls in unique_classes:
        cls_f1 = f1_score(y_test, y_pred, average=None, labels=[cls])[0]
        cls_precision = precision_score(y_test, y_pred, average=None, labels=[cls])[0]
        cls_recall = recall_score(y_test, y_pred, average=None, labels=[cls])[0]
        print(f"Class {cls} F1 score: {cls_f1:.4f}")
        print(f"Class {cls} Precision: {cls_precision:.4f}")
        print(f"Class {cls} Recall: {cls_recall:.4f}")

 
    model_filename = 'svc_model.pkl'
    joblib.dump(best_model, model_filename)
    print(f'Model saved as {model_filename}')

if __name__ == "__main__":
    main()



Best parameters found: OrderedDict([('C', 1), ('gamma', 'auto'), ('kernel', 'rbf')])
Best cross-validation accuracy: 1.0000
Training time: 1.1885 seconds
Accuracy on test set: 1.0000
Class 0 F1 score: 1.0000
Class 0 Precision: 1.0000
Class 0 Recall: 1.0000
Class 1 F1 score: 1.0000
Class 1 Precision: 1.0000
Class 1 Recall: 1.0000
Model saved as svc_model.pkl


In [9]:
#adaboost
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.ensemble import AdaBoostClassifier
from skopt import BayesSearchCV
import joblib
import time
import warnings


warnings.filterwarnings("ignore")


def load_data(csv_file_path):
    data = pd.read_csv(csv_file_path)
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values

    y = np.where(y == 1, 1, 0)
    return X, y


def main():
    csv_file_path = 'arpspoof4.0.csv'  
    test_size = 0.4  
    random_state = 42 


    X, y = load_data(csv_file_path)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, shuffle=True)


    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)


    param_dist = {
        'n_estimators': [50, 100, 200],
        'learning_rate': [0.01, 0.1, 1]
    }


    bayes_search = BayesSearchCV(
        AdaBoostClassifier(), 
        search_spaces=param_dist, 
        n_iter=10, 
        scoring='accuracy', 
        n_jobs=1, 
        cv=3, 
        random_state=random_state, 
        verbose=0  
    )


    start_time = time.time()


    bayes_search.fit(X_train, y_train)


    end_time = time.time()


    training_time = end_time - start_time

    best_params = bayes_search.best_params_
    best_score = bayes_search.best_score_

    print(f"Best parameters found: {best_params}")
    print(f"Best cross-validation accuracy: {best_score:.4f}")
    print(f"Training time: {training_time:.4f} seconds")


    best_model = AdaBoostClassifier(**best_params)
    best_model.fit(X_train, y_train)


    y_pred = best_model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy on test set: {test_accuracy:.4f}')

 
    unique_classes = np.unique(y_test)
    for cls in unique_classes:
        cls_f1 = f1_score(y_test, y_pred, labels=[cls], average=None)[0]
        cls_precision = precision_score(y_test, y_pred, labels=[cls], average=None)[0]
        cls_recall = recall_score(y_test, y_pred, labels=[cls], average=None)[0]
        print(f"Class {cls} F1 score: {cls_f1:.4f}")
        print(f"Class {cls} Precision: {cls_precision:.4f}")
        print(f"Class {cls} Recall: {cls_recall:.4f}")


    model_filename = 'adaboost_model.pkl'
    joblib.dump(best_model, model_filename)
    print(f'Model saved as {model_filename}')

if __name__ == "__main__":
    main()



Best parameters found: OrderedDict([('learning_rate', 0.1), ('n_estimators', 200)])
Best cross-validation accuracy: 0.9998
Training time: 28.8580 seconds
Accuracy on test set: 0.9995
Class 0 F1 score: 0.9944
Class 0 Precision: 0.9889
Class 0 Recall: 1.0000
Class 1 F1 score: 0.9997
Class 1 Precision: 1.0000
Class 1 Recall: 0.9995
Model saved as adaboost_model.pkl


In [10]:
#elm
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from skopt import BayesSearchCV
import h5py
import time
import warnings
import hpelm
import os
import sys


warnings.filterwarnings("ignore")


class SuppressOutput:
    def __enter__(self):
        self._stdout = sys.stdout
        self._stderr = sys.stderr
        sys.stdout = open(os.devnull, 'w')
        sys.stderr = open(os.devnull, 'w')
    
    def __exit__(self, exc_type, exc_val, exc_tb):
        sys.stdout.close()
        sys.stderr.close()
        sys.stdout = self._stdout
        sys.stderr = self._stderr


def load_data(csv_file_path):
    data = pd.read_csv(csv_file_path)
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values

    y = np.where(y == 1, 1, 0)
    return X, y


class ELMClassifier:
    def __init__(self, input_dim, hidden_units=100, activation='sigm'):
        self.input_dim = input_dim
        self.hidden_units = hidden_units
        self.activation = activation
        with SuppressOutput():
            self.model = hpelm.ELM(input_dim, 2)  
            self.model.add_neurons(hidden_units, activation)

    def fit(self, X, y):
        y_one_hot = np.zeros((y.size, y.max() + 1))
        y_one_hot[np.arange(y.size), y] = 1
        with SuppressOutput():
            self.model.train(X, y_one_hot, 'c')

    def predict(self, X):
        with SuppressOutput():
            pred = self.model.predict(X)
        return np.argmax(pred, axis=1)

    def score(self, X, y):
        y_pred = self.predict(X)
        return accuracy_score(y, y_pred)

    def get_params(self, deep=True):
        return {"input_dim": self.input_dim, "hidden_units": self.hidden_units, "activation": self.activation}

    def set_params(self, **params):
        for key, value in params.items():
            setattr(self, key, value)
        with SuppressOutput():
            self.model = hpelm.ELM(self.input_dim, 2)  
            self.model.add_neurons(self.hidden_units, self.activation)
        return self


def main():
    csv_file_path = 'arpspoof4.0.csv'  
    test_size = 0.4  
    random_state = 42 


    X, y = load_data(csv_file_path)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, shuffle=True)


    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)


    param_dist = {
        'hidden_units': [50, 100, 200],
        'activation': ['sigm', 'tanh']
    }


    bayes_search = BayesSearchCV(
        ELMClassifier(input_dim=X_train.shape[1]), 
        search_spaces=param_dist, 
        n_iter=10, 
        scoring='accuracy', 
        n_jobs=1, 
        cv=3, 
        random_state=random_state, 
        verbose=0 
    )


    start_time = time.time()


    bayes_search.fit(X_train, y_train)


    end_time = time.time()


    training_time = end_time - start_time

    best_params = bayes_search.best_params_
    best_score = bayes_search.best_score_

    print(f"Best parameters found: {best_params}")
    print(f"Best cross-validation accuracy: {best_score:.4f}")
    print(f"Training time: {training_time:.4f} seconds")


    best_model = ELMClassifier(input_dim=X_train.shape[1], **best_params)
    best_model.fit(X_train, y_train)


    y_pred = best_model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy on test set: {test_accuracy:.4f}')


    unique_classes = np.unique(y_test)
    for cls in unique_classes:
        cls_f1 = f1_score(y_test, y_pred, labels=[cls], average=None)[0]
        cls_precision = precision_score(y_test, y_pred, labels=[cls], average=None)[0]
        cls_recall = recall_score(y_test, y_pred, labels=[cls], average=None)[0]
        print(f"Class {cls} F1 score: {cls_f1:.4f}")
        print(f"Class {cls} Precision: {cls_precision:.4f}")
        print(f"Class {cls} Recall: {cls_recall:.4f}")


    model_filename = 'elm_model.h5'
    with h5py.File(model_filename, 'w') as h5f:
        h5f.create_dataset('scaler_mean', data=scaler.mean_)
        h5f.create_dataset('scaler_scale', data=scaler.scale_)
        h5f.attrs['best_params'] = str(best_params)
    print(f'Model saved as {model_filename}')

if __name__ == "__main__":
    main()

Best parameters found: OrderedDict([('activation', 'tanh'), ('hidden_units', 200)])
Best cross-validation accuracy: 0.9993
Training time: 1.6369 seconds
Accuracy on test set: 0.9992
Class 0 F1 score: 0.9916
Class 0 Precision: 0.9834
Class 0 Recall: 1.0000
Class 1 F1 score: 0.9996
Class 1 Precision: 1.0000
Class 1 Recall: 0.9992
Model saved as elm_model.h5
