In [94]:
import pandas as pd
from sklearn.model_selection import train_test_split
from utils.data_preprocessing import load_data, preprocess_data
import os
import numpy as np

save_data = "save_data_2018/"
if not os.path.exists(save_data):
    os.mkdir(save_data)

cicids2018_path_v1 = 'data/Thursday-22-02-2018_TrafficForML_CICFlowMeter.csv'
cicids2018_path_v2 = 'data/Friday-23-02-2018_TrafficForML_CICFlowMeter.csv'
merge_data = object
if(not os.path.isfile(save_data + "merge.csv")):
    # Optionally, remove duplicate rows
    data_2018_v1 = load_data(cicids2018_path_v1)
    data_2018_v2 = load_data(cicids2018_path_v2)

    print("Headers match. Proceeding with merge.")
    merged_df = pd.concat([data_2018_v1, data_2018_v2], axis=0, ignore_index=True)
    merged_df = merged_df.drop_duplicates()
    merged_df.to_csv(save_data + "merge.csv", index=False)
    
    merge_data = load_data(save_data + "merge.csv")
    
if(not os.path.isfile(save_data + "web_attacks.csv")):
    preprocess_data(merge_data, save_data)

In [95]:
df = pd.read_csv(save_data + 'web_attacks.csv')
df['Label'] = df['Label'].apply(lambda x: 0 if x == 'Benign' else 1)


normal_df = df[df['Label'] == 0]
attack_df = df[df['Label'] != 0]

num_attack = len(attack_df)

num_normal = 2 * num_attack

In [96]:
if num_normal > len(normal_df):
    raise ValueError("num_normal exceeds the number of rows in normal_df.")
normal_df = normal_df.sample(n=num_normal, random_state=42)

# Ensure 'Label' column exists
if 'Label' not in normal_df.columns or 'Label' not in attack_df.columns:
    raise ValueError("Both DataFrames must have a 'Label' column.")

# Combine and shuffle the dataset
balanced_df = pd.concat([normal_df, attack_df]).sample(frac=1, random_state=42).reset_index(drop=True)

# Split the balanced dataset
if 'Label' not in balanced_df.columns:
    raise ValueError("The 'Label' column is missing from the balanced DataFrame.")


train_df, temp_df = train_test_split(balanced_df, test_size=0.4, random_state=42, stratify=balanced_df['Label'])
val_df, test_df = train_test_split(temp_df, test_size=0.7, random_state=42, stratify=temp_df['Label'])

# Ensure enough data for balancing
train_normal_df = train_df[train_df['Label'] == 0]
train_attack_df = train_df[train_df['Label'] == 1]

if len(train_attack_df) == 0:
    raise ValueError("There are no attack instances in the training set.")

num_train_attack = len(train_attack_df)
num_train_normal = 2 * num_train_attack

# Ensure we don't try to sample more rows than are available
if len(train_normal_df) < num_train_normal:
    num_train_normal = len(train_normal_df)  # Adjust to available number of rows

train_normal_df = train_normal_df.sample(n=num_train_normal, random_state=42)
train_df = pd.concat([train_normal_df, train_attack_df]).sample(frac=1, random_state=42).reset_index(drop=True)

In [97]:
# Print the count of each class in the training, validation, and testing sets
for name, df in zip(["Training", "Validation", "Testing"], [train_df, val_df, test_df]):
    unique, counts = np.unique(df['Label'], return_counts=True)
    print(f"{name} set class distribution:", dict(zip(unique, counts)))

Training set class distribution: {0: 1113, 1: 557}
Validation set class distribution: {0: 223, 1: 111}
Testing set class distribution: {0: 520, 1: 260}


In [98]:
# Save the train, validation, and test sets to CSV files
train_df.to_csv(save_data +  'train_set.csv', index=False)
val_df.to_csv(save_data +  'val_set.csv', index=False)
test_df.to_csv(save_data + 'test_set.csv', index=False)

In [99]:
# Define feature columns (excluding the label column)
feature_cols = [col for col in df.columns if col != 'Label']

# Extract features and labels for training, validation, and testing sets
X_train = train_df[feature_cols]
y_train = train_df['Label']
X_val = val_df[feature_cols]
y_val = val_df['Label']
X_test = test_df[feature_cols]
y_test = test_df['Label']

In [100]:
# Define classifiers
from models.decision_tree import train_decision_tree
from models.random_forest import train_random_forest
from models.lda import train_lda
from models.naive_bayes import train_naive_bayes
from models.logistic_regression import train_logistic_regression
from models.knn import train_knn
from models.svm import train_svm
from models.extra_trees import train_extra_trees
from models.bagging import train_bagging
from models.mlp import train_mlp

classifiers = {
    'Decision Tree': train_decision_tree,
    'Random Forest': train_random_forest,
    'Linear Discriminant Analysis': train_lda,
    'Naive Bayes': train_naive_bayes,
    'Logistic Regression': train_logistic_regression,
    'K-Nearest Neighbors': train_knn,
    'Support Vector Machine': train_svm,
    'Extra Trees Classifier': train_extra_trees,
    'Bagging Classifier': train_bagging,
    'Multi-layer Perceptron': train_mlp
}

In [101]:
def model_10Classifier():
    results = []
    for name, train_function in classifiers.items():
        model, val_acc, val_pre, val_rec, val_f1, test_acc, test_pre, test_rec, test_f1 = train_function(X_train, y_train, X_val, y_val, X_test, y_test, save_data)
        results.append({
            'Model': name,
            'Validation Accuracy': val_acc,
            'Validation Precision': val_pre,
            'Validation Recall': val_rec,
            'Validation F1 Score': val_f1,
            "Test Accuracy": test_acc,
            "Test Precision": test_pre,
            "Test Recall": test_rec,
            "Test F1 Score": test_f1,
        })

    # Save results to CSV
    results_df = pd.DataFrame(results)
    results_df.to_csv(save_data + '10Classsifer_results_2018.csv', index=False)
    
model_10Classifier()

Decision Tree Validation - Accuracy: 0.9221556886227545 Precision: 0.897196261682243 Recall: 0.8648648648648649 F1 Score: 0.8807339449541285
Random Forest Validation - Accuracy: 0.9341317365269461 Precision: 0.9238095238095239 Recall: 0.8738738738738738 F1 Score: 0.8981481481481481
LDA Validation - Accuracy: 0.9221556886227545 Precision: 0.8346456692913385 Recall: 0.954954954954955 F1 Score: 0.8907563025210085
Naive Bayes Validation - Accuracy: 0.6976047904191617 Precision: 0.5242718446601942 Recall: 0.972972972972973 F1 Score: 0.6813880126182965


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression Validation - Accuracy: 0.8173652694610778 Precision: 0.8048780487804879 Recall: 0.5945945945945946 F1 Score: 0.6839378238341969
KNN Validation - Accuracy: 0.9341317365269461 Precision: 0.9238095238095239 Recall: 0.8738738738738738 F1 Score: 0.8981481481481481
SVM Validation - Accuracy: 0.7934131736526946 Precision: 0.9772727272727273 Recall: 0.38738738738738737 F1 Score: 0.5548387096774193
Extra Trees Validation - Accuracy: 0.9341317365269461 Precision: 0.9238095238095239 Recall: 0.8738738738738738 F1 Score: 0.8981481481481481
MLP Validation - Accuracy: 0.8023952095808383 Precision: 0.8169014084507042 Recall: 0.5225225225225225 F1 Score: 0.6373626373626373


In [102]:
import pandas as pd
from sklearn.decomposition import PCA

# Assume the classifiers dictionary and train functions are already defined
# Define a function to apply PCA and train classifiers

def train_with_pca(X_train, y_train, X_val, y_val, X_test, y_test, n_components=20):
    X_combined = np.concatenate((X_train, X_val, X_test), axis=0)
    y_combined = np.concatenate((y_train, y_val, y_test), axis=0)

    pca = PCA(n_components=n_components)
    X_combined_pca = pca.fit_transform(X_combined)

    # Bước 3: Tách lại dữ liệu thành các tập train, validation và test
    X_train_pca = X_combined_pca[:len(X_train)]
    X_val_pca = X_combined_pca[len(X_train):len(X_train)+len(X_val)]
    X_test_pca = X_combined_pca[len(X_train)+len(X_val):]

    # Kiểm tra kích thước của các tập dữ liệu sau khi PCA
    print("X_train_pca shape:", X_train_pca.shape)
    print("X_val_pca shape:", X_val_pca.shape)
    print("X_test_pca shape:", X_test_pca.shape)
    
    results = []
    for name, train_function in classifiers.items():
        model, val_acc, val_pre, val_rec, val_f1, test_acc, test_pre, test_rec, test_f1 = train_function(X_train_pca, y_train, X_val_pca, y_val, X_test_pca, y_test, save_data)
        results.append({
            'Model': name,
            'Validation Accuracy': val_acc,
            'Validation Precision': val_pre,
            'Validation Recall': val_rec,
            'Validation F1 Score': val_f1,
            'Test Accuracy': test_acc,
            'Test Precision': test_pre,
            'Test Recall': test_rec,
            'Test F1 Score': test_f1
        })
    
    return results

# Call the function and save results
pca_results = train_with_pca(X_train, y_train, X_val, y_val, X_test, y_test)
pca_results_df = pd.DataFrame(pca_results)
pca_results_df.to_csv(save_data + 'pca_model_results_2018.csv', index=False)


X_train_pca shape: (1670, 20)
X_val_pca shape: (334, 20)
X_test_pca shape: (780, 20)
Decision Tree Validation - Accuracy: 0.9251497005988024 Precision: 0.9134615384615384 Recall: 0.8558558558558559 F1 Score: 0.8837209302325582
Random Forest Validation - Accuracy: 0.9221556886227545 Precision: 0.9207920792079208 Recall: 0.8378378378378378 F1 Score: 0.8773584905660378
LDA Validation - Accuracy: 0.7844311377245509 Precision: 0.8823529411764706 Recall: 0.40540540540540543 F1 Score: 0.5555555555555556
Naive Bayes Validation - Accuracy: 0.39820359281437123 Precision: 0.3392857142857143 Recall: 0.8558558558558559 F1 Score: 0.4859335038363171


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression Validation - Accuracy: 0.811377245508982 Precision: 0.875 Recall: 0.5045045045045045 F1 Score: 0.64
KNN Validation - Accuracy: 0.9281437125748503 Precision: 0.9223300970873787 Recall: 0.8558558558558559 F1 Score: 0.8878504672897196
SVM Validation - Accuracy: 0.7934131736526946 Precision: 0.9772727272727273 Recall: 0.38738738738738737 F1 Score: 0.5548387096774193
Extra Trees Validation - Accuracy: 0.9221556886227545 Precision: 0.912621359223301 Recall: 0.8468468468468469 F1 Score: 0.8785046728971962
MLP Validation - Accuracy: 0.8562874251497006 Precision: 0.92 Recall: 0.6216216216216216 F1 Score: 0.7419354838709677


In [103]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Định nghĩa Autoencoder
class Autoencoder(nn.Module):
    def __init__(self, input_dim):
        super(Autoencoder, self).__init__()
        encoding_dim1 = 50
        encoding_dim2 = 30
        encoding_dim3 = int(np.sqrt(input_dim)) + 1

        # Lớp mã hóa
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, encoding_dim1),
            nn.ReLU(),
            nn.Linear(encoding_dim1, encoding_dim2),
            nn.ReLU(),
            nn.Linear(encoding_dim2, encoding_dim3),
            nn.ReLU()
        )
        # Lớp giải mã
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim3, encoding_dim2),
            nn.ReLU(),
            nn.Linear(encoding_dim2, encoding_dim1),
            nn.ReLU(),
            nn.Linear(encoding_dim1, input_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded

# Hàm để áp dụng deep autoencoder và train classifiers
def train_with_autoencoder(X_train, y_train, X_val, y_val, X_test, y_test, epochs=50, batch_size=256, alpha1=0.1):
    input_dim = X_train.shape[1]
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    # Khởi tạo mô hình và các thành phần cần thiết
    autoencoder = Autoencoder(input_dim).to(device)
    optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)
    criterion = nn.MSELoss()

    # Chuyển đổi dữ liệu DataFrame thành NumPy array và tensor
    X_train_np = X_train.to_numpy()
    X_val_np = X_val.to_numpy()
    X_test_np = X_test.to_numpy()
    y_train_np = y_train.to_numpy()
    y_val_np = y_val.to_numpy()
    y_test_np = y_test.to_numpy()
    
    # Tạo TensorDataset cho training và validation
    train_dataset = TensorDataset(
        torch.tensor(X_train_np, dtype=torch.float32),
        torch.tensor(X_train_np, dtype=torch.float32)  # Sử dụng dữ liệu đầu vào làm mục tiêu cho autoencoder
    )
    val_dataset = TensorDataset(
        torch.tensor(X_val_np, dtype=torch.float32),
        torch.tensor(X_val_np, dtype=torch.float32)  # Sử dụng dữ liệu đầu vào làm mục tiêu cho autoencoder
    )
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    
    # Training loop
    for epoch in range(epochs):
        autoencoder.train()
        train_loss = 0.0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            encoded, outputs = autoencoder(inputs)
            loss = alpha1 * criterion(outputs, targets)  # Chỉ sử dụng MSE loss
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        print(f'Epoch [{epoch+1}/{epochs}], Loss: {train_loss/len(train_loader)}')

    # Encode dữ liệu
    autoencoder.eval()
    with torch.no_grad():
        X_train_enc, _ = autoencoder(torch.tensor(X_train_np, dtype=torch.float32).to(device))
        X_val_enc, _ = autoencoder(torch.tensor(X_val_np, dtype=torch.float32).to(device))
        X_test_enc, _ = autoencoder(torch.tensor(X_test_np, dtype=torch.float32).to(device))
    
    # Train và đánh giá classifiers
    results = []
    for name, train_function in classifiers.items():
        model, val_acc, val_pre, val_rec, val_f1, test_acc, test_pre, test_rec, test_f1 = train_function(X_train_enc.cpu().numpy(), y_train, X_val_enc.cpu().numpy(), y_val, X_test_enc.cpu().numpy(), y_test, save_data)
        results.append({
            'Model': name,
            'Validation Accuracy': val_acc,
            'Validation Precision': val_pre,
            'Validation Recall': val_rec,
            'Validation F1 Score': val_f1,
            'Test Accuracy': test_acc,
            'Test Precision': test_pre,
            'Test Recall': test_rec,
            'Test F1 Score': test_f1
        })
    
    return results

# Gọi hàm và lưu kết quả
autoencoder_results = train_with_autoencoder(X_train, y_train, X_val, y_val, X_test, y_test)
autoencoder_results_df = pd.DataFrame(autoencoder_results)
autoencoder_results_df.to_csv(save_data + 'autoencoder_model_2018.csv', index=False)


Epoch [1/50], Loss: 12694433450276.572
Epoch [2/50], Loss: 12613622544091.428
Epoch [3/50], Loss: 12925182186057.143
Epoch [4/50], Loss: 12655744740790.857
Epoch [5/50], Loss: 12640733027181.715
Epoch [6/50], Loss: 12891971537188.572
Epoch [7/50], Loss: 12781907008365.715
Epoch [8/50], Loss: 13040646581686.857
Epoch [9/50], Loss: 12951806729069.715
Epoch [10/50], Loss: 12881136151990.857
Epoch [11/50], Loss: 12806914796982.857
Epoch [12/50], Loss: 12786267736356.572
Epoch [13/50], Loss: 12808031830016.0
Epoch [14/50], Loss: 12608186126921.143
Epoch [15/50], Loss: 12913835095771.428
Epoch [16/50], Loss: 12853196994267.428
Epoch [17/50], Loss: 12620422259858.285
Epoch [18/50], Loss: 12605264045202.285
Epoch [19/50], Loss: 12673681532050.285
Epoch [20/50], Loss: 13015050841526.857
Epoch [21/50], Loss: 12831344071241.143
Epoch [22/50], Loss: 13034930793910.857
Epoch [23/50], Loss: 12508847706697.143
Epoch [24/50], Loss: 12692759623387.428
Epoch [25/50], Loss: 12693545306404.572
Epoch [26/5

In [105]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
import random

# Định nghĩa Autoencoder
class Autoencoder(nn.Module):
    def __init__(self, input_dim):
        super(Autoencoder, self).__init__()
        encoding_dim1 = 50
        encoding_dim2 = 30
        encoding_dim3 = int(np.sqrt(input_dim)) + 1

        # Lớp mã hóa
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, encoding_dim1),
            nn.ReLU(),
            nn.Linear(encoding_dim1, encoding_dim2),
            nn.ReLU(),
            nn.Linear(encoding_dim2, encoding_dim3),
            nn.ReLU()  # Đổi từ sigmoid thành ReLU để giữ giá trị dương
        )
        # Lớp giải mã
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim3, encoding_dim2),
            nn.ReLU(),
            nn.Linear(encoding_dim2, encoding_dim1),
            nn.ReLU(),
            nn.Linear(encoding_dim1, input_dim),
            nn.Sigmoid()  # Giữ sigmoid cho đầu ra
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded, encoded

class BinaryMagnetLoss(nn.Module):
    def __init__(self, D=100, M=2, alpha=7.18, epsilon=1e-6):
        super(BinaryMagnetLoss, self).__init__()
        self.D = D
        self.M = M
        self.alpha = alpha
        self.epsilon = epsilon

    def forward(self, outputs, indices, assignment):
        outputs = outputs.float()
        unique_clusters = torch.unique(assignment[indices])
        batch_clusters = {cluster.item(): (assignment[indices] == cluster).nonzero(as_tuple=True)[0] for cluster in unique_clusters}

        num_instances = sum(len(batch_clusters[cluster.item()]) for cluster in unique_clusters)
        c_means = torch.stack([outputs[batch_clusters[cluster.item()]].mean(dim=0) for cluster in unique_clusters])
        
        stdev = sum((outputs[i] - c_means[unique_clusters.tolist().index(assignment[indices[i]])]).norm(p=2).pow(2) 
                    for i in range(len(indices))).item() / (num_instances - 1)
        stdev = -2.0 * (stdev + self.epsilon)

        loss = torch.zeros(1).to(outputs.device)
        denom = [torch.zeros(1).to(outputs.device) for _ in range(len(indices))]

        for m, cluster in enumerate(unique_clusters):
            cluster_indices = batch_clusters[cluster.item()]
            for d in range(min(self.D, len(cluster_indices))):
                ind = cluster_indices[d].item()
                for other_cluster in unique_clusters:
                    if other_cluster != cluster:
                        denom[ind] += ((outputs[ind] - c_means[unique_clusters.tolist().index(other_cluster)]).norm().pow(2) / stdev).exp()

                loss_term = (((outputs[ind] - c_means[m]).norm().pow(2) / stdev - self.alpha).exp() / (denom[ind] + self.epsilon)).log().clamp(max=0.0)
                if torch.isnan(loss_term) or torch.isinf(loss_term):
                    continue
                loss -= loss_term

        loss /= num_instances
        return loss


# Custom Loss Function combining Cross-Entropy and Binary Magnet Loss
def custom_loss(y_true, y_pred, embeddings, alpha1=1.0, alpha2=2.0):
    ce_loss = F.cross_entropy(y_pred, y_true.long())  # Tính toán Cross-Entropy Loss
    magnet_loss_fn = BinaryMagnetLoss()
    magnet_loss_value = magnet_loss_fn(embeddings, torch.arange(len(y_true)), y_true)
    return alpha1 * ce_loss + alpha2 * magnet_loss_value

# Hàm để áp dụng deep autoencoder và train classifiers
def train_with_autoencoder(X_train, y_train, X_val, y_val, X_test, y_test, classifiers, save_data, epochs=10, batch_size=256, alpha1=1.0, alpha2=2.0):
    input_dim = X_train.shape[1]
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    autoencoder = Autoencoder(input_dim).to(device)
    optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)

    X_train_np = X_train.to_numpy()
    X_val_np = X_val.to_numpy()
    y_train_np = y_train.to_numpy()
    y_val_np = y_val.to_numpy()

    train_dataset = TensorDataset(
        torch.tensor(X_train_np, dtype=torch.float32),
        torch.tensor(y_train_np, dtype=torch.float32)
    )
    val_dataset = TensorDataset(
        torch.tensor(X_val_np, dtype=torch.float32),
        torch.tensor(y_val_np, dtype=torch.float32)
    )
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    for epoch in range(epochs):
        autoencoder.train()
        train_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs, encoded = autoencoder(inputs)
            loss = custom_loss(labels, outputs, encoded, alpha1=alpha1, alpha2=alpha2)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        print(f'Epoch [{epoch+1}/{epochs}], Loss: {train_loss/len(train_loader)}')

    autoencoder.eval()
    with torch.no_grad():
        X_train_enc = autoencoder.encoder(torch.tensor(X_train_np, dtype=torch.float32).to(device)).cpu().numpy()
        X_val_enc = autoencoder.encoder(torch.tensor(X_val_np, dtype=torch.float32).to(device)).cpu().numpy()
        X_test_enc = autoencoder.encoder(torch.tensor(X_test.to_numpy(), dtype=torch.float32).to(device)).cpu().numpy()

    results = []
    try:
        for name, train_function in classifiers.items():
            model, val_acc, val_pre, val_rec, val_f1, test_acc, test_pre, test_rec, test_f1 = train_function(X_train_enc, y_train, X_val_enc, y_val, X_test_enc, y_test, save_data)
            results.append({
                'Model': name,
                'Validation Accuracy': val_acc,
                'Validation Precision': val_pre,
                'Validation Recall': val_rec,
                'Validation F1 Score': val_f1,
                'Test Accuracy': test_acc,
                'Test Precision': test_pre,
                'Test Recall': test_rec,
                'Test F1 Score': test_f1,
                'epochs': epochs,
                'alpha1': alpha1,
                'alpha2': alpha2
            })
    except:
        print("Error")

    return results

def generate_random_choice():
    num1 = random.randint(0, 10)  # Sinh số ngẫu nhiên từ 0 đến 3
    num2 = random.randint(0, 10)  # Sinh số ngẫu nhiên từ 0 đến 3
    return num1, num2

def generate_random_choice1():
    values = [i/100 for i in range(1, 11)]

    # Chọn một số ngẫu nhiên từ danh sách các giá trị
    num1 = random.choice(values)
    num2 = random.choice(values)

    return num1, num2

def generate_random_choice2():
    values = [i/10 for i in range(1, 11)]

    # Chọn một số ngẫu nhiên từ danh sách các giá trị
    num1 = random.choice(values)
    num2 = random.choice(values)

    return num1, num2

def generate_random_choice3():
    values1 = [i/10 for i in range(1, 11)]
    values2 = [i/100 for i in range(1, 11)]

    # Chọn một số ngẫu nhiên từ danh sách các giá trị
    num1 = random.choice(values1)
    num2 = random.choice(values2)

    return num1, num2

def generate_random_choice4():
    values1 = [i/100 for i in range(1, 11)]
    values2 = [i/10 for i in range(1, 11)]

    # Chọn một số ngẫu nhiên từ danh sách các giá trị
    num1 = random.choice(values1)
    num2 = random.choice(values2)

    return num1, num2

maxx  = 50
for rd in range(1, maxx):
    if rd < maxx/5:
        alpha1, alpha2 = generate_random_choice()
    elif rd < 2*maxx/5:
        alpha1, alpha2 = generate_random_choice1()
    elif rd < 3*maxx/5:
        alpha1, alpha2 = generate_random_choice2()
    elif rd < 4*maxx/5:
        alpha1, alpha2 = generate_random_choice3()
    else:
        alpha1, alpha2 = generate_random_choice4()

    # Gọi hàm và lưu kết quả
    autoencoder_results = train_with_autoencoder(X_train, y_train, X_val, y_val, X_test, y_test, classifiers, save_data, epochs=10, alpha1=alpha1, alpha2=alpha2)
    autoencoder_results_df = pd.DataFrame(autoencoder_results)

    csv_file = save_data + 'autoencoder_Binary_Magnet_Loss_results_2018_new.csv'

    with open(csv_file, mode='a+') as file:
        file.write('\n')  # Thêm một dòng trống
    autoencoder_results_df.to_csv(csv_file, mode='a+', header=True, index=False)


Epoch [1/10], Loss: 48.92115947178432
Epoch [2/10], Loss: 46.027458735874724
Epoch [3/10], Loss: 43.55541065761021
Epoch [4/10], Loss: 43.496831621442524
Epoch [5/10], Loss: 42.61217171805246
Epoch [6/10], Loss: 42.17488970075335
Epoch [7/10], Loss: 41.94122096470424
Epoch [8/10], Loss: 41.68976865495954
Epoch [9/10], Loss: 41.53050722394671
Epoch [10/10], Loss: 41.30454853602818
Decision Tree Validation - Accuracy: 0.907185628742515 Precision: 0.8846153846153846 Recall: 0.8288288288288288 F1 Score: 0.8558139534883721
Random Forest Validation - Accuracy: 0.9161676646706587 Precision: 0.9191919191919192 Recall: 0.8198198198198198 F1 Score: 0.8666666666666667
LDA Validation - Accuracy: 0.7844311377245509 Precision: 0.8679245283018868 Recall: 0.4144144144144144 F1 Score: 0.5609756097560976
Naive Bayes Validation - Accuracy: 0.4550898203592814 Precision: 0.37630662020905925 Recall: 0.972972972972973 F1 Score: 0.542713567839196
Logistic Regression Validation - Accuracy: 0.7934131736526946 P

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression Validation - Accuracy: 0.7904191616766467 Precision: 0.9361702127659575 Recall: 0.3963963963963964 F1 Score: 0.5569620253164557
KNN Validation - Accuracy: 0.9341317365269461 Precision: 0.9238095238095239 Recall: 0.8738738738738738 F1 Score: 0.8981481481481481
SVM Validation - Accuracy: 0.7754491017964071 Precision: 0.86 Recall: 0.38738738738738737 F1 Score: 0.5341614906832298
Extra Trees Validation - Accuracy: 0.9281437125748503 Precision: 0.9223300970873787 Recall: 0.8558558558558559 F1 Score: 0.8878504672897196
MLP Validation - Accuracy: 0.8532934131736527 Precision: 0.7540983606557377 Recall: 0.8288288288288288 F1 Score: 0.7896995708154506
Epoch [1/10], Loss: 0.6884553347315107
Epoch [2/10], Loss: 0.6606410912105015
Epoch [3/10], Loss: 0.6395048754555839
Epoch [4/10], Loss: 0.6243484701429095
Epoch [5/10], Loss: 0.6201990757669721
Epoch [6/10], Loss: 0.6098505088261196
Epoch [7/10], Loss: 0.607458199773516
Epoch [8/10], Loss: 0.6031806724412101
Epoch [9/10], Loss

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression Validation - Accuracy: 0.8023952095808383 Precision: 0.8571428571428571 Recall: 0.4864864864864865 F1 Score: 0.6206896551724138
KNN Validation - Accuracy: 0.9221556886227545 Precision: 0.9047619047619048 Recall: 0.8558558558558559 F1 Score: 0.8796296296296297
SVM Validation - Accuracy: 0.7904191616766467 Precision: 0.9019607843137255 Recall: 0.4144144144144144 F1 Score: 0.5679012345679012
Extra Trees Validation - Accuracy: 0.9161676646706587 Precision: 0.9029126213592233 Recall: 0.8378378378378378 F1 Score: 0.8691588785046729
MLP Validation - Accuracy: 0.8383233532934131 Precision: 0.7878787878787878 Recall: 0.7027027027027027 F1 Score: 0.7428571428571429
Epoch [1/10], Loss: 0.5114499500819615
Epoch [2/10], Loss: 0.5043963023594448
Epoch [3/10], Loss: 0.49247020908764433
Epoch [4/10], Loss: 0.4957325117928641
Epoch [5/10], Loss: 0.4853495103972299
Epoch [6/10], Loss: 0.4908943942614964
Epoch [7/10], Loss: 0.4871943976197924
Epoch [8/10], Loss: 0.4899751543998718
Epo

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression Validation - Accuracy: 0.7574850299401198 Precision: 0.6923076923076923 Recall: 0.4864864864864865 F1 Score: 0.5714285714285714
KNN Validation - Accuracy: 0.9311377245508982 Precision: 0.9313725490196079 Recall: 0.8558558558558559 F1 Score: 0.892018779342723
SVM Validation - Accuracy: 0.7844311377245509 Precision: 0.9148936170212766 Recall: 0.38738738738738737 F1 Score: 0.5443037974683544
Extra Trees Validation - Accuracy: 0.9281437125748503 Precision: 0.9223300970873787 Recall: 0.8558558558558559 F1 Score: 0.8878504672897196
MLP Validation - Accuracy: 0.7634730538922155 Precision: 0.7051282051282052 Recall: 0.4954954954954955 F1 Score: 0.582010582010582
Epoch [1/10], Loss: 8.109719276428223
Epoch [2/10], Loss: 7.974916321890695
Epoch [3/10], Loss: 7.846714224134173
Epoch [4/10], Loss: 7.747748783656529
Epoch [5/10], Loss: 7.571279866354806
Epoch [6/10], Loss: 7.487406935010638
Epoch [7/10], Loss: 7.3957904406956265
Epoch [8/10], Loss: 7.334943498883929
Epoch [9/10]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression Validation - Accuracy: 0.7934131736526946 Precision: 0.9038461538461539 Recall: 0.42342342342342343 F1 Score: 0.5766871165644172
KNN Validation - Accuracy: 0.937125748502994 Precision: 0.9326923076923077 Recall: 0.8738738738738738 F1 Score: 0.9023255813953488
SVM Validation - Accuracy: 0.7754491017964071 Precision: 0.8461538461538461 Recall: 0.3963963963963964 F1 Score: 0.5398773006134969
Extra Trees Validation - Accuracy: 0.9281437125748503 Precision: 0.9223300970873787 Recall: 0.8558558558558559 F1 Score: 0.8878504672897196
MLP Validation - Accuracy: 0.592814371257485 Precision: 0.41134751773049644 Recall: 0.5225225225225225 F1 Score: 0.4603174603174603
Epoch [1/10], Loss: 1.7088645185743059
Epoch [2/10], Loss: 1.6696676867348808
Epoch [3/10], Loss: 1.631928256579808
Epoch [4/10], Loss: 1.611127495765686
Epoch [5/10], Loss: 1.5995254516601562
Epoch [6/10], Loss: 1.57534088407244
Epoch [7/10], Loss: 1.556468163217817
Epoch [8/10], Loss: 1.5432124819074358
Epoch [9/

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression Validation - Accuracy: 0.7934131736526946 Precision: 0.9772727272727273 Recall: 0.38738738738738737 F1 Score: 0.5548387096774193
KNN Validation - Accuracy: 0.9161676646706587 Precision: 0.8952380952380953 Recall: 0.8468468468468469 F1 Score: 0.8703703703703703
SVM Validation - Accuracy: 0.7844311377245509 Precision: 0.9148936170212766 Recall: 0.38738738738738737 F1 Score: 0.5443037974683544
Extra Trees Validation - Accuracy: 0.9221556886227545 Precision: 0.9207920792079208 Recall: 0.8378378378378378 F1 Score: 0.8773584905660378
MLP Validation - Accuracy: 0.8263473053892215 Precision: 0.6778523489932886 Recall: 0.9099099099099099 F1 Score: 0.7769230769230769
Epoch [1/10], Loss: 2.3802598203931535
Epoch [2/10], Loss: 2.3572867938450406
Epoch [3/10], Loss: 2.357969352177211
Epoch [4/10], Loss: 2.3264540263584683
Epoch [5/10], Loss: 2.3350648880004883
Epoch [6/10], Loss: 2.3432557923453197
Epoch [7/10], Loss: 2.336319923400879
Epoch [8/10], Loss: 2.334970269884382
Epoch