In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from utils.data_preprocessing import load_data, preprocess_data
import os
import numpy as np

save_data = "save_data_2017/"
def load_data(file_path):
    return pd.read_csv(file_path, encoding='cp1252', engine='python')
cicids2017_path = 'data/Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv'

if(not os.path.isfile(save_data + "web_attacks.csv")):
    data_2017 = load_data(cicids2017_path)
    preprocess_data(data_2017, save_data)

In [3]:
df = pd.read_csv(save_data + 'web_attacks.csv')
df['Label'] = df['Label'].apply(lambda x: 0 if x == 'BENIGN' else 1)


normal_df = df[df['Label'] == 0]
attack_df = df[df['Label'] != 0]

num_attack = len(attack_df)

num_normal = 2 * num_attack

In [4]:
normal_df = normal_df.sample(n=num_normal, random_state=42)

# Combine and shuffle the dataset
balanced_df = pd.concat([normal_df, attack_df]).sample(frac=1, random_state=42).reset_index(drop=True)

# Split the balanced dataset into training (60%), validation (20%), and testing sets (20%)
train_df, temp_df = train_test_split(balanced_df, test_size=0.4, random_state=42, stratify=balanced_df['Label'])
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42, stratify=temp_df['Label'])

# Within the training set, ensure normal instances are twice the number of attack instances
train_normal_df = train_df[train_df['Label'] == 0]
train_attack_df = train_df[train_df['Label'] == 1]
num_train_attack = len(train_attack_df)
num_train_normal = 2 * num_train_attack
train_normal_df = train_normal_df.sample(n=num_train_normal, random_state=42)
train_df = pd.concat([train_normal_df, train_attack_df]).sample(frac=1, random_state=42).reset_index(drop=True)

In [5]:
# Print the count of each class in the training, validation, and testing sets
for name, df in zip(["Training", "Validation", "Testing"], [train_df, val_df, test_df]):
    unique, counts = np.unique(df['Label'], return_counts=True)
    print(f"{name} set class distribution:", dict(zip(unique, counts)))

Training set class distribution: {0: 2616, 1: 1308}
Validation set class distribution: {0: 872, 1: 436}
Testing set class distribution: {0: 872, 1: 436}


In [6]:
# Save the train, validation, and test sets to CSV files
train_df.to_csv(save_data +  'train_set.csv', index=False)
val_df.to_csv(save_data +  'val_set.csv', index=False)
test_df.to_csv(save_data + 'test_set.csv', index=False)

In [7]:
# Define feature columns (excluding the label column)
feature_cols = [col for col in df.columns if col != 'Label']

# Extract features and labels for training, validation, and testing sets
X_train = train_df[feature_cols]
y_train = train_df['Label']
X_val = val_df[feature_cols]
y_val = val_df['Label']
X_test = test_df[feature_cols]
y_test = test_df['Label']

In [8]:
# Define classifiers
from models.decision_tree import train_decision_tree
from models.random_forest import train_random_forest
from models.lda import train_lda
from models.naive_bayes import train_naive_bayes
from models.logistic_regression import train_logistic_regression
from models.knn import train_knn
from models.svm import train_svm
from models.extra_trees import train_extra_trees
from models.bagging import train_bagging
from models.mlp import train_mlp

classifiers = {
    'Decision Tree': train_decision_tree,
    'Random Forest': train_random_forest,
    'Linear Discriminant Analysis': train_lda,
    'Naive Bayes': train_naive_bayes,
    'Logistic Regression': train_logistic_regression,
    'K-Nearest Neighbors': train_knn,
    'Support Vector Machine': train_svm,
    'Extra Trees Classifier': train_extra_trees,
    'Bagging Classifier': train_bagging,
    'Multi-layer Perceptron': train_mlp
}

In [9]:
def model_10Classifier():
    results = []
    for name, train_function in classifiers.items():
        model, val_acc, val_pre, val_rec, val_f1, test_acc, test_pre, test_rec, test_f1 = train_function(X_train, y_train, X_val, y_val, X_test, y_test, save_data)
        results.append({
            'Model': name,
            'Validation Accuracy': val_acc,
            'Validation Precision': val_pre,
            'Validation Recall': val_rec,
            'Validation F1 Score': val_f1,
            "Test Accuracy": test_acc,
            "Test Precision": test_pre,
            "Test Recall": test_rec,
            "Test F1 Score": test_f1,
        })

    # Save results to CSV
    results_df = pd.DataFrame(results)
    results_df.to_csv(save_data + '10Classsifer_results_2017.csv', index=False)
    
model_10Classifier()

Decision Tree Validation - Accuracy: 0.9938837920489296 Precision: 0.9885844748858448 Recall: 0.9931192660550459 F1 Score: 0.9908466819221968
Random Forest Validation - Accuracy: 0.9938837920489296 Precision: 0.9953703703703703 Recall: 0.9862385321100917 F1 Score: 0.9907834101382489
LDA Validation - Accuracy: 0.9503058103975535 Precision: 0.9385342789598109 Recall: 0.9105504587155964 F1 Score: 0.9243306169965075
Naive Bayes Validation - Accuracy: 0.845565749235474 Precision: 0.6833855799373041 Recall: 1.0 F1 Score: 0.8119180633147114


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression Validation - Accuracy: 0.9870030581039755 Precision: 0.9794050343249427 Recall: 0.981651376146789 F1 Score: 0.9805269186712485
KNN Validation - Accuracy: 0.9831804281345565 Precision: 0.9620535714285714 Recall: 0.9885321100917431 F1 Score: 0.9751131221719457
SVM Validation - Accuracy: 0.941131498470948 Precision: 0.9126436781609195 Recall: 0.9105504587155964 F1 Score: 0.9115958668197475
Extra Trees Validation - Accuracy: 0.9969418960244648 Precision: 0.9931506849315068 Recall: 0.9977064220183486 F1 Score: 0.9954233409610984
MLP Validation - Accuracy: 0.9655963302752294 Precision: 0.911578947368421 Recall: 0.9931192660550459 F1 Score: 0.9506037321624589


In [10]:
import pandas as pd
from sklearn.decomposition import PCA

# Assume the classifiers dictionary and train functions are already defined
# Define a function to apply PCA and train classifiers

def train_with_pca(X_train, y_train, X_val, y_val, X_test, y_test, n_components=20):
    X_combined = np.concatenate((X_train, X_val, X_test), axis=0)
    y_combined = np.concatenate((y_train, y_val, y_test), axis=0)

    pca = PCA(n_components=n_components)
    X_combined_pca = pca.fit_transform(X_combined)

    # Bước 3: Tách lại dữ liệu thành các tập train, validation và test
    X_train_pca = X_combined_pca[:len(X_train)]
    X_val_pca = X_combined_pca[len(X_train):len(X_train)+len(X_val)]
    X_test_pca = X_combined_pca[len(X_train)+len(X_val):]

    # Kiểm tra kích thước của các tập dữ liệu sau khi PCA
    print("X_train_pca shape:", X_train_pca.shape)
    print("X_val_pca shape:", X_val_pca.shape)
    print("X_test_pca shape:", X_test_pca.shape)
    
    results = []
    for name, train_function in classifiers.items():
        model, val_acc, val_pre, val_rec, val_f1, test_acc, test_pre, test_rec, test_f1 = train_function(X_train_pca, y_train, X_val_pca, y_val, X_test_pca, y_test, save_data)
        results.append({
            'Model': name,
            'Validation Accuracy': val_acc,
            'Validation Precision': val_pre,
            'Validation Recall': val_rec,
            'Validation F1 Score': val_f1,
            'Test Accuracy': test_acc,
            'Test Precision': test_pre,
            'Test Recall': test_rec,
            'Test F1 Score': test_f1
        })
    
    return results

# Call the function and save results
pca_results = train_with_pca(X_train, y_train, X_val, y_val, X_test, y_test)
pca_results_df = pd.DataFrame(pca_results)
pca_results_df.to_csv(save_data + 'pca_model_results_2017.csv', index=False)


X_train_pca shape: (3924, 20)
X_val_pca shape: (1308, 20)
X_test_pca shape: (1308, 20)
Decision Tree Validation - Accuracy: 0.985474006116208 Precision: 0.9793103448275862 Recall: 0.9770642201834863 F1 Score: 0.9781859931113662
Random Forest Validation - Accuracy: 0.9892966360856269 Precision: 0.9884259259259259 Recall: 0.9793577981651376 F1 Score: 0.9838709677419355
LDA Validation - Accuracy: 0.9281345565749235 Precision: 0.9476439790575916 Recall: 0.8302752293577982 F1 Score: 0.8850855745721271
Naive Bayes Validation - Accuracy: 0.5680428134556575 Precision: 0.42981501632208924 Recall: 0.9059633027522935 F1 Score: 0.5830258302583026


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression Validation - Accuracy: 0.9388379204892966 Precision: 0.908256880733945 Recall: 0.908256880733945 F1 Score: 0.908256880733945
KNN Validation - Accuracy: 0.9610091743119266 Precision: 0.9249448123620309 Recall: 0.9610091743119266 F1 Score: 0.9426321709786277
SVM Validation - Accuracy: 0.9418960244648318 Precision: 0.9147465437788018 Recall: 0.9105504587155964 F1 Score: 0.9126436781609195
Extra Trees Validation - Accuracy: 0.9900611620795107 Precision: 0.9884526558891455 Recall: 0.981651376146789 F1 Score: 0.9850402761795167
MLP Validation - Accuracy: 0.9587155963302753 Precision: 0.9569377990430622 Recall: 0.9174311926605505 F1 Score: 0.936768149882904


In [11]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Định nghĩa Autoencoder
class Autoencoder(nn.Module):
    def __init__(self, input_dim):
        super(Autoencoder, self).__init__()
        encoding_dim1 = 50
        encoding_dim2 = 30
        encoding_dim3 = int(np.sqrt(input_dim)) + 1

        # Lớp mã hóa
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, encoding_dim1),
            nn.ReLU(),
            nn.Linear(encoding_dim1, encoding_dim2),
            nn.ReLU(),
            nn.Linear(encoding_dim2, encoding_dim3),
            nn.ReLU()
        )
        # Lớp giải mã
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim3, encoding_dim2),
            nn.ReLU(),
            nn.Linear(encoding_dim2, encoding_dim1),
            nn.ReLU(),
            nn.Linear(encoding_dim1, input_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded

# Hàm để áp dụng deep autoencoder và train classifiers
def train_with_autoencoder(X_train, y_train, X_val, y_val, X_test, y_test, epochs=50, batch_size=256, alpha1=0.1):
    input_dim = X_train.shape[1]
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    # Khởi tạo mô hình và các thành phần cần thiết
    autoencoder = Autoencoder(input_dim).to(device)
    optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)
    criterion = nn.MSELoss()

    # Chuyển đổi dữ liệu DataFrame thành NumPy array và tensor
    X_train_np = X_train.to_numpy()
    X_val_np = X_val.to_numpy()
    X_test_np = X_test.to_numpy()
    y_train_np = y_train.to_numpy()
    y_val_np = y_val.to_numpy()
    y_test_np = y_test.to_numpy()
    
    # Tạo TensorDataset cho training và validation
    train_dataset = TensorDataset(
        torch.tensor(X_train_np, dtype=torch.float32),
        torch.tensor(X_train_np, dtype=torch.float32)  # Sử dụng dữ liệu đầu vào làm mục tiêu cho autoencoder
    )
    val_dataset = TensorDataset(
        torch.tensor(X_val_np, dtype=torch.float32),
        torch.tensor(X_val_np, dtype=torch.float32)  # Sử dụng dữ liệu đầu vào làm mục tiêu cho autoencoder
    )
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    
    # Training loop
    for epoch in range(epochs):
        autoencoder.train()
        train_loss = 0.0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            encoded, outputs = autoencoder(inputs)
            loss = alpha1 * criterion(outputs, targets)  # Chỉ sử dụng MSE loss
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        print(f'Epoch [{epoch+1}/{epochs}], Loss: {train_loss/len(train_loader)}')

    # Encode dữ liệu
    autoencoder.eval()
    with torch.no_grad():
        X_train_enc, _ = autoencoder(torch.tensor(X_train_np, dtype=torch.float32).to(device))
        X_val_enc, _ = autoencoder(torch.tensor(X_val_np, dtype=torch.float32).to(device))
        X_test_enc, _ = autoencoder(torch.tensor(X_test_np, dtype=torch.float32).to(device))
    
    # Train và đánh giá classifiers
    results = []
    for name, train_function in classifiers.items():
        model, val_acc, val_pre, val_rec, val_f1, test_acc, test_pre, test_rec, test_f1 = train_function(X_train_enc.cpu().numpy(), y_train, X_val_enc.cpu().numpy(), y_val, X_test_enc.cpu().numpy(), y_test, save_data)
        results.append({
            'Model': name,
            'Validation Accuracy': val_acc,
            'Validation Precision': val_pre,
            'Validation Recall': val_rec,
            'Validation F1 Score': val_f1,
            'Test Accuracy': test_acc,
            'Test Precision': test_pre,
            'Test Recall': test_rec,
            'Test F1 Score': test_f1
        })
    
    return results

# Gọi hàm và lưu kết quả
autoencoder_results = train_with_autoencoder(X_train, y_train, X_val, y_val, X_test, y_test)
autoencoder_results_df = pd.DataFrame(autoencoder_results)
autoencoder_results_df.to_csv(save_data + 'autoencoder_model_results.csv', index=False)


Epoch [1/50], Loss: 5841290149888.0
Epoch [2/50], Loss: 5798083837952.0
Epoch [3/50], Loss: 5910991552512.0
Epoch [4/50], Loss: 5722112278528.0
Epoch [5/50], Loss: 5803935481856.0
Epoch [6/50], Loss: 5975964778496.0
Epoch [7/50], Loss: 5692207988736.0
Epoch [8/50], Loss: 5802480599040.0
Epoch [9/50], Loss: 5680468410368.0
Epoch [10/50], Loss: 5802610343936.0
Epoch [11/50], Loss: 5742305280000.0
Epoch [12/50], Loss: 5887142690816.0
Epoch [13/50], Loss: 6496283344896.0
Epoch [14/50], Loss: 5795666132992.0
Epoch [15/50], Loss: 5840801218560.0
Epoch [16/50], Loss: 5706979983360.0
Epoch [17/50], Loss: 5907976192000.0
Epoch [18/50], Loss: 5834244571136.0
Epoch [19/50], Loss: 5789612720128.0
Epoch [20/50], Loss: 5680554344448.0
Epoch [21/50], Loss: 5931684085760.0
Epoch [22/50], Loss: 5873183703040.0
Epoch [23/50], Loss: 5803325095936.0
Epoch [24/50], Loss: 5821406478336.0
Epoch [25/50], Loss: 5753672155136.0
Epoch [26/50], Loss: 5882027491328.0
Epoch [27/50], Loss: 5720095883264.0
Epoch [28/

In [17]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F

# Định nghĩa Autoencoder
class Autoencoder(nn.Module):
    def __init__(self, input_dim):
        super(Autoencoder, self).__init__()
        encoding_dim1 = 50
        encoding_dim2 = 30
        encoding_dim3 = int(np.sqrt(input_dim)) + 1

        # Lớp mã hóa
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, encoding_dim1),
            nn.ReLU(),
            nn.Linear(encoding_dim1, encoding_dim2),
            nn.ReLU(),
            nn.Linear(encoding_dim2, encoding_dim3),
            nn.ReLU()  # Đổi từ sigmoid thành ReLU để giữ giá trị dương
        )
        # Lớp giải mã
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim3, encoding_dim2),
            nn.ReLU(),
            nn.Linear(encoding_dim2, encoding_dim1),
            nn.ReLU(),
            nn.Linear(encoding_dim1, input_dim),
            nn.Sigmoid()  # Giữ sigmoid cho đầu ra
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded, encoded

class BinaryMagnetLoss(nn.Module):
    def __init__(self, alpha=7.18, epsilon=1e-6):
        super(BinaryMagnetLoss, self).__init__()
        self.alpha = alpha
        self.epsilon = epsilon

    def forward(self, embeddings, labels):
        _min_float = 1e-6
        embeddings = embeddings.float()
        batch_size = embeddings.size(0)

        unique_labels = labels.unique()
        batch_clusters = {label.item(): (labels == label).nonzero(as_tuple=True)[0] for label in unique_labels}

        num_instances = 0.0
        stdev = torch.zeros(1).to(embeddings.device)
        c_means = torch.stack([torch.mean(embeddings[batch_clusters[label.item()]], dim=0) for label in unique_labels]).to(embeddings.device)

        for label in unique_labels:
            cluster_indices = batch_clusters[label.item()]
            for i in cluster_indices:
                stdev += (embeddings[i] - c_means[unique_labels.tolist().index(label)]).norm(p=2).pow(2)
                num_instances += 1.0

        stdev = stdev / (num_instances - 1.0)
        stdev = -2.0 * (stdev + self.epsilon)  # Add epsilon to avoid division by zero

        loss = torch.zeros(1).to(embeddings.device)
        denom = [torch.zeros(1).to(embeddings.device) for _ in range(batch_size)]

        for label in unique_labels:
            cluster_indices = batch_clusters[label.item()]
            for i in cluster_indices:
                for other_label in unique_labels:
                    if other_label != label:
                        denom[i] += ((embeddings[i] - c_means[unique_labels.tolist().index(other_label)]).norm().pow(2) / stdev).exp()

                # Avoid log(0) by clamping values
                loss_term = (((embeddings[i] - c_means[unique_labels.tolist().index(label)]).norm().pow(2) / stdev - self.alpha).exp() / (denom[i] + self.epsilon)).log().clamp(max=0.0)
                if torch.isnan(loss_term) or torch.isinf(loss_term):
                    continue  # Skip this term if it's invalid
                loss -= loss_term

        loss /= num_instances
        return loss


# Custom Loss Function combining Cross-Entropy and Binary Magnet Loss
def custom_loss(y_true, y_pred, embeddings, alpha1=1.0, alpha2=1.0):
    ce_loss = F.cross_entropy(y_pred, y_true.long())  # Tính toán Cross-Entropy Loss
    magnet_loss_fn = BinaryMagnetLoss()
    magnet_loss_value = magnet_loss_fn(embeddings, y_true)
    return alpha1 * ce_loss + alpha2 * magnet_loss_value

# Hàm để áp dụng deep autoencoder và train classifiers
def train_with_autoencoder(X_train, y_train, X_val, y_val, X_test, y_test, classifiers, save_data, epochs=10, batch_size=256, alpha1=1.0, alpha2=1.0):
    input_dim = X_train.shape[1]
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    autoencoder = Autoencoder(input_dim).to(device)
    optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)

    X_train_np = X_train.to_numpy()
    X_val_np = X_val.to_numpy()
    y_train_np = y_train.to_numpy()
    y_val_np = y_val.to_numpy()

    train_dataset = TensorDataset(
        torch.tensor(X_train_np, dtype=torch.float32),
        torch.tensor(y_train_np, dtype=torch.float32)
    )
    val_dataset = TensorDataset(
        torch.tensor(X_val_np, dtype=torch.float32),
        torch.tensor(y_val_np, dtype=torch.float32)
    )
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    for epoch in range(epochs):
        autoencoder.train()
        train_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs, encoded = autoencoder(inputs)
            loss = custom_loss(labels, outputs, encoded, alpha1=alpha1, alpha2=alpha2)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        print(f'Epoch [{epoch+1}/{epochs}], Loss: {train_loss/len(train_loader)}')

    autoencoder.eval()
    with torch.no_grad():
        X_train_enc = autoencoder.encoder(torch.tensor(X_train_np, dtype=torch.float32).to(device)).cpu().numpy()
        X_val_enc = autoencoder.encoder(torch.tensor(X_val_np, dtype=torch.float32).to(device)).cpu().numpy()
        X_test_enc = autoencoder.encoder(torch.tensor(X_test.to_numpy(), dtype=torch.float32).to(device)).cpu().numpy()

    results = []
    for name, train_function in classifiers.items():
        model, val_acc, val_pre, val_rec, val_f1, test_acc, test_pre, test_rec, test_f1 = train_function(X_train_enc, y_train, X_val_enc, y_val, X_test_enc, y_test, save_data)
        results.append({
            'Model': name,
            'Validation Accuracy': val_acc,
            'Validation Precision': val_pre,
            'Validation Recall': val_rec,
            'Validation F1 Score': val_f1,
            'Test Accuracy': test_acc,
            'Test Precision': test_pre,
            'Test Recall': test_rec,
            'Test F1 Score': test_f1
        })

    return results

# Gọi hàm và lưu kết quả
autoencoder_results = train_with_autoencoder(X_train, y_train, X_val, y_val, X_test, y_test, classifiers, save_data)
autoencoder_results_df = pd.DataFrame(autoencoder_results)
autoencoder_results_df.to_csv(save_data + 'autoencoder_Binary_Magnet_Loss_results.csv', index=False)


Epoch [1/10], Loss: 11.087749660015106
Epoch [2/10], Loss: 10.836863934993744
Epoch [3/10], Loss: 10.908373057842255
Epoch [4/10], Loss: 10.80006217956543
Epoch [5/10], Loss: 10.778883874416351
Epoch [6/10], Loss: 10.565011441707611
Epoch [7/10], Loss: 10.519079506397247
Epoch [8/10], Loss: 10.497524201869965
Epoch [9/10], Loss: 10.499695062637329
Epoch [10/10], Loss: 10.492914974689484
Decision Tree Validation - Accuracy: 0.9709480122324159 Precision: 0.9481981981981982 Recall: 0.9655963302752294 F1 Score: 0.9568181818181818
Random Forest Validation - Accuracy: 0.9747706422018348 Precision: 0.9569160997732427 Recall: 0.9678899082568807 F1 Score: 0.9623717217787914
LDA Validation - Accuracy: 0.9296636085626911 Precision: 0.9387755102040817 Recall: 0.8440366972477065 F1 Score: 0.8888888888888888
Naive Bayes Validation - Accuracy: 0.44571865443425074 Precision: 0.3710972346119536 Recall: 0.9541284403669725 F1 Score: 0.5343609505459217
Logistic Regression Validation - Accuracy: 0.92889908