In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import optuna

from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter

In [6]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# With PCA

In [47]:
df = pd.read_csv("preprocessed.csv")
X = df.iloc[:, :-1]
y = df.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [8]:
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long).to(device)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long).to(device)

In [49]:
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, activation_function):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, 20)
        self.fc4 = nn.Linear(20, output_size)
        activation_functions = {
            'relu': F.relu,
            'tanh': torch.tanh,
            'sigmoid': torch.sigmoid
        }
        self.activate = activation_functions[activation_function]

    def forward(self, x):
        x = self.activate(self.fc1(x))
        x = self.activate(self.fc2(x))
        x = self.activate(self.fc3(x))
        x = self.fc4(x)
        return x

In [50]:
def objective(trial):
    hidden_size1 = trial.suggest_int('hidden_size1', 64, 256)
    hidden_size2 = trial.suggest_int('hidden_size2', 32, 128)
    lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)
    batch_size = trial.suggest_int('batch_size', 16, 128)
    activation_function = trial.suggest_categorical(
        'activation_function', ['relu', 'tanh', 'sigmoid'])

    model = SimpleNN(input_size=X_train.shape[1], hidden_size1=hidden_size1,
                     hidden_size2=hidden_size2, output_size=len(
                         np.unique(y_train)),
                     activation_function=activation_function).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    writer = SummaryWriter(
        f'runs/op_{hidden_size1}_{hidden_size2}_{lr}_{batch_size}')

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True)

    num_epochs = 100
    for epoch in range(num_epochs):
        model.train()
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            writer.add_scalar('training_loss', loss.item(),
                              epoch * len(train_loader) + i)

        model.eval()
        with torch.no_grad():
            predicted_outputs = model(X_test_tensor)
            _, predicted_classes = torch.max(predicted_outputs, 1)
            accuracy = (predicted_classes == y_test_tensor).sum(
            ).item() / y_test_tensor.size(0)
            writer.add_scalar('test_accuracy', accuracy, epoch)

        model.train()

    writer.close()
    return accuracy


study = optuna.create_study(
    study_name="activation_function_100",
    storage='sqlite:///log.db',
    direction='maximize')
study.optimize(objective, n_trials=100)

print("Best trial:")
trial = study.best_trial

print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

DuplicatedStudyError: Another study with name 'activation_function_100' already exists. Please specify a different name, or reuse the existing one by setting `load_if_exists` (for Python API) or `--skip-if-exists` flag (for CLI).

# Without PCA

In [4]:
df = pd.read_csv("data_without_pca.csv")
X = df.iloc[:, 1:]
y = df.iloc[:, 0]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [None]:
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long).to(device)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long).to(device)

In [None]:
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, activation_function):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, 20)
        self.fc4 = nn.Linear(20, output_size)
        activation_functions = {
            'relu': F.relu,
            'tanh': torch.tanh,
            'sigmoid': torch.sigmoid
        }
        self.activate = activation_functions[activation_function]

    def forward(self, x):
        x = self.activate(self.fc1(x))
        x = self.activate(self.fc2(x))
        x = self.activate(self.fc3(x))
        x = self.fc4(x)
        return x

In [None]:
def objective(trial):
    hidden_size1 = trial.suggest_int('hidden_size1', 64, 256)
    hidden_size2 = trial.suggest_int('hidden_size2', 32, 128)
    lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)
    batch_size = trial.suggest_int('batch_size', 16, 128)
    activation_function = trial.suggest_categorical(
        'activation_function', ['relu', 'tanh', 'sigmoid'])

    model = SimpleNN(input_size=X_train.shape[1], hidden_size1=hidden_size1,
                     hidden_size2=hidden_size2, output_size=len(
                         np.unique(y_train)),
                     activation_function=activation_function).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    writer = SummaryWriter(
        f'runs/op_{hidden_size1}_{hidden_size2}_{lr}_{batch_size}')

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True)

    num_epochs = 100
    for epoch in range(num_epochs):
        model.train()
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            writer.add_scalar('training_loss', loss.item(),
                              epoch * len(train_loader) + i)

        model.eval()
        with torch.no_grad():
            predicted_outputs = model(X_test_tensor)
            _, predicted_classes = torch.max(predicted_outputs, 1)
            accuracy = (predicted_classes == y_test_tensor).sum(
            ).item() / y_test_tensor.size(0)
            writer.add_scalar('test_accuracy', accuracy, epoch)

        model.train()

    writer.close()
    return accuracy


study = optuna.create_study(
    study_name="activation_function_100_w_pca",
    storage='sqlite:///log.db',
    direction='maximize')
study.optimize(objective, n_trials=100)

print("Best trial:")
trial = study.best_trial

print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

[I 2024-05-20 17:17:34,868] A new study created in RDB with name: activation_function_100_w_pca
[I 2024-05-20 17:18:08,746] Trial 0 finished with value: 0.7020785219399538 and parameters: {'hidden_size1': 77, 'hidden_size2': 51, 'lr': 0.009684488959166704, 'batch_size': 22, 'activation_function': 'tanh'}. Best is trial 0 with value: 0.7020785219399538.
[I 2024-05-20 17:18:25,380] Trial 1 finished with value: 0.7321016166281755 and parameters: {'hidden_size1': 195, 'hidden_size2': 79, 'lr': 0.0007364011407376165, 'batch_size': 76, 'activation_function': 'relu'}. Best is trial 1 with value: 0.7321016166281755.
[I 2024-05-20 17:18:38,478] Trial 2 finished with value: 0.6327944572748267 and parameters: {'hidden_size1': 124, 'hidden_size2': 53, 'lr': 0.00011989221681046469, 'batch_size': 91, 'activation_function': 'relu'}. Best is trial 1 with value: 0.7321016166281755.
[I 2024-05-20 17:18:57,527] Trial 3 finished with value: 0.6535796766743649 and parameters: {'hidden_size1': 99, 'hidden_s

Best trial:
  Value: 0.7806004618937644
  Params: 
    hidden_size1: 223
    hidden_size2: 123
    lr: 0.009243325883276787
    batch_size: 40
    activation_function: relu


In [None]:
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, activation_function):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, 20)
        self.fc4 = nn.Linear(20, output_size)
        activation_functions = {
            'relu': F.relu,
            'tanh': torch.tanh,
            'sigmoid': torch.sigmoid
        }
        self.activate = activation_functions[activation_function]

    def forward(self, x):
        x = self.activate(self.fc1(x))
        x = self.activate(self.fc2(x))
        x = self.activate(self.fc3(x))
        x = self.fc4(x)
        return x

In [None]:
# Second experiment

def objective(trial):
    hidden_size1 = trial.suggest_int('hidden_size1', 64, 256)
    hidden_size2 = trial.suggest_int('hidden_size2', 64, 256)
    lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)
    batch_size = trial.suggest_int('batch_size', 16, 128)
    activation_function = trial.suggest_categorical(
        'activation_function', ['relu', 'tanh', 'sigmoid'])

    model = SimpleNN(input_size=X_train.shape[1], hidden_size1=hidden_size1,
                     hidden_size2=hidden_size2, output_size=len(
                         np.unique(y_train)),
                     activation_function=activation_function).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer_algorithm = trial.suggest_categorical(
        'optimizer', ['adam', 'adamw', 'adamax', 'adadelta'])
    optimizers = {
        'adam': optim.Adam(model.parameters(), lr=lr),
        'adamw': optim.AdamW(model.parameters(), lr=lr),
        'adamax': optim.Adamax(model.parameters(), lr=lr),
        'adadelta': optim.Adadelta(model.parameters(), lr=lr)
    }
    optimizer = optimizers[optimizer_algorithm]

    optimizer = optim.Adam(model.parameters(), lr=lr)
    writer = SummaryWriter(
        f'runs/op_{hidden_size1}_{hidden_size2}_{lr}_{batch_size}')

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True)

    num_epochs = 100
    for epoch in range(num_epochs):
        model.train()
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            writer.add_scalar('training_loss', loss.item(),
                              epoch * len(train_loader) + i)

        model.eval()
        with torch.no_grad():
            predicted_outputs = model(X_test_tensor)
            _, predicted_classes = torch.max(predicted_outputs, 1)
            accuracy = (predicted_classes == y_test_tensor).sum(
            ).item() / y_test_tensor.size(0)
            writer.add_scalar('test_accuracy', accuracy, epoch)

        model.train()

    writer.close()
    return accuracy


study = optuna.create_study(
    study_name="active_function",
    storage='sqlite:///log.db',
    direction='maximize')
study.optimize(objective, n_trials=1000, n_jobs=10)

print("Best trial:")
trial = study.best_trial

print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

[I 2024-05-21 05:52:32,795] A new study created in RDB with name: active_function
[I 2024-05-21 05:53:24,161] Trial 2 finished with value: 0.7482678983833718 and parameters: {'hidden_size1': 106, 'hidden_size2': 217, 'lr': 0.0034714406315490116, 'batch_size': 116, 'activation_function': 'tanh', 'optimizer': 'adamax'}. Best is trial 2 with value: 0.7482678983833718.
[I 2024-05-21 05:53:29,961] Trial 0 finished with value: 0.5080831408775982 and parameters: {'hidden_size1': 232, 'hidden_size2': 185, 'lr': 1.5053612007965442e-05, 'batch_size': 104, 'activation_function': 'sigmoid', 'optimizer': 'adadelta'}. Best is trial 2 with value: 0.7482678983833718.
[I 2024-05-21 05:53:37,340] Trial 4 finished with value: 0.7136258660508084 and parameters: {'hidden_size1': 137, 'hidden_size2': 92, 'lr': 0.0001770009938854068, 'batch_size': 87, 'activation_function': 'relu', 'optimizer': 'adamw'}. Best is trial 2 with value: 0.7482678983833718.
[I 2024-05-21 05:54:04,187] Trial 3 finished with value: 

Best trial:
  Value: 0.7806004618937644
  Params: 
    hidden_size1: 240
    hidden_size2: 138
    lr: 0.0040303667072265625
    batch_size: 111
    activation_function: tanh
    optimizer: adam


# Tuning

In [2]:
class DenseNN(nn.Module):
    def __init__(self,  layer_sizes, activation_function):
        super(DenseNN, self).__init__()
        self.densebois = nn.ModuleList()
        for i in range(1, len(layer_sizes)):
            self.densebois.append(
                nn.Linear(layer_sizes[i - 1], layer_sizes[i]))
        activation_functions = {
            'relu': F.relu,
            'tanh': torch.tanh,
            'sigmoid': torch.sigmoid
        }
        self.activate = activation_functions[activation_function]

    def forward(self, x):
        for layer in self.densebois[:-1]:
            x = self.activate(layer(x))
        x = self.densebois[-1](x)
        return x

In [9]:
def objective(trial):
    num_layers = trial.suggest_int('num_layers', 2, 5)
    layer_sizes = [X_train.shape[1]]
    batch_size = trial.suggest_int('batch_size', 16, 256)

    for i in range(num_layers - 1):
        layer_sizes.append(trial.suggest_int(f'layer_size_{i+1}', 64, 256))

    layer_sizes.append(len(np.unique(y_train)))

    activation_function = trial.suggest_categorical(
        'activation_function', ['relu', 'tanh', 'sigmoid'])

    model = DenseNN(layer_sizes=layer_sizes,
                    activation_function=activation_function).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer_algorithm = trial.suggest_categorical(
        'optimizer', ['adam', 'adamw', 'adamax', 'adadelta'])
    lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)
    optimizers = {
        'adam': optim.Adam(model.parameters(), lr=lr),
        'adamw': optim.AdamW(model.parameters(), lr=lr),
        'adamax': optim.Adamax(model.parameters(), lr=lr),
        'adadelta': optim.Adadelta(model.parameters(), lr=lr)
    }
    optimizer = optimizers[optimizer_algorithm]

    writer = SummaryWriter(f'runs/op_{layer_sizes}_{lr}_{batch_size}')

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True)

    num_epochs = 100
    for epoch in range(num_epochs):
        model.train()
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            writer.add_scalar('training_loss', loss.item(),
                              epoch * len(train_loader) + i)

        model.eval()
        with torch.no_grad():
            predicted_outputs = model(X_test_tensor)
            _, predicted_classes = torch.max(predicted_outputs, 1)
            accuracy = (predicted_classes == y_test_tensor).sum(
            ).item() / y_test_tensor.size(0)
            writer.add_scalar('test_accuracy', accuracy, epoch)

        model.train()

    writer.close()
    return accuracy


study = optuna.create_study(study_name="dynamic_layers",
                            storage='sqlite:///log.db',
                            direction='maximize', load_if_exists=True)
study.optimize(objective, n_trials=1000, n_jobs=10)

print("Best trial:")
trial = study.best_trial

print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

[I 2024-05-23 06:48:24,611] Using an existing study with name 'dynamic_layers' instead of creating a new one.
[I 2024-05-23 06:55:48,193] Trial 914 finished with value: 0.7113163972286374 and parameters: {'num_layers': 4, 'batch_size': 168, 'layer_size_1': 239, 'layer_size_2': 131, 'layer_size_3': 113, 'activation_function': 'tanh', 'optimizer': 'adamw', 'lr': 0.0002743431775195438}. Best is trial 914 with value: 0.7113163972286374.
[I 2024-05-23 06:55:55,212] Trial 915 finished with value: 0.7274826789838337 and parameters: {'num_layers': 4, 'batch_size': 168, 'layer_size_1': 245, 'layer_size_2': 135, 'layer_size_3': 164, 'activation_function': 'relu', 'optimizer': 'adamw', 'lr': 0.00027477624954491735}. Best is trial 915 with value: 0.7274826789838337.
[I 2024-05-23 06:55:57,492] Trial 918 finished with value: 0.7274826789838337 and parameters: {'num_layers': 4, 'batch_size': 168, 'layer_size_1': 246, 'layer_size_2': 130, 'layer_size_3': 114, 'activation_function': 'tanh', 'optimizer