<a href="https://colab.research.google.com/github/yukiharada1228/spring_seminar_2023/blob/main/07_seminar_%E8%AA%B2%E9%A1%8C%E6%8F%90%E5%87%BA/ER20069_%E5%8E%9F%E7%94%B0%E5%84%AA%E8%BC%9D/11_CIFAR_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install optuna --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m365.3/365.3 KB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.2/212.2 KB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 KB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import torch
from torch import nn

import torchvision
from torchvision import transforms

import optuna

In [3]:
train_data = torchvision.datasets.CIFAR10(
    root="./", train=True, transform=transforms.ToTensor(), download=True)
test_data = torchvision.datasets.CIFAR10(
    root="./", train=False, transform=transforms.ToTensor(), download=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./cifar-10-python.tar.gz to ./
Files already downloaded and verified


In [46]:
def train(model, device, train_loader, criterion, optimizer):
    sum_loss = 0
    num_correct = 0
    num_images = len(train_loader.dataset)
    model.train()
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        model.zero_grad()
        loss.backward()
        optimizer.step()

        sum_loss += loss.item()
        preds = torch.argmax(outputs, dim=1)
        num_correct += torch.sum(preds == labels).item()
    mean_loss = sum_loss / num_images
    accuracy = num_correct / num_images
    print({
        "action": "train", 
        "mean_loss": mean_loss, 
        "accuracy": accuracy}
    )
    return mean_loss, accuracy

def test(model, device, test_loader):
    num_correct = 0
    num_images = len(test_loader.dataset)
    model.eval()
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)

        preds = torch.argmax(outputs, dim=1)
        num_correct += torch.sum(preds == labels).item()
    accuracy = num_correct / num_images
    print({
        "action": "test", 
        "accuracy": accuracy}
    )
    return accuracy

def get_activation(trial):
    activation_names = ('ReLU', 'ELU')
    activation_name = trial.suggest_categorical('activation', activation_names)
    if activation_name == activation_names[0]:
        activation = nn.ReLU()
    else:
        activation = nn.ELU()
    return activation, activation_name

class CNN(nn.Module):
    def __init__(self, activation, fc_units, num_layer):
        super(CNN, self).__init__()
        in_height = 32
        in_width = 32
        in_channels = 3
        out_channels = 16
        self.convs = nn.ModuleList([
            nn.Conv2d(
                in_channels, out_channels, kernel_size=3, stride=1, padding=1)])
        out_height = in_height
        out_width = in_width
        for i in range(1, num_layer - 3):
            out_height //= 2
            out_width //= 2
            in_channels = out_channels
            out_channels *= 2
            self.convs.append(
                nn.Conv2d(
                    in_channels, out_channels, kernel_size=3, stride=1, padding=1))
        out_height //= 2
        out_width //= 2
        self.l1 = nn.Linear(out_height * out_width * out_channels, fc_units)
        self.l2 = nn.Linear(fc_units, fc_units)
        self.l3 = nn.Linear(fc_units, 10)
        self.act = activation
        self.pool = nn.MaxPool2d(2, 2)
    
    def forward(self, x):
        for conv in self.convs:
            x = self.pool(self.act(conv(x)))
        x = x.view(x.size()[0], -1)
        x = self.act(self.l1(x))
        x = self.act(self.l2(x))
        x = self.l3(x)
        return x

def get_optimizer(trial, model):
    optimizer_names = ('MomentumSGD', 'Adam')
    optimizer_name = trial.suggest_categorical('optimizer', optimizer_names)
    weight_decay = trial.suggest_float('weight_decay', 1e-10, 1e-3, log=True)
    if optimizer_name == optimizer_names[0]:
        lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)
    else:
        lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    return optimizer, optimizer_name, lr, weight_decay

def objective(trial):
    num_epoch = trial.suggest_int("num_epoch", 10, 50)
    batch_size = trial.suggest_int("batch_size", 64, 256)
    fc_units = trial.suggest_int("fc_units", 1024, 4096)
    num_layer = trial.suggest_int("num_layer", 5, 7)
    activation, activation_name = get_activation(trial)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=batch_size, shuffle=False)
    criterion = nn.CrossEntropyLoss().to(device)
    model = CNN(activation, fc_units, num_layer).to(device)
    optimizer, optimizer_name, lr, weight_decay = get_optimizer(trial, model)
    print({
        "action": "objective", 
        "num_epoch": num_epoch,
        "activation_name": activation_name,
        "fc_units": fc_units,
        "num_layer": num_layer,
        "optimizer_name": optimizer_name,
        "lr": lr, 
        "weight_decay": weight_decay,})
    for epoch in range(1, num_epoch + 1):
        train_sum_loss, train_accuracy = train(
            model, device, train_loader, criterion, optimizer)
    test_accuracy = test(model, device, test_loader)
    return test_accuracy

In [48]:
study = optuna.create_study(
    direction="maximize", storage="sqlite:///optuna.db", 
    study_name="CNN", load_if_exists=True)
study.optimize(objective, n_trials=10)

[32m[I 2023-03-20 16:17:57,472][0m A new study created in RDB with name: CNN[0m


{'action': 'objective', 'num_epoch': 41, 'activation_name': 'ELU', 'fc_units': 2091, 'num_layer': 7, 'optimizer_name': 'Adam', 'lr': 0.004585810865533606, 'weight_decay': 9.001636356893551e-05}
{'action': 'train', 'mean_loss': 0.011719607520103455, 'accuracy': 0.38648}
{'action': 'train', 'mean_loss': 0.007184911177158356, 'accuracy': 0.58432}
{'action': 'train', 'mean_loss': 0.006401865640878677, 'accuracy': 0.63328}
{'action': 'train', 'mean_loss': 0.005823105705976486, 'accuracy': 0.6669}
{'action': 'train', 'mean_loss': 0.0053364636874198915, 'accuracy': 0.6959}
{'action': 'train', 'mean_loss': 0.005091590056419373, 'accuracy': 0.71094}
{'action': 'train', 'mean_loss': 0.004825899468660354, 'accuracy': 0.73018}
{'action': 'train', 'mean_loss': 2.0531205676496027, 'accuracy': 0.55536}
{'action': 'train', 'mean_loss': 0.020339992842674254, 'accuracy': 0.10282}
{'action': 'train', 'mean_loss': 0.015185096502304077, 'accuracy': 0.11228}
{'action': 'train', 'mean_loss': 0.01525770232200

[32m[I 2023-03-20 16:26:34,353][0m Trial 0 finished with value: 0.1 and parameters: {'num_epoch': 41, 'batch_size': 164, 'fc_units': 2091, 'num_layer': 7, 'activation': 'ELU', 'optimizer': 'Adam', 'weight_decay': 9.001636356893551e-05, 'lr': 0.004585810865533606}. Best is trial 0 with value: 0.1.[0m


{'action': 'test', 'accuracy': 0.1}
{'action': 'objective', 'num_epoch': 37, 'activation_name': 'ReLU', 'fc_units': 2101, 'num_layer': 6, 'optimizer_name': 'MomentumSGD', 'lr': 0.0003525863051047292, 'weight_decay': 1.425570712003436e-09}
{'action': 'train', 'mean_loss': 0.009531156988143922, 'accuracy': 0.11164}
{'action': 'train', 'mean_loss': 0.009528208575248718, 'accuracy': 0.10952}
{'action': 'train', 'mean_loss': 0.009525134897232055, 'accuracy': 0.13462}
{'action': 'train', 'mean_loss': 0.00952175066947937, 'accuracy': 0.15786}
{'action': 'train', 'mean_loss': 0.009517834124565125, 'accuracy': 0.16122}
{'action': 'train', 'mean_loss': 0.00951314311504364, 'accuracy': 0.14888}
{'action': 'train', 'mean_loss': 0.0095074764919281, 'accuracy': 0.13236}
{'action': 'train', 'mean_loss': 0.009500225005149842, 'accuracy': 0.1263}
{'action': 'train', 'mean_loss': 0.0094909113073349, 'accuracy': 0.13094}
{'action': 'train', 'mean_loss': 0.00947894371986389, 'accuracy': 0.12504}
{'action'

[32m[I 2023-03-20 16:33:33,538][0m Trial 1 finished with value: 0.3707 and parameters: {'num_epoch': 37, 'batch_size': 242, 'fc_units': 2101, 'num_layer': 6, 'activation': 'ReLU', 'optimizer': 'MomentumSGD', 'weight_decay': 1.425570712003436e-09, 'lr': 0.0003525863051047292}. Best is trial 1 with value: 0.3707.[0m


{'action': 'test', 'accuracy': 0.3707}
{'action': 'objective', 'num_epoch': 27, 'activation_name': 'ELU', 'fc_units': 2755, 'num_layer': 5, 'optimizer_name': 'Adam', 'lr': 0.00011960441454274021, 'weight_decay': 1.8744621637622562e-08}
{'action': 'train', 'mean_loss': 0.006788593428134918, 'accuracy': 0.38646}
{'action': 'train', 'mean_loss': 0.005725726296901703, 'accuracy': 0.48626}
{'action': 'train', 'mean_loss': 0.005255042066574097, 'accuracy': 0.53126}
{'action': 'train', 'mean_loss': 0.004946095387935639, 'accuracy': 0.56018}
{'action': 'train', 'mean_loss': 0.004708562693595886, 'accuracy': 0.58066}
{'action': 'train', 'mean_loss': 0.004540925097465515, 'accuracy': 0.59726}
{'action': 'train', 'mean_loss': 0.004381102088689804, 'accuracy': 0.61024}
{'action': 'train', 'mean_loss': 0.004275233733654022, 'accuracy': 0.61924}
{'action': 'train', 'mean_loss': 0.004171485720872879, 'accuracy': 0.6303}
{'action': 'train', 'mean_loss': 0.004078167088031769, 'accuracy': 0.63812}
{'act

[32m[I 2023-03-20 16:39:11,559][0m Trial 2 finished with value: 0.6357 and parameters: {'num_epoch': 27, 'batch_size': 254, 'fc_units': 2755, 'num_layer': 5, 'activation': 'ELU', 'optimizer': 'Adam', 'weight_decay': 1.8744621637622562e-08, 'lr': 0.00011960441454274021}. Best is trial 2 with value: 0.6357.[0m


{'action': 'test', 'accuracy': 0.6357}
{'action': 'objective', 'num_epoch': 45, 'activation_name': 'ELU', 'fc_units': 3241, 'num_layer': 6, 'optimizer_name': 'MomentumSGD', 'lr': 0.0018303221828338506, 'weight_decay': 1.4616122048756746e-06}
{'action': 'train', 'mean_loss': 0.025294790766239167, 'accuracy': 0.20358}
{'action': 'train', 'mean_loss': 0.02108346347570419, 'accuracy': 0.35926}
{'action': 'train', 'mean_loss': 0.018368250682353975, 'accuracy': 0.43614}
{'action': 'train', 'mean_loss': 0.01705931688785553, 'accuracy': 0.47874}
{'action': 'train', 'mean_loss': 0.015898083000183104, 'accuracy': 0.51606}
{'action': 'train', 'mean_loss': 0.014876388852596283, 'accuracy': 0.54826}
{'action': 'train', 'mean_loss': 0.013917396413087845, 'accuracy': 0.57758}
{'action': 'train', 'mean_loss': 0.013166468287706375, 'accuracy': 0.59956}
{'action': 'train', 'mean_loss': 0.012477254512310028, 'accuracy': 0.62058}
{'action': 'train', 'mean_loss': 0.011937195738554, 'accuracy': 0.64014}
{'a

[32m[I 2023-03-20 16:49:59,403][0m Trial 3 finished with value: 0.7292 and parameters: {'num_epoch': 45, 'batch_size': 86, 'fc_units': 3241, 'num_layer': 6, 'activation': 'ELU', 'optimizer': 'MomentumSGD', 'weight_decay': 1.4616122048756746e-06, 'lr': 0.0018303221828338506}. Best is trial 3 with value: 0.7292.[0m


{'action': 'test', 'accuracy': 0.7292}
{'action': 'objective', 'num_epoch': 49, 'activation_name': 'ReLU', 'fc_units': 1496, 'num_layer': 6, 'optimizer_name': 'Adam', 'lr': 0.004220359557881649, 'weight_decay': 0.0006939160468570179}
{'action': 'train', 'mean_loss': 0.00909331269979477, 'accuracy': 0.30552}
{'action': 'train', 'mean_loss': 0.007102437500953674, 'accuracy': 0.47926}
{'action': 'train', 'mean_loss': 0.0062803859186172485, 'accuracy': 0.54388}
{'action': 'train', 'mean_loss': 0.005638876625299454, 'accuracy': 0.59508}
{'action': 'train', 'mean_loss': 0.005221017805337906, 'accuracy': 0.62792}
{'action': 'train', 'mean_loss': 0.004878550741672516, 'accuracy': 0.65214}
{'action': 'train', 'mean_loss': 0.004632216012477874, 'accuracy': 0.6718}
{'action': 'train', 'mean_loss': 0.004415258774757385, 'accuracy': 0.68752}
{'action': 'train', 'mean_loss': 0.004267326225042343, 'accuracy': 0.69918}
{'action': 'train', 'mean_loss': 0.004147193398475647, 'accuracy': 0.70734}
{'actio

[32m[I 2023-03-20 16:59:07,580][0m Trial 4 finished with value: 0.7153 and parameters: {'num_epoch': 49, 'batch_size': 201, 'fc_units': 1496, 'num_layer': 6, 'activation': 'ReLU', 'optimizer': 'Adam', 'weight_decay': 0.0006939160468570179, 'lr': 0.004220359557881649}. Best is trial 3 with value: 0.7292.[0m


{'action': 'test', 'accuracy': 0.7153}
{'action': 'objective', 'num_epoch': 19, 'activation_name': 'ELU', 'fc_units': 3140, 'num_layer': 6, 'optimizer_name': 'MomentumSGD', 'lr': 0.0001160053277802786, 'weight_decay': 1.0532124982330885e-07}
{'action': 'train', 'mean_loss': 0.03487002031326294, 'accuracy': 0.11572}
{'action': 'train', 'mean_loss': 0.03476797332763672, 'accuracy': 0.10904}
{'action': 'train', 'mean_loss': 0.034637117490768435, 'accuracy': 0.13192}
{'action': 'train', 'mean_loss': 0.03443514187335968, 'accuracy': 0.14652}
{'action': 'train', 'mean_loss': 0.034073926911354066, 'accuracy': 0.19814}
{'action': 'train', 'mean_loss': 0.03333650982379913, 'accuracy': 0.23712}
{'action': 'train', 'mean_loss': 0.03200572922468185, 'accuracy': 0.257}
{'action': 'train', 'mean_loss': 0.03091896845340729, 'accuracy': 0.27148}
{'action': 'train', 'mean_loss': 0.030470668926239015, 'accuracy': 0.2821}
{'action': 'train', 'mean_loss': 0.030174583973884582, 'accuracy': 0.2892}
{'action

[32m[I 2023-03-20 17:03:54,518][0m Trial 5 finished with value: 0.3969 and parameters: {'num_epoch': 19, 'batch_size': 66, 'fc_units': 3140, 'num_layer': 6, 'activation': 'ELU', 'optimizer': 'MomentumSGD', 'weight_decay': 1.0532124982330885e-07, 'lr': 0.0001160053277802786}. Best is trial 3 with value: 0.7292.[0m


{'action': 'test', 'accuracy': 0.3969}
{'action': 'objective', 'num_epoch': 21, 'activation_name': 'ReLU', 'fc_units': 1808, 'num_layer': 6, 'optimizer_name': 'MomentumSGD', 'lr': 0.00020921996659466465, 'weight_decay': 0.0005581012288646244}
{'action': 'train', 'mean_loss': 0.02214965557575226, 'accuracy': 0.10632}
{'action': 'train', 'mean_loss': 0.022147761754989625, 'accuracy': 0.11126}
{'action': 'train', 'mean_loss': 0.022145998039245607, 'accuracy': 0.12298}
{'action': 'train', 'mean_loss': 0.02214434310913086, 'accuracy': 0.12542}
{'action': 'train', 'mean_loss': 0.022142651839256285, 'accuracy': 0.14868}
{'action': 'train', 'mean_loss': 0.022140907430648803, 'accuracy': 0.14924}
{'action': 'train', 'mean_loss': 0.02213900532722473, 'accuracy': 0.14352}
{'action': 'train', 'mean_loss': 0.02213695001125336, 'accuracy': 0.16308}
{'action': 'train', 'mean_loss': 0.02213462959766388, 'accuracy': 0.17902}
{'action': 'train', 'mean_loss': 0.022131980299949647, 'accuracy': 0.16868}
{'

[32m[I 2023-03-20 17:08:07,089][0m Trial 6 finished with value: 0.212 and parameters: {'num_epoch': 21, 'batch_size': 104, 'fc_units': 1808, 'num_layer': 6, 'activation': 'ReLU', 'optimizer': 'MomentumSGD', 'weight_decay': 0.0005581012288646244, 'lr': 0.00020921996659466465}. Best is trial 3 with value: 0.7292.[0m


{'action': 'test', 'accuracy': 0.212}
{'action': 'objective', 'num_epoch': 12, 'activation_name': 'ReLU', 'fc_units': 1538, 'num_layer': 7, 'optimizer_name': 'Adam', 'lr': 0.002895641762927132, 'weight_decay': 1.5294588677881859e-06}
{'action': 'train', 'mean_loss': 0.008574312498569488, 'accuracy': 0.32104}
{'action': 'train', 'mean_loss': 0.00639941799402237, 'accuracy': 0.5115}
{'action': 'train', 'mean_loss': 0.005530523422956467, 'accuracy': 0.5904}
{'action': 'train', 'mean_loss': 0.004913119673728943, 'accuracy': 0.63954}
{'action': 'train', 'mean_loss': 0.0044913748598098754, 'accuracy': 0.6729}
{'action': 'train', 'mean_loss': 0.0041580005633831026, 'accuracy': 0.69458}
{'action': 'train', 'mean_loss': 0.003890778361558914, 'accuracy': 0.71604}
{'action': 'train', 'mean_loss': 0.0036572927367687223, 'accuracy': 0.73096}
{'action': 'train', 'mean_loss': 0.003399243003129959, 'accuracy': 0.7489}
{'action': 'train', 'mean_loss': 0.0031827754980325697, 'accuracy': 0.76594}
{'actio

[32m[I 2023-03-20 17:10:28,095][0m Trial 7 finished with value: 0.6877 and parameters: {'num_epoch': 12, 'batch_size': 206, 'fc_units': 1538, 'num_layer': 7, 'activation': 'ReLU', 'optimizer': 'Adam', 'weight_decay': 1.5294588677881859e-06, 'lr': 0.002895641762927132}. Best is trial 3 with value: 0.7292.[0m


{'action': 'test', 'accuracy': 0.6877}
{'action': 'objective', 'num_epoch': 47, 'activation_name': 'ELU', 'fc_units': 2684, 'num_layer': 7, 'optimizer_name': 'MomentumSGD', 'lr': 0.00034812015644491415, 'weight_decay': 1.215698389346229e-10}
{'action': 'train', 'mean_loss': 0.013584067769050598, 'accuracy': 0.1}
{'action': 'train', 'mean_loss': 0.013578125610351563, 'accuracy': 0.1044}
{'action': 'train', 'mean_loss': 0.013571721925735473, 'accuracy': 0.13314}
{'action': 'train', 'mean_loss': 0.01356360234260559, 'accuracy': 0.13578}
{'action': 'train', 'mean_loss': 0.013552246265411377, 'accuracy': 0.15214}
{'action': 'train', 'mean_loss': 0.013534687094688415, 'accuracy': 0.13574}
{'action': 'train', 'mean_loss': 0.013505833520889281, 'accuracy': 0.14158}
{'action': 'train', 'mean_loss': 0.013452882242202759, 'accuracy': 0.15086}
{'action': 'train', 'mean_loss': 0.013343793807029724, 'accuracy': 0.17348}
{'action': 'train', 'mean_loss': 0.013070450177192688, 'accuracy': 0.19722}
{'ac

[32m[I 2023-03-20 17:20:23,951][0m Trial 8 finished with value: 0.4767 and parameters: {'num_epoch': 47, 'batch_size': 170, 'fc_units': 2684, 'num_layer': 7, 'activation': 'ELU', 'optimizer': 'MomentumSGD', 'weight_decay': 1.215698389346229e-10, 'lr': 0.00034812015644491415}. Best is trial 3 with value: 0.7292.[0m


{'action': 'test', 'accuracy': 0.4767}
{'action': 'objective', 'num_epoch': 39, 'activation_name': 'ReLU', 'fc_units': 2791, 'num_layer': 7, 'optimizer_name': 'MomentumSGD', 'lr': 0.004506971106135137, 'weight_decay': 1.2948405912708029e-08}
{'action': 'train', 'mean_loss': 0.03347010161399841, 'accuracy': 0.13996}
{'action': 'train', 'mean_loss': 0.028178699748516084, 'accuracy': 0.28212}
{'action': 'train', 'mean_loss': 0.023576230781078338, 'accuracy': 0.41006}
{'action': 'train', 'mean_loss': 0.020550621045827864, 'accuracy': 0.49088}
{'action': 'train', 'mean_loss': 0.018471933641433715, 'accuracy': 0.54524}
{'action': 'train', 'mean_loss': 0.01665387326002121, 'accuracy': 0.59656}
{'action': 'train', 'mean_loss': 0.015062288182973862, 'accuracy': 0.6361}
{'action': 'train', 'mean_loss': 0.013763211636543274, 'accuracy': 0.66994}
{'action': 'train', 'mean_loss': 0.012699568895697594, 'accuracy': 0.69814}
{'action': 'train', 'mean_loss': 0.011529677755832673, 'accuracy': 0.72436}
{

[32m[I 2023-03-20 17:30:02,439][0m Trial 9 finished with value: 0.7111 and parameters: {'num_epoch': 39, 'batch_size': 68, 'fc_units': 2791, 'num_layer': 7, 'activation': 'ReLU', 'optimizer': 'MomentumSGD', 'weight_decay': 1.2948405912708029e-08, 'lr': 0.004506971106135137}. Best is trial 3 with value: 0.7292.[0m


{'action': 'test', 'accuracy': 0.7111}


In [49]:
best_params = study.best_params
best_value = study.best_value
print(best_params)
print(best_value)

{'activation': 'ELU', 'batch_size': 86, 'fc_units': 3241, 'lr': 0.0018303221828338506, 'num_epoch': 45, 'num_layer': 6, 'optimizer': 'MomentumSGD', 'weight_decay': 1.4616122048756746e-06}
0.7292


In [50]:
optuna.visualization.plot_param_importances(
    study=study,
    params=["num_epoch", "batch_size", "fc_units", "num_layer", "activation", "optimizer", "weight_decay", "lr"]
).show()

In [61]:
df = study.trials_dataframe()
df.sort_values("value", ascending=False)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_activation,params_batch_size,params_fc_units,params_lr,params_num_epoch,params_num_layer,params_optimizer,params_weight_decay,state
3,3,0.7292,2023-03-20 16:39:11.568233,2023-03-20 16:49:59.384006,0 days 00:10:47.815773,ELU,86,3241,0.00183,45,6,MomentumSGD,1.461612e-06,COMPLETE
4,4,0.7153,2023-03-20 16:49:59.411419,2023-03-20 16:59:07.558789,0 days 00:09:08.147370,ReLU,201,1496,0.00422,49,6,Adam,0.000693916,COMPLETE
9,9,0.7111,2023-03-20 17:20:23.958202,2023-03-20 17:30:02.416620,0 days 00:09:38.458418,ReLU,68,2791,0.004507,39,7,MomentumSGD,1.294841e-08,COMPLETE
7,7,0.6877,2023-03-20 17:08:07.097007,2023-03-20 17:10:28.075328,0 days 00:02:20.978321,ReLU,206,1538,0.002896,12,7,Adam,1.529459e-06,COMPLETE
2,2,0.6357,2023-03-20 16:33:33.545935,2023-03-20 16:39:11.539991,0 days 00:05:37.994056,ELU,254,2755,0.00012,27,5,Adam,1.874462e-08,COMPLETE
8,8,0.4767,2023-03-20 17:10:28.103540,2023-03-20 17:20:23.928365,0 days 00:09:55.824825,ELU,170,2684,0.000348,47,7,MomentumSGD,1.215698e-10,COMPLETE
5,5,0.3969,2023-03-20 16:59:07.587512,2023-03-20 17:03:54.487231,0 days 00:04:46.899719,ELU,66,3140,0.000116,19,6,MomentumSGD,1.053212e-07,COMPLETE
1,1,0.3707,2023-03-20 16:26:34.364313,2023-03-20 16:33:33.516817,0 days 00:06:59.152504,ReLU,242,2101,0.000353,37,6,MomentumSGD,1.425571e-09,COMPLETE
6,6,0.212,2023-03-20 17:03:54.528334,2023-03-20 17:08:07.070446,0 days 00:04:12.542112,ReLU,104,1808,0.000209,21,6,MomentumSGD,0.0005581012,COMPLETE
0,0,0.1,2023-03-20 16:17:57.490513,2023-03-20 16:26:34.319521,0 days 00:08:36.829008,ELU,164,2091,0.004586,41,7,Adam,9.001636e-05,COMPLETE
