In [153]:
import torch
import torch.nn as nn
import torch.optim as optim

import torchvision as tv
import torchmetrics
from torchvision import datasets
from torch.utils.data import DataLoader

import pandas as pd
from tqdm import tqdm
from pathlib import Path
from functools import reduce

In [154]:
class FeedForwardClassifier(nn.Module):
  def __init__(self, input_shape, n_classes, hidden_units=1024):
    super(FeedForwardClassifier, self).__init__()
    input_dim = reduce(lambda x, y: x * y, input_shape)
    self.classifier = nn.Sequential(
      nn.Flatten(),
      nn.Linear(input_dim, hidden_units),
      nn.ReLU(),
      nn.Linear(hidden_units, hidden_units),
      nn.ReLU(),
      nn.Linear(hidden_units, n_classes),
    )

  def forward(self, x):
    return self.classifier(x)

In [155]:
class CNNClassifier(nn.Module):
  def __init__(self, input_shape, n_classes, cnn_hidden_units=32, linear_hidden_units=1024):
    super(CNNClassifier, self).__init__()
    self.conv_block1 = nn.Sequential(
      nn.Conv2d(input_shape[0], cnn_hidden_units, kernel_size=3, padding=1),
      nn.ReLU(),
      nn.Conv2d(cnn_hidden_units, cnn_hidden_units, kernel_size=3, padding=1),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2)
    )
    self.conv_block2 = nn.Sequential(
      nn.Conv2d(cnn_hidden_units, cnn_hidden_units, kernel_size=3, padding=1),
      nn.ReLU(),
      nn.Conv2d(cnn_hidden_units, cnn_hidden_units, kernel_size=3, padding=1),
      nn.ReLU(),
      nn.MaxPool2d(2),
    )
    self.classifier = nn.Sequential(
      nn.Flatten(),
      nn.Linear(cnn_hidden_units * 7 * 7, linear_hidden_units),
      nn.Sigmoid(),
      nn.Linear(linear_hidden_units, n_classes)
    )

  def forward(self, x):
    x = self.conv_block1(x)
    x = self.conv_block2(x)

    x = x.view(x.size(0), -1)

    x = self.classifier(x)

    return x

In [156]:
example_datasets = (datasets.EMNIST, datasets.FashionMNIST, datasets.CIFAR100)
models = (FeedForwardClassifier, CNNClassifier)

In [157]:
device = (
  'cuda'
  if torch.cuda.is_available()
  else 'mps'
  if torch.backends.mps.is_available()
  else 'cpu'
)

In [158]:
# Calculate accuracy (a classification metric)
def accuracy_fn(y_true, y_pred):
  """Calculates accuracy between truth labels and predictions.

  Args:
      y_true (torch.Tensor): Truth labels for predictions.
      y_pred (torch.Tensor): Predictions to be compared to predictions.

  Returns:
      [torch.float]: Accuracy value between y_true and y_pred, e.g. 78.45
  """
  correct = torch.eq(y_true, y_pred).sum().item()
  acc = (correct / len(y_pred)) * 100
  return acc

In [159]:
def eval_model(model: torch.nn.Module, 
               data_loader: torch.utils.data.DataLoader, 
               loss_fn: torch.nn.Module, 
               accuracy_fn):
    """Returns a dictionary containing the results of model predicting on data_loader.

    Args:
        model (torch.nn.Module): A PyTorch model capable of making predictions on data_loader.
        data_loader (torch.utils.data.DataLoader): The target dataset to predict on.
        loss_fn (torch.nn.Module): The loss function of model.
        accuracy_fn: An accuracy function to compare the models predictions to the truth labels.

    Returns:
        (dict): Results of model making predictions on data_loader.
    """
    loss, acc = 0, 0
    model.eval()
    with torch.inference_mode():
        for x, y in data_loader:
            x, y = x.to(device), y.to(device)
            # Make predictions with the model
            y_pred = model(x)
            
            # Accumulate the loss and accuracy values per batch
            loss += loss_fn(y_pred, y)
            acc += accuracy_fn(y_true=y, 
                                y_pred=y_pred.argmax(dim=1)) # For accuracy, need the prediction labels (logits -> pred_prob -> pred_labels)
        
        # Scale loss and acc to find the average loss/acc per batch
        loss /= len(data_loader)
        acc /= len(data_loader)
        
    return {"name": model.__class__.__name__, # only works when model was created with a class
            "loss": loss.item(),
            "acc": acc}

In [160]:
def get_n_params(model):
    pp=0
    for p in list(model.parameters()):
        nn=1
        for s in list(p.size()):
            nn = nn*s
        pp += nn
    return pp

In [161]:
data_dir = Path('data')

batch_size = 32
epochs = 10

torch.manual_seed(42)

results_df = pd.DataFrame()

for Dataset in tqdm(example_datasets, desc='Datasets'):
  train_data = datasets.FashionMNIST(
    root=data_dir / Dataset.__name__,
    train=True,
    download=True,
    transform=tv.transforms.ToTensor(),
    target_transform=None,
  )
  test_data = datasets.FashionMNIST(
    root=data_dir / Dataset.__name__,
    train=False,
    download=True,
    transform=tv.transforms.ToTensor(),
  )

  image, label = train_data[0]
  input_shape = image.shape
  print(input_shape)
  class_names = train_data.classes

  train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    shuffle=True,
  )
  test_dataloader = DataLoader(
    dataset=test_data,
    batch_size=batch_size,
    shuffle=False,
  )

  for Model in tqdm(models, desc='Models', leave=False):
    model = Model(input_shape=image.shape, n_classes=len(class_names)).to(device)
    print(get_n_params(model))
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(params=model.parameters(), lr=0.1)

    for epoch in tqdm(range(epochs), desc='Epochs', leave=False):
      print(f"Epoch: {epoch}\n-------")

      train_loss = 0

      for batch, (x, y) in enumerate(train_dataloader):
        x, y = x.to(device), y.to(device)

        model.train()

        y_pred = model(x)

        loss = loss_fn(y_pred, y)
        train_loss += loss

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()

        if batch % 400 == 0:
          print(f"Looked at {batch * len(x)}/{len(train_dataloader.dataset)} samples")

      train_loss /= len(train_dataloader)

      test_loss, test_acc = 0, 0

      model.eval()

      with torch.inference_mode():
        for x, y in test_dataloader:
          x, y = x.to(device), y.to(device)

          test_pred = model(x)

          test_loss += loss_fn(test_pred, y)

          test_acc += accuracy_fn(y_true=y, y_pred=test_pred.argmax(dim=1))

          test_loss /= len(test_dataloader)
          test_acc /= len(test_dataloader)

      print(f"\nTrain loss: {train_loss:.5f} | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%\n")

    results = eval_model(model, test_dataloader, loss_fn, accuracy_fn)
    row = pd.DataFrame({
      'dataset': Dataset.__name__,
      'model': Model.__name__,
      'loss': results['loss'],
      'acc': results['acc'],
    }, index=[0])
    results_df = pd.concat([results_df, row], ignore_index=True)

results_df

Datasets:   0%|          | 0/3 [00:00<?, ?it/s]

torch.Size([1, 28, 28])




1863690



[A

Epoch: 0
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.55904 | Test loss: 0.00073, Test acc: 0.28%

Epoch: 1
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.38551 | Test loss: 0.00062, Test acc: 0.30%

Epoch: 2
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.34275 | Test loss: 0.00053, Test acc: 0.32%

Epoch: 3
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.31506 | Test loss: 0.00055, Test acc: 0.30%

Epoch: 4
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.29562 | Test loss: 0.00037, Test acc: 0.32%

Epoch: 5
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.27936 | Test loss: 0.00028, Test acc: 0.32%

Epoch: 6
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.26459 | Test loss: 0.00038, Test acc: 0.30%

Epoch: 7
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.25269 | Test loss: 0.00035, Test acc: 0.32%

Epoch: 8
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.24124 | Test loss: 0.00049, Test acc: 0.30%

Epoch: 9
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



100%|██████████| 10/10 [00:54<00:00,  5.47s/it]


Train loss: 0.23150 | Test loss: 0.00029, Test acc: 0.30%






1644970



[A

Epoch: 0
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.72616 | Test loss: 0.00087, Test acc: 0.28%

Epoch: 1
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.38950 | Test loss: 0.00061, Test acc: 0.30%

Epoch: 2
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.31791 | Test loss: 0.00026, Test acc: 0.32%

Epoch: 3
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.27079 | Test loss: 0.00023, Test acc: 0.32%

Epoch: 4
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.24183 | Test loss: 0.00036, Test acc: 0.32%

Epoch: 5
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.21774 | Test loss: 0.00041, Test acc: 0.30%

Epoch: 6
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.19629 | Test loss: 0.00037, Test acc: 0.30%

Epoch: 7
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.18149 | Test loss: 0.00030, Test acc: 0.30%

Epoch: 8
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.16807 | Test loss: 0.00055, Test acc: 0.28%

Epoch: 9
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



100%|██████████| 10/10 [01:28<00:00,  8.84s/it]


Train loss: 0.15263 | Test loss: 0.00047, Test acc: 0.30%




Datasets:  33%|███▎      | 1/3 [02:24<04:49, 144.78s/it]

torch.Size([1, 28, 28])




1863690



[A

Epoch: 0
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.55474 | Test loss: 0.00086, Test acc: 0.30%

Epoch: 1
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.38721 | Test loss: 0.00079, Test acc: 0.28%

Epoch: 2
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.34466 | Test loss: 0.00056, Test acc: 0.30%

Epoch: 3
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.31456 | Test loss: 0.00038, Test acc: 0.32%

Epoch: 4
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.29508 | Test loss: 0.00052, Test acc: 0.28%

Epoch: 5
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.27746 | Test loss: 0.00036, Test acc: 0.30%

Epoch: 6
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.26496 | Test loss: 0.00051, Test acc: 0.28%

Epoch: 7
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.25369 | Test loss: 0.00043, Test acc: 0.30%

Epoch: 8
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.24157 | Test loss: 0.00036, Test acc: 0.30%

Epoch: 9
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



100%|██████████| 10/10 [00:53<00:00,  5.33s/it]


Train loss: 0.23231 | Test loss: 0.00015, Test acc: 0.32%






1644970



[A

Epoch: 0
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.70671 | Test loss: 0.00064, Test acc: 0.30%

Epoch: 1
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.37743 | Test loss: 0.00039, Test acc: 0.30%

Epoch: 2
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.30688 | Test loss: 0.00041, Test acc: 0.32%

Epoch: 3
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.26737 | Test loss: 0.00043, Test acc: 0.32%

Epoch: 4
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.23978 | Test loss: 0.00045, Test acc: 0.32%

Epoch: 5
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.21867 | Test loss: 0.00036, Test acc: 0.30%

Epoch: 6
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.20047 | Test loss: 0.00043, Test acc: 0.32%

Epoch: 7
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.18365 | Test loss: 0.00021, Test acc: 0.32%

Epoch: 8
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.16986 | Test loss: 0.00043, Test acc: 0.32%

Epoch: 9
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



100%|██████████| 10/10 [01:29<00:00,  8.92s/it]


Train loss: 0.15550 | Test loss: 0.00017, Test acc: 0.32%




Datasets:  67%|██████▋   | 2/3 [04:49<02:24, 144.45s/it]

torch.Size([1, 28, 28])




1863690



[A

Epoch: 0
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.56055 | Test loss: 0.00083, Test acc: 0.28%

Epoch: 1
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.38707 | Test loss: 0.00078, Test acc: 0.28%

Epoch: 2
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.34294 | Test loss: 0.00098, Test acc: 0.28%

Epoch: 3
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.31531 | Test loss: 0.00052, Test acc: 0.32%

Epoch: 4
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.29594 | Test loss: 0.00094, Test acc: 0.30%

Epoch: 5
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.27810 | Test loss: 0.00080, Test acc: 0.30%

Epoch: 6
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.26568 | Test loss: 0.00031, Test acc: 0.32%

Epoch: 7
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.25223 | Test loss: 0.00039, Test acc: 0.30%

Epoch: 8
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.24073 | Test loss: 0.00017, Test acc: 0.32%

Epoch: 9
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



100%|██████████| 10/10 [00:54<00:00,  5.46s/it]


Train loss: 0.23102 | Test loss: 0.00032, Test acc: 0.32%






1644970



[A

Epoch: 0
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.68596 | Test loss: 0.00103, Test acc: 0.30%

Epoch: 1
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.38009 | Test loss: 0.00036, Test acc: 0.32%

Epoch: 2
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.31033 | Test loss: 0.00037, Test acc: 0.32%

Epoch: 3
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.26831 | Test loss: 0.00045, Test acc: 0.32%

Epoch: 4
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.24073 | Test loss: 0.00033, Test acc: 0.32%

Epoch: 5
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.21704 | Test loss: 0.00027, Test acc: 0.32%

Epoch: 6
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.19921 | Test loss: 0.00017, Test acc: 0.32%

Epoch: 7
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.18044 | Test loss: 0.00025, Test acc: 0.32%

Epoch: 8
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



[A


Train loss: 0.16918 | Test loss: 0.00024, Test acc: 0.32%

Epoch: 9
-------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples



100%|██████████| 10/10 [01:33<00:00,  9.39s/it]


Train loss: 0.15579 | Test loss: 0.00020, Test acc: 0.32%




Datasets: 100%|██████████| 3/3 [07:19<00:00, 146.41s/it]


Unnamed: 0,dataset,model,loss,acc
0,EMNIST,FeedForwardClassifier,0.317328,88.678115
1,EMNIST,CNNClassifier,0.247236,91.144169
2,FashionMNIST,FeedForwardClassifier,0.332177,88.26877
3,FashionMNIST,CNNClassifier,0.250672,90.854633
4,CIFAR100,FeedForwardClassifier,0.307631,89.247204
5,CIFAR100,CNNClassifier,0.253376,90.904553
