In [9]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR

import sig_networks
from sig_networks.ffn import FeedforwardNeuralNetModel
from sig_networks.focal_loss import FocalLoss
# Fixed module path: the original `sig_networkstorch_utils` is not a valid
# module name and raised ModuleNotFoundError on a fresh kernel.
from sig_networks.pytorch_utils import training_pytorch, testing_pytorch

# Seed used throughout the notebook; it is also passed explicitly to
# `training_pytorch` in the training cell.
seed = 2023
# Seed torch's global RNG up front so that work done before training
# (model weight initialisation, DataLoader shuffling) is reproducible too.
# The trailing semicolon suppresses the Generator repr in the cell output.
torch.manual_seed(seed);

In [10]:
# Mini-batch size shared by the train and test loaders.
batch_size = 100

# MNIST
# One preprocessing pipeline shared by both splits: convert each image to a
# tensor, then flatten it to 1-D so it can be fed to a fully connected network.
flatten_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Lambda(torch.flatten)]
)

# Build the train split first, then the test split; both are downloaded
# into ./data if they are not already present.
train_dataset, test_dataset = (
    torchvision.datasets.MNIST(
        root="./data",
        train=is_train_split,
        transform=flatten_transform,
        download=True,
    )
    for is_train_split in (True, False)
)

# Shuffle only the training data; keep the test order fixed.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [11]:
# Pull a single mini-batch from the training loader to sanity-check shapes.
examples = iter(train_loader)
# Use the built-in next(): DataLoader iterators implement __next__, while the
# Python-2-style `.next()` method is not available in recent PyTorch releases.
samples, label = next(examples)
print(samples.shape, label.shape)

torch.Size([100, 784]) torch.Size([100])


In [17]:
# hyperparameters
input_size = 28 * 28  # flattened 28x28 image -> 784 features (used as input_dim)
hidden_size = [100, 100, 100]  # passed to the model as hidden_dim
num_classes = 10  # passed to the model as output_dim
# Was 1, which contradicted the training cell (it trains for 10 epochs);
# keep the declared value in line with what is actually run.
num_epochs = 10
learning_rate = 0.001  # learning rate given to the Adam optimizer
gamma = 1  # gamma argument of FocalLoss

In [18]:
# Feed-forward classifier built from the hyperparameters defined earlier.
ffn_model = FeedforwardNeuralNetModel(
    dropout_rate=0.5,
    output_dim=num_classes,
    hidden_dim=hidden_size,
    input_dim=input_size,
)

# Loss function: focal loss with the configured gamma.
# (Alternative that was tried: torch.nn.CrossEntropyLoss().)
criterion = FocalLoss(gamma=gamma)

# Optimizer: Adam over all model parameters.
optimizer = torch.optim.Adam(params=ffn_model.parameters(), lr=learning_rate)

# Learning-rate scheduler: reduce the LR when the monitored quantity stops
# decreasing ("min" mode).
# (Alternatives that were tried: StepLR(optimizer, step_size=4, gamma=0.5),
# or no scheduler at all, i.e. scheduler = None.)
scheduler = ReduceLROnPlateau(optimizer, mode="min")

In [19]:
# accuracy before training
pred, label = testing_pytorch(ffn_model, test_loader, criterion)
sum(pred == label) / len(pred)

Accuracy on dataset of size 10000: 10.3100004196167 %.
Average loss: 2.073407211303711


tensor(0.1031)

In [20]:
# Train on the training split and rebind `ffn_model` to the returned model.
# NOTE: the epoch count is hard-coded to 10 here rather than read from the
# `num_epochs` hyperparameter.
ffn_model = training_pytorch(
    # what to train and on which data
    model=ffn_model,
    train_loader=train_loader,
    # optimisation set-up
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    num_epochs=10,
    # reproducibility
    seed=seed,
    # progress reporting: log every epoch
    verbose=True,
    verbose_epoch=1,
)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1/10 || Item: 0/600 || Loss: 2.064084529876709
--------------------------------------------------
##### Epoch: 1/10 || Loss: 0.4937691390514374
--------------------------------------------------
Epoch: 2/10 || Item: 0/600 || Loss: 0.4600626826286316
--------------------------------------------------
##### Epoch: 2/10 || Loss: 0.39700111746788025
--------------------------------------------------
Epoch: 3/10 || Item: 0/600 || Loss: 0.2833329439163208
--------------------------------------------------
##### Epoch: 3/10 || Loss: 0.3118630349636078
--------------------------------------------------
Epoch: 4/10 || Item: 0/600 || Loss: 0.25615835189819336
--------------------------------------------------
##### Epoch: 4/10 || Loss: 0.292373925447464
--------------------------------------------------
Epoch: 5/10 || Item: 0/600 || Loss: 0.16734611988067627
--------------------------------------------------
##### Epoch: 5/10 || Loss: 0.3590061068534851
-----------------------------------

In [21]:
# accuracy after training
pred, label = testing_pytorch(ffn_model, test_loader, criterion)
sum(pred == label) / len(pred)

Accuracy on dataset of size 10000: 95.98999786376953 %.
Average loss: 0.09848488617921249


tensor(0.9599)