In [1]:
# Import PyTorch; if it is missing, print per-platform setup hints and re-raise.
try:
    import torch as t
    import torch.nn as tnn
except ImportError:
    print("Colab users: pytorch comes preinstalled. Select Change Runtime > T4 GPU")
    print("Local users: Please install pytorch for your hardware using instructions from here: https://pytorch.org/get-started/locally/")
    print("ACG users: Please follow instructions here: https://vikasdhiman.info/ECE490-Neural-Networks/posts/0000-00-06-acg-slurm-jupyter/")

    raise

# Pick the compute device: CUDA first, then Apple MPS, otherwise CPU.
# The MPS probe goes through `torch.backends.mps`, the long-standing entry point;
# `torch.mps.is_available` only exists in recent PyTorch releases, so calling it
# could raise AttributeError on any machine that has no CUDA device.
_mps_backend = getattr(t.backends, "mps", None)
if t.cuda.is_available():
    DEVICE = "cuda"
elif _mps_backend is not None and _mps_backend.is_available():
    DEVICE = "mps"
else:
    DEVICE = "cpu"

# Default floating-point dtype of this PyTorch session.
DTYPE = t.get_default_dtype()

## Dataset

In [2]:
## Doing it the Pytorch way without using our custom feature extraction
import torch
import torch.nn
import torch.optim
import torchvision
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader

#torch.manual_seed(17) # Only use during debugging

# Build the two MNIST splits through torchvision. Both use the same storage
# root ("data"), the same download flag and a ToTensor() transform; only the
# `train` flag differs (True -> all_training_data, False -> test_data).
all_training_data, test_data = (
    torchvision.datasets.MNIST(
        root="data",
        train=is_train_split,
        download=True,
        transform=ToTensor(),
    )
    for is_train_split in (True, False)
)

In [3]:
# Hold out 10% of the training set for validation.
# A dedicated, seeded generator makes the split identical every time this cell
# runs, so re-running it can never move samples a live model has already been
# trained on into the validation set. Only the split is pinned: the global RNG
# (weight initialisation, batch shuffling) is left untouched.
split_generator = torch.Generator().manual_seed(17)
training_data, validation_data = torch.utils.data.random_split(
    all_training_data, [0.9, 0.1], generator=split_generator
)

## Hyper Parameters

In [4]:
# Hyperparameters shared by the data loaders, the optimizer and the training loop.
batch_size = 64       # samples per mini-batch in every DataLoader
epochs = 10           # number of passes over the training split
learning_rate = 1e-3  # step size handed to the optimizer
momentum = 0.9        # momentum coefficient handed to SGD

## Model

In [5]:
# One loader per split, all with the same batch size.
# Only the training loader shuffles; the evaluation loaders keep dataset order.
training_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
validation_dataloader = DataLoader(validation_data, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [6]:
# Model: flatten -> linear -> ReLU -> linear, written as an explicit Module.
class MLPNetwork(torch.nn.Module):
    """Fully connected classifier with a single hidden layer.

    Args:
        hidden_size: width of the hidden linear layer.
        nclasses: number of output scores produced per sample.
        input_size: number of features per sample once flattened
            (defaults to 28*28).
    """

    def __init__(self, hidden_size=10, nclasses=10, input_size=28*28):
        super().__init__()
        stages = (
            torch.nn.Flatten(),
            torch.nn.Linear(input_size, hidden_size),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_size, nclasses),
        )
        # ModuleList (rather than a plain list) registers every stage, so the
        # linear layers' parameters show up in model.parameters().
        self._layers = torch.nn.ModuleList(stages)

    def forward(self, x):
        """Feed ``x`` through the stored layers in order and return the last output."""
        out = x
        for layer in self._layers:
            out = layer(out)
        return out


model = MLPNetwork()

# alternatively you can also
# hidden_size=10
# nclasses=10
# input_size=28*28
# model = torch.nn.Sequential(torch.nn.Flatten(),
#            tnn.Linear(input_size, hidden_size),
#            tnn.ReLU(),
#            tnn.Linear(hidden_size, nclasses))
# 

## Loss function

In [7]:
# Classification criterion, called later as loss(pred, y) on the model outputs
# and the labels. The reduction is spelled out; "mean" is the library default.
loss = torch.nn.CrossEntropyLoss(reduction="mean")

## Training

In [None]:
# Optimizer: SGD with momentum over every parameter of `model`.
# A ReduceLROnPlateau scheduler is kept below, disabled, for experimentation.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=momentum,
)
#scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min")

def loss_and_accuracy(model, loss, validation_dataloader, device=DEVICE):
    """Evaluate ``model`` on every batch of ``validation_dataloader``.

    Args:
        model: module to evaluate, called as ``model(X)``. It is not moved
            here, so it must already live on ``device``.
        loss: callable invoked as ``loss(pred, y)`` returning a scalar tensor.
        validation_dataloader: loader yielding ``(X, y)`` batches; both the
            loader and its ``dataset`` must support ``len()``.
        device: device every batch is moved to before the forward pass.

    Returns:
        ``(avg_loss, accuracy)``: the per-batch losses averaged over the number
        of batches, and the fraction of samples whose arg-max prediction equals
        the label.

    Raises:
        ValueError: if the loader has no batches or its dataset is empty.
    """
    num_samples = len(validation_dataloader.dataset)
    num_batches = len(validation_dataloader)
    if num_samples == 0 or num_batches == 0:
        raise ValueError("cannot evaluate on an empty dataloader")

    # Remember the caller's mode: evaluation must not silently flip a model
    # that was in eval mode back into training mode.
    was_training = model.training
    total_loss, num_correct = 0.0, 0

    model.eval()
    try:
        with torch.no_grad():
            for X, y in validation_dataloader:
                X = X.to(device)
                y = y.to(device)
                pred = model(X)
                total_loss += loss(pred, y).item()
                # Summing the boolean matches yields an integer count directly.
                num_correct += (pred.argmax(dim=-1) == y).sum().item()
    finally:
        # Restore the original mode even if a batch raised.
        model.train(was_training)

    return total_loss / num_batches, num_correct / num_samples
    
def train(model, loss, training_dataloader, validation_dataloader, device=DEVICE,
          optim=None, num_epochs=None):
    """Train ``model`` and record one training and one validation loss per epoch.

    Args:
        model: module to optimise; moved to ``device`` in place.
        loss: callable invoked as ``loss(pred, y)`` returning a scalar tensor.
        training_dataloader: loader yielding ``(X, y)`` training batches.
        validation_dataloader: loader passed to ``loss_and_accuracy`` after
            every epoch.
        device: device used for the model and for every batch.
        optim: optimizer to step. Defaults to the notebook-level ``optimizer``;
            pass one explicitly when training a model other than the one that
            optimizer was built for.
        num_epochs: number of passes over the training data. Defaults to the
            notebook-level ``epochs``.

    Returns:
        ``(model, train_losses, valid_losses)``. Both lists hold one value per
        epoch: the training loss averaged over that epoch's batches, and the
        average validation loss measured after the epoch.

    Raises:
        ValueError: if ``training_dataloader`` yields no batches.
    """
    if optim is None:
        optim = optimizer
    if num_epochs is None:
        num_epochs = epochs

    model.to(device)
    train_losses = []
    valid_losses = []
    model.train()
    # Named `epoch`, not `t`: `t` is this notebook's alias for the torch module.
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for X, y in training_dataloader:
            X = X.to(device)
            y = y.to(device)
            # Compute prediction and loss
            pred = model(X)
            loss_t = loss(pred, y)

            # Backpropagation
            optim.zero_grad()
            loss_t.backward()
            optim.step()

            # Accumulate the detached tensor so the device is only synchronised
            # once per epoch (at float() below), not once per batch.
            running_loss = running_loss + loss_t.detach()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("training_dataloader yielded no batches")

        # Epoch average, so the curve is comparable with the validation average
        # (a single last-batch loss is a much noisier quantity).
        train_loss = float(running_loss) / num_batches
        train_losses.append(train_loss)

        valid_loss, correct = loss_and_accuracy(model, loss, validation_dataloader, device=device)
        #scheduler.step(valid_loss)
        valid_losses.append(valid_loss)

        # Printed on its own line: the previous "\r"-terminated print was
        # overwritten by the validation report before it could be read.
        print(f"Epoch {epoch + 1}/{num_epochs}: avg train loss: {train_loss:>7f}")
        print(f"Validation Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {valid_loss:>8f} \n")
    return model, train_losses, valid_losses
        
# Fit the model on the training split (validating after each epoch) ...
trained_model, train_losses, valid_losses = train(
    model,
    loss,
    training_dataloader,
    validation_dataloader,
)

# ... then report average loss and accuracy on the test split.
test_loss, correct = loss_and_accuracy(model, loss, test_dataloader)
print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
import matplotlib.pyplot as plt

# Loss curves, one point per epoch. Axes are labelled and titled so the figure
# can be read on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.plot(train_losses, 'r', label='train')
ax.plot(valid_losses, 'b', label='validation')
ax.set_xlabel("epoch")
ax.set_ylabel("loss")
ax.set_title("Training and validation loss per epoch")
ax.legend()
plt.show()

In [None]:
# Pull the first batch from the test loader; keep the inputs, discard the labels.
first_test_batch = next(iter(test_dataloader))
X, _ = first_test_batch
X.shape

In [None]:
import matplotlib.pyplot as plt

# Show the 2-D slice at index [0, 0] of the batch
# (presumably sample 0, channel 0 of a (batch, channel, H, W) tensor; confirm).
first_image = X[0, 0]
plt.imshow(first_image)

In [None]:
# Predict the label of the first image only. Inference runs in eval mode and
# under no_grad, so no autograd graph is built just to read a single arg-max,
# and only the one displayed sample is sent through the model.
model.eval()
with torch.no_grad():
    first_logits = model(X[:1].to(DEVICE))
print("The predicted image label is ", first_logits.argmax(dim=-1)[0].item())