In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt

## Dataloader

In [2]:
train_data = datasets.FashionMNIST(root= 'data', train= True, download= True, transform = ToTensor())
test_data = datasets.FashionMNIST(root= 'data', train= False, download= True, transform = ToTensor())

In [3]:
#create dataloader
batch_size = 64

train_data_loaders = DataLoader(train_data, batch_size= batch_size, shuffle= True)
test_data_loaders = DataLoader(test_data, batch_size= batch_size)

for X, y in test_data_loaders :
    print(f"Shape of X [N, C, H, W] : {X.shape}")
    print(f"Shape of y {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W] : torch.Size([64, 1, 28, 28])
Shape of y torch.Size([64]) torch.int64


60000

## Creating Models

In [4]:
#Get CPU or GPU device for training
device = "cuda" if torch.cuda.is_available() else 'cpu'
print(f'Using device {device}')


#Define the model

class NeuralNetwork(nn.Module) :
    
    def __init__(self) :
        
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features= 28*28, out_features= 512),
            nn.ReLU(),
            nn.Linear(in_features= 512, out_features=512),
            nn.ReLU(),
            nn.Linear(in_features= 512, out_features= 10),
            nn.ReLU()
        )
        
    def forward(self, x) :
        x = self.flatten(x)
        
        logits = self.linear_relu_stack(x)
        return logits
    

Using device cuda


In [5]:
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
)


## Optimizing Model Parameters

In [7]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr= 0.001)

In [8]:
def train(dataloader, model, loss_fn, optimizer) :
    
    size = len(dataloader.dataset) #total number of rows in dataset
    '''tells your model that you are training the model. So effectively layers like dropout, batchnorm etc. 
    which behave different on the train and test procedures know what is going on and hence can behave accordingly.
    '''
    model.train() 
    for batch, (X,y) in enumerate(dataloader) :
        X, y = X.to(device), y.to(device)
        
        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)
        
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if batch % 100 == 0 :
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
        

In [11]:
def test(dataloader, model, loss_fn) :
    
    size = len(dataloader.dataset)  #total number of rows in dataset
    num_batches = len(dataloader) #total number of batches in dataset
    model.eval() # to tell that you are testing the model
    test_loss, correct = 0, 0
    with torch.no_grad() :
        for X, y in dataloader :
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred,y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

60000

In [17]:
import time
start_time = time.time()
epochs = 5
for t in range(epochs) :
    print(f'Epoch {t+1}\n-----------------------')
    train(train_data_loaders, model, loss_fn, optimizer)
    test(test_data_loaders, model, loss_fn)
    

print(f'Time Taken {(time.time() - start_time)/60}')
print('Completed')

Epoch 1n-----------------------
loss: 2.304558  [    0/60000]
loss: 1.447910  [ 6400/60000]
loss: 1.227303  [12800/60000]
loss: 1.173591  [19200/60000]
loss: 1.209198  [25600/60000]
loss: 1.113881  [32000/60000]
loss: 1.208611  [38400/60000]
loss: 0.885400  [44800/60000]
loss: 1.298644  [51200/60000]
loss: 1.199545  [57600/60000]
Test Error: 
 Accuracy: 56.6%, Avg loss: 1.175118 

Epoch 2n-----------------------
loss: 1.070366  [    0/60000]
loss: 1.215748  [ 6400/60000]
loss: 1.060885  [12800/60000]
loss: 0.973287  [19200/60000]
loss: 1.035190  [25600/60000]
loss: 1.029123  [32000/60000]
loss: 1.161226  [38400/60000]
loss: 1.398678  [44800/60000]
loss: 1.088459  [51200/60000]
loss: 1.233141  [57600/60000]
Test Error: 
 Accuracy: 55.8%, Avg loss: 1.192575 

Epoch 3n-----------------------
loss: 1.197509  [    0/60000]
loss: 1.028413  [ 6400/60000]
loss: 1.340223  [12800/60000]
loss: 1.063067  [19200/60000]
loss: 1.165577  [25600/60000]
loss: 0.986293  [32000/60000]
loss: 0.909631  [384

## Saving & Loading the Model

In [18]:
torch.save(model.state_dict(), 'model.pth')
print("Saved PyTorch Model State to model.pth")

Saved PyTorch Model State to model.pth


In [19]:
#load the model
model = NeuralNetwork()
model.load_state_dict(torch.load('model.pth'))

<All keys matched successfully>

## Making Predictions

In [20]:
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

In [22]:
model.eval()
x, y = test_data[0][0], test_data[0][1]
with torch.no_grad() :
    pred = model(x)
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')

Predicted: "Ankle boot", Actual: "Ankle boot"
