# PyTorch Neural Network -- fashion MNIST [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/x-datascience-datacamp/datacamp-master/blob/main/12-deep-learning/01-pytorch-neural-network.ipynb)

Authors: [Thomas Moreau](https://tommoral.github.io)
        [Pedro L. C. Rodrigues](https://plcrodrigues.github.io)


Notebook inspired by material from the [PyTorch quickstart tutorial](https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html).

Start by importing helpers from `torch` and `torchvision`:

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Compose, Normalize
from torchvision.transforms import RandomHorizontalFlip, RandomCrop

### Load data - Fashion MNIST

Fashion MNIST is a dataset with images of clothes from 10 categories:

In [None]:
# Training-time pipeline: random flips and padded random crops augment the
# images before they are converted to tensors and normalized.
train_transform = Compose([
    RandomHorizontalFlip(),
    RandomCrop(size=28, padding=4),
    ToTensor(),
    Normalize([0.2700], [0.3500]),
])

# Evaluation pipeline: no augmentation, same normalization constants.
test_transform = Compose([ToTensor(), Normalize((0.2700,), (0.3500,))])

# Training split, downloaded into the "data" folder if necessary.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=train_transform,
)

# Test split, stored in the same folder.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=test_transform,
)

print("Classes:", training_data.classes)

# Report the split sizes in thousands of samples.
n_train_k = len(training_data) / 1000
n_test_k = len(test_data) / 1000
print(f"Training samples: {n_train_k:.0f}k\n"
      f"Test samples: {n_test_k:.0f}k")

In [None]:
# Show what kind of object indexing the dataset returns.
sample_type = type(training_data[0])
print(f"Type: {sample_type}\n")

# Unpack one item into its image tensor and its label.
X, y = training_data[0]
print(f"X shape: {X.shape}\ny: {y}")

In [None]:
import matplotlib.pyplot as plt

# Show the first ten training items on a 2x5 grid of axes.
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(15, 5))
for i, ax in enumerate(axes.ravel()):
    image = training_data[i][0]
    # Index 0 along the leading dimension gives the 2D array to display.
    ax.imshow(image[0], cmap='gray')

### Accessing batch of images -- DataLoader

To train the network, we want to use SGD with mini-batches of data.
A convenient way to iterate through the samples in batches is to use a `DataLoader`:

In [None]:
# Number of samples per mini-batch.
batch_size = 64

# Create data loaders.
# The training loader reshuffles the samples at every epoch so that SGD does
# not see the mini-batches in the same fixed order each time. The test loader
# keeps the dataset order, since evaluation does not depend on it.
train_dataloader = DataLoader(
    training_data, batch_size=batch_size, shuffle=True
)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

# Peek at one test batch to check the layout of the tensors.
X, y = next(iter(test_dataloader))
print(f"Shape of X [N, C, H, W]: {X.shape}")
print(f"Shape of y: {y.shape} {y.dtype}")

### Creating a Network -- nn.Module

Now, we will define a simple feedforward fully connected model:

In [None]:
# Define model
class NeuralNetwork(nn.Module):
    """Fully connected classifier with two hidden ReLU layers.

    The input is flattened, passed through two hidden linear layers with ReLU
    activations, and mapped to one output score per class.

    Parameters
    ----------
    in_features : int, default=28 * 28
        Number of values per sample once flattened.
    hidden_features : int, default=512
        Width of each of the two hidden layers.
    num_classes : int, default=10
        Number of output scores (one per class).
    """

    def __init__(self, in_features=28 * 28, hidden_features=512,
                 num_classes=10):
        super().__init__()

        # Attribute names are kept so that ``state_dict`` keys are unchanged.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return the unnormalized class scores (logits) for the batch ``x``."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits


model = NeuralNetwork()
print(model)

### Using the right computation architecture - CPU vs GPU

In [None]:
# Pick the GPU when CUDA is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

# Move the model to the selected device.
model = model.to(device)

### Defining the training procedure -- Loss and Optimizer

In [None]:
# Plain stochastic gradient descent over all the model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

# Cross-entropy loss applied to the outputs of the network.
loss_fn = nn.CrossEntropyLoss()

In [None]:
def train_epoch(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    Each batch is moved to the device that holds the model's parameters, so
    the function does not rely on a module-level ``device`` variable. Every
    100 batches, the current loss and the number of samples processed before
    that batch are printed.

    Parameters
    ----------
    dataloader : iterable
        Yields ``(X, y)`` batches and exposes a sized ``dataset`` attribute.
    model : nn.Module
        Model to optimize; it is put in training mode.
    loss_fn : callable
        Called as ``loss_fn(pred, y)``; must return a tensor supporting
        ``backward()``.
    optimizer : torch.optim.Optimizer
        Optimizer updating the parameters of ``model``.
    """
    size = len(dataloader.dataset)

    # Batches must live on the same device as the model's weights. A model
    # without parameters gives no hint, so batches are then left untouched.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    model.train()
    seen = 0  # number of samples consumed before the current batch
    for batch, (X, y) in enumerate(dataloader):
        if model_device is not None:
            X, y = X.to(model_device), y.to(model_device)
        optimizer.zero_grad()

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            # ``seen`` stays correct even when this batch is a short last
            # batch, unlike ``batch * len(X)``.
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")
        seen += len(X)


In [None]:
def eval_model(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print accuracy and mean loss.

    Each batch is moved to the device that holds the model's parameters, so
    the function does not rely on a module-level ``device`` variable. The
    accuracy is the fraction of samples whose highest-scoring output index
    equals the label; the loss is averaged over the batches.

    Parameters
    ----------
    dataloader : iterable
        Yields ``(X, y)`` batches.
    model : nn.Module
        Model to evaluate; it is put in evaluation mode.
    loss_fn : callable
        Called as ``loss_fn(pred, y)``; must return a tensor with ``item()``.

    Raises
    ------
    ValueError
        If ``dataloader`` yields no sample.
    """
    # Batches must live on the same device as the model's weights. A model
    # without parameters gives no hint, so batches are then left untouched.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    model.eval()
    test_loss, correct = 0.0, 0
    # Count what is actually iterated, so the averages stay right when the
    # loader skips samples (e.g. ``drop_last`` or a custom sampler).
    num_batches, num_samples = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            if model_device is not None:
                X, y = X.to(model_device), y.to(model_device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).sum().item()
            num_batches += 1
            num_samples += len(y)

    if num_samples == 0:
        raise ValueError("dataloader yielded no samples to evaluate")

    test_loss /= num_batches
    accuracy = correct / num_samples
    print(f"Test Error: \n Accuracy: {(100*accuracy):>0.1f}%, Avg loss: {test_loss:>8f} \n")


In [None]:
# Number of full passes over the training set.
epochs = 5

# Rebuild the loaders for this run. The training loader reshuffles the
# samples at every epoch so that SGD does not see the mini-batches in the
# same fixed order each time; the test loader keeps the dataset order.
train_dataloader = DataLoader(
    training_data, batch_size=batch_size, shuffle=True
)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

# Alternate one training pass and one evaluation on the test set.
for t in range(epochs):
    print(f"Epoch {t+1}\n{'-' * 20}")
    train_epoch(train_dataloader, model, loss_fn, optimizer)
    eval_model(test_dataloader, model, loss_fn)
print("Done!")

### Computational bottleneck for NN training

We now use `snakeviz` to profile the training loop and locate its computational bottleneck.

_Note:_ this extension can be installed with `pip install snakeviz`.

In [None]:
%load_ext snakeviz

In [None]:
%%snakeviz

# Short run: two epochs are enough to collect a profile.
epochs = 2
num_workers = 1  # set it to 8 to see what it changes

# Both loaders fetch their batches in ``num_workers`` worker processes. The
# training loader reshuffles the samples at every epoch, as a training run
# should, so that the profile reflects an actual training configuration.
train_dataloader = DataLoader(
    training_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)
test_dataloader = DataLoader(
    test_data, batch_size=batch_size, num_workers=num_workers
)

# Alternate one training pass and one evaluation on the test set.
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_epoch(train_dataloader, model, loss_fn, optimizer)
    eval_model(test_dataloader, model, loss_fn)
print("Done!")