# Pytorch Lighning
1. This package helps us skip and not worry about when to set model to train or evaluation mode -
    - `model.train()`
    - `model.eval()`
    
    can be excluded

2. Also not worry about setting the device for GPU (`.to(device)`)
3. Easy GPU/TPU support
4. Easy scsling of GPUs
5. We can completely not worry about the back prop and update steps -
    - `optimizer.zero_grad()`
    - `loss.backward()`
    - `optimizer.step()`

6. No need to worry about using `torch.no_grad()` and `x.detach()` for stopping gradient calculations
7. BONUS:
    - Integrated Tensorboard support
    - Prints tips and hints

In [1]:
import pytorch_lightning as pl
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

from pytorch_lightning import Trainer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Hyper-parameters
input_size = 28*28
hidden_size = 100
num_classes = 10
num_epochs = 10
batch_size = 128
learning_rate = 0.001

In [3]:
class LightningNeuralNet(pl.LightningModule):
    def __init__(self, input_size, hidden_size, num_classes):
        super(LightningNeuralNet, self).__init__()
        self.input_size = input_size
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        # no activation and no softmax at the end
        return out

    def training_step(self, batch, batch_idx):
        images, labels = batch
        images = images.reshape(-1, 28 * 28)

        # Forward pass
        outputs = self(images)
        loss = F.cross_entropy(outputs, labels)
        
        tensorboard_logs = {'train_loss': loss}
        # use key 'log'
        return {"loss": loss, 'log': tensorboard_logs}
        # tensorboard_logs = {'train_loss': loss}
        # return {'loss':loss} # {'loss': loss, 'log': tensorboard_logs}
        # Needs to return it as a dictionary

    # define what happens for testing here

    def train_dataloader(self):
        # MNIST dataset
        train_dataset = torchvision.datasets.MNIST(
            root="datasets", train=True, transform=transforms.ToTensor(), download=True
        )
        # Data loader
        train_loader = torch.utils.data.DataLoader(
            dataset=train_dataset, batch_size=batch_size, num_workers=4, shuffle=False
        )
        return train_loader

    def val_dataloader(self):
        test_dataset = torchvision.datasets.MNIST(
            root="datasets", train=False, transform=transforms.ToTensor()
        )

        test_loader = torch.utils.data.DataLoader(
            dataset=test_dataset, batch_size=batch_size, num_workers=4, shuffle=False
        )
        return test_loader
    
    def validation_step(self, batch, batch_idx):
        images, labels = batch
        images = images.reshape(-1, 28 * 28)

        # Forward pass
        outputs = self(images)
                        
        loss = F.cross_entropy(outputs, labels)
        return {"val_loss": loss}
    
    def validation_epoch_end(self, outputs):
        # outputs = list of dictionaries
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        tensorboard_logs = {'avg_val_loss': avg_loss}
        # use key 'log'
        return {'val_loss': avg_loss, 'log': tensorboard_logs}
    
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=learning_rate)

In [4]:
# One batch at a time
trainer = Trainer(fast_dev_run=True, gpus = 1, accelerator='mps')
model = LightningNeuralNet(input_size, hidden_size, num_classes)
trainer.fit(model)

  rank_zero_deprecation(
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Running in `fast_dev_run` mode: will run the requested loop using 1 batch(es). Logging and checkpointing is suppressed.

  | Name | Type   | Params
--------------------------------
0 | l1   | Linear | 78.5 K
1 | relu | ReLU   | 0     
2 | l2   | Linear | 1.0 K 
--------------------------------
79.5 K    Trainable params
0         Non-trainable params
79.5 K    Total params
0.318     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 0: 100%|██████████| 2/2 [00:01<00:00,  1.46it/s, loss=2.32, v_num=]

`Trainer.fit` stopped: `max_steps=1` reached.


Epoch 0: 100%|██████████| 2/2 [00:01<00:00,  1.46it/s, loss=2.32, v_num=]


In [7]:
trainer = Trainer(fast_dev_run=False, max_epochs=num_epochs, gpus = 1, accelerator='cpu', callbacks=[pl.callbacks.TQDMProgressBar(refresh_rate=10)])
model = LightningNeuralNet(input_size, hidden_size, num_classes)
trainer.fit(model)

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

  | Name | Type   | Params
--------------------------------
0 | l1   | Linear | 78.5 K
1 | relu | ReLU   | 0     
2 | l2   | Linear | 1.0 K 
--------------------------------
79.5 K    Trainable params
0         Non-trainable params
79.5 K    Total params
0.318     Total estimated model params size (MB)


Epoch 9: 100%|██████████| 548/548 [00:02<00:00, 220.71it/s, loss=0.0381, v_num=20]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 548/548 [00:02<00:00, 220.37it/s, loss=0.0381, v_num=20]


`auto_lr_find` finds the best lr for the data

In [8]:
trainer = Trainer(auto_lr_find=True, fast_dev_run=False, max_epochs=num_epochs, gpus = 1, accelerator='cpu', callbacks=[pl.callbacks.TQDMProgressBar(refresh_rate=10)])
model = LightningNeuralNet(input_size, hidden_size, num_classes)
trainer.fit(model)

  rank_zero_deprecation(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

  | Name | Type   | Params
--------------------------------
0 | l1   | Linear | 78.5 K
1 | relu | ReLU   | 0     
2 | l2   | Linear | 1.0 K 
--------------------------------
79.5 K    Trainable params
0         Non-trainable params
79.5 K    Total params
0.318     Total estimated model params size (MB)


Epoch 9: 100%|██████████| 548/548 [00:02<00:00, 229.40it/s, loss=0.0367, v_num=21]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 548/548 [00:02<00:00, 229.04it/s, loss=0.0367, v_num=21]


In [6]:
images, _ = next(iter(model.train_dataloader()))
images = images.reshape(-1, 28*28)
pred = model(images)
pred = torch.max(pred, 1)[1]
pred

tensor([5, 0, 4, 1, 9, 2, 1, 3, 1, 4, 3, 5, 3, 6, 1, 7, 2, 8, 6, 9, 4, 0, 9, 1,
        1, 2, 4, 3, 2, 7, 3, 8, 6, 9, 0, 5, 6, 0, 7, 6, 1, 8, 7, 9, 3, 9, 8, 5,
        3, 3, 3, 0, 7, 4, 9, 8, 0, 9, 4, 1, 4, 4, 6, 0, 4, 5, 6, 1, 0, 0, 1, 7,
        1, 6, 3, 0, 2, 1, 1, 7, 0, 0, 2, 6, 7, 8, 3, 9, 0, 4, 6, 7, 4, 6, 8, 0,
        7, 8, 3, 1, 5, 7, 1, 7, 1, 1, 6, 3, 0, 2, 9, 3, 1, 1, 0, 4, 9, 2, 0, 0,
        2, 0, 2, 7, 1, 8, 6, 4])