# GoogLeNet

Inception block don't change image size, only change channel size.


In [1]:
import torch
from lightning_datasets import MnistDataModule
import pytorch_lightning as L
from torch import nn
from torchmetrics import Accuracy
from pytorch_lightning.loggers import MLFlowLogger
import matplotlib.pyplot as plt
import torch.nn.functional as F

In [2]:
torch.set_float32_matmul_precision('high')

In [3]:
class Inception(nn.Module):
    def __init__(self, in_channels, path1_channels, path2_channels, path3_channels, path4_channels):
        super().__init__()
        self.path1_block1 = nn.Conv2d(
            in_channels, path1_channels, kernel_size=(1, 1))
        self.path2_block1 = nn.Conv2d(
            in_channels, path2_channels[0], kernel_size=(1, 1))
        self.path2_block2 = nn.Conv2d(
            path2_channels[0], path2_channels[1], kernel_size=(3, 3), padding=1)
        self.path3_block1 = nn.Conv2d(
            in_channels, path3_channels[0], kernel_size=1)
        self.path3_block2 = nn.Conv2d(
            path3_channels[0], path3_channels[1], kernel_size=(5, 5), padding=2)
        self.path4_block1 = nn.MaxPool2d(
            kernel_size=(3, 3), stride=1, padding=1)
        self.path4_block2 = nn.Conv2d(
            in_channels, path4_channels, kernel_size=1)

    def forward(self, x):
        p1 = F.relu(self.path1_block1(x))
        p2 = F.relu(self.path2_block2(F.relu(self.path2_block1(x))))
        p3 = F.relu(self.path3_block2(F.relu(self.path3_block1(x))))
        p4 = F.relu(self.path4_block2(F.relu(self.path4_block1(x))))
        return torch.cat((p1, p2, p3, p4), dim=1)

In [4]:
example_inception_layer = Inception(1, 2, (2, 3), (2, 3), 2)

sample_data = torch.randn((1, 1, 28, 28))

sample_data.shape, example_inception_layer(sample_data).shape

(torch.Size([1, 1, 28, 28]), torch.Size([1, 10, 28, 28]))

In [5]:
class GoogLeNet(L.LightningModule):
    def __init__(self, lr=0.1):
        super().__init__()
        stage1 = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        stage2 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(64, 192, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        stage3 = nn.Sequential(
            Inception(192, 64, (96, 128), (16, 32), 32),
            Inception(256, 128, (128, 192), (32, 96), 64),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        stage4 = nn.Sequential(
            Inception(480, 192, (96, 208), (16, 48), 64),
            Inception(512, 160, (112, 224), (24, 64), 64),
            Inception(512, 128, (128, 256), (24, 64), 64),
            Inception(512, 112, (144, 288), (32, 64), 64),
            Inception(528, 256, (160, 320), (32, 128), 128),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        stage5 = nn.Sequential(
            Inception(832, 256, (160, 320), (32, 128), 128),
            Inception(832, 384, (192, 384), (48, 128), 128),
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten()
        )
        self.net = nn.Sequential(
            stage1,
            stage2,
            stage3,
            stage4,
            stage5,
            nn.Linear(1024, 10)
        )

        self.net.apply(GoogLeNet.__init_vars)
        self.lr = lr
        self.loss = nn.CrossEntropyLoss()
        self.accuracy = Accuracy(task='multiclass', num_classes=10)

    def __init_vars(layer):
        if isinstance(layer, nn.Conv2d):
            nn.init.kaiming_uniform_(layer.weight, 0.2)

    def forward(self, X):
        return self.net(X)

    def training_step(self, batch, batch_idx):
        loss, output, y = self.__common_step(batch, batch_idx)
        accuracy = self.accuracy(output, y)
        self.logger.log_metrics({"training_loss": loss, "train_acc": accuracy})
        return loss

    def validation_step(self, batch, batch_idx):
        loss, output, y = self.__common_step(batch, batch_idx)
        accuracy = self.accuracy(output, y)
        self.logger.log_metrics(
            {"validation_loss": loss, "validation_acc": accuracy})
        return loss

    def test_step(self, batch, batch_idx):
        loss, output, y = self.__common_step(batch, batch_idx)
        accuracy = self.accuracy(output, y)
        self.logger.log_metrics({"test_loss": loss, "test_acc": accuracy})
        return loss

    def on_train_start(self):
        self.logger.log_hyperparams({"learning_rate": self.lr})

    def __common_step(self, batch, batch_idx):
        X, y = batch
        output = self(X)
        loss = self.loss(output, y)
        return loss, output, y

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=self.lr)

In [6]:
model = GoogLeNet(lr=0.05)
dataset = MnistDataModule(image_size=(96, 96), batch_size=32)
logger = MLFlowLogger(run_name="GoogLeNet mnist")

trainer = L.Trainer('gpu', logger=logger, max_epochs=10)

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [7]:
trainer.fit(model, dataset)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type               | Params | Mode 
--------------------------------------------------------
0 | net      | Sequential         | 6.0 M  | train
1 | loss     | CrossEntropyLoss   | 0      | train
2 | accuracy | MulticlassAccuracy | 0      | train
--------------------------------------------------------
6.0 M     Trainable params
0         Non-trainable params
6.0 M     Total params
23.910    Total estimated model params size (MB)
93        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


In [8]:
trainer.validate(model, dataset)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation: |          | 0/? [00:00<?, ?it/s]

[{}]

In [9]:
trainer.test(model, dataset)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

[{}]