In [1]:
import torch
from lightning_datasets import MnistDataModule
import pytorch_lightning as L
from torch import nn
from torchmetrics import Accuracy
from pytorch_lightning.loggers import MLFlowLogger

In [2]:
class VGG(L.LightningModule):
    def __init__(self):
        super().__init__()
        self.net = VGG.vgg()
        self.loss = nn.CrossEntropyLoss()
        self.accuracy = Accuracy(task='multiclass', num_classes=10)
        self.lr = 0.05

    def forward(self, X):
        return self.net(X)

    def training_step(self, batch, batch_idx):
        loss, out, y = self.__common_step(batch, batch_idx)
        train_acc = self.accuracy(out, y)
        self.logger.log_metrics(
            {"training_loss": loss, "train_acc": train_acc})
        return loss

    def validation_step(self, batch, batch_idx):
        loss, out, y = self.__common_step(batch, batch_idx)
        val_acc = self.accuracy(out, y)
        self.logger.log_metrics(
            {"validation_loss": loss, "validation_acc": val_acc})
        return loss

    def test_step(self, batch, batch_idx):
        loss, out, y = self.__common_step(batch, batch_idx)
        test_acc = self.accuracy(out, y)
        self.logger.log_metrics(
            {"test_loss": loss, "test_acc": test_acc})
        return loss

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), self.lr)

    def __common_step(self, batch, batch_idx):
        X, y = batch
        out = self(X)
        loss = self.loss(out, y)
        return loss, out, y

    def vgg_block(num_conv, in_channels, out_channels):
        layers = []
        for _ in range(num_conv):
            layers.append(nn.Conv2d(in_channels, out_channels,
                          kernel_size=3, padding=1))
            layers.append(nn.ReLU())
            in_channels = out_channels
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*layers)

    def vgg():
        conv_arch = [(1, 64), (1, 128), (2, 256), (2, 512), (2, 512)]
        in_channels = 1

        conv_blks = []

        for num_convs, out_channels in conv_arch:
            conv_blks.append(VGG.vgg_block(
                num_convs, in_channels, out_channels))
            in_channels = out_channels

        return nn.Sequential(
            *conv_blks,
            nn.Flatten(),
            nn.Linear(out_channels*7*7, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 10)
        )

In [3]:
dataset = MnistDataModule(batch_size=128, image_size=(224, 224))
model = VGG()

logger = MLFlowLogger()

trainer = L.Trainer('gpu', logger=logger, min_epochs=5, max_epochs=10)
trainer.fit(model, dataset)

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti SUPER') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type               | Params | Mode 
--------------------------------------------------------
0 | net      | Sequential         | 128 M  | train
1 | loss     | CrossEntropyLoss   | 0      | train
2 | accuracy | MulticlassAccuracy | 0      | train
-------------------------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


In [4]:
trainer.validate(model, dataset)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation: |          | 0/? [00:00<?, ?it/s]

[{}]

In [5]:
trainer.test(model, dataset)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

[{}]