In [1]:
import torch
from torch import nn
import pytorch_lightning as L
from torch.utils import data
from torchvision.transforms import v2
import torchvision
import random
import matplotlib.pyplot as plt
from torchmetrics import Metric
import torchmetrics

In [2]:
class MnistDataModule(L.LightningDataModule):
    """LightningDataModule serving FashionMNIST images resized to 224x224.

    Despite the class name, the dataset loaded is ``FashionMNIST``. The 60000
    training images are split into 50000 training and 10000 validation
    samples; the official test split is exposed through ``test_dataloader``.

    Args:
        data_dir: Directory the dataset is downloaded to and read from.
        batch_size: Batch size used by every dataloader.
        num_workers: Number of worker processes used by every dataloader.
        split_seed: Seed of the generator that drives the train/validation
            split, so that every call to ``setup`` yields the same split.
    """

    def __init__(self, data_dir="./data", batch_size=256, num_workers=16,
                 split_seed=42):
        super().__init__()

        self.data_dir = data_dir
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.split_seed = split_seed
        # PIL image -> tensor image -> float32 scaled to [0, 1] -> 224x224.
        self.trans = v2.Compose([
            v2.ToImage(),
            v2.ToDtype(torch.float32, scale=True),
            v2.Resize((224, 224)),
        ])

    def prepare_data(self):
        """Download both FashionMNIST splits into ``data_dir`` if missing."""
        for is_train in (True, False):
            torchvision.datasets.FashionMNIST(
                root=self.data_dir, train=is_train, download=True)

    def setup(self, stage=None):
        """Build the train, validation and test datasets.

        Args:
            stage: Stage name passed by the trainer. All datasets are built
                regardless of its value.
        """
        entire_dataset = torchvision.datasets.FashionMNIST(
            root=self.data_dir, train=True, transform=self.trans, download=False)

        # setup() runs again for every fit/validate/test call. Without a fixed
        # generator each call would draw a different split, so a later
        # validate() would score samples the model was trained on.
        split_generator = torch.Generator().manual_seed(self.split_seed)
        self.train_ds, self.val_ds = data.random_split(
            entire_dataset, [50000, 10000], generator=split_generator)

        # The test split needs the same transform as the training data:
        # untransformed samples are PIL images, which the default collate
        # function cannot batch. The files were fetched by prepare_data().
        self.test_ds = torchvision.datasets.FashionMNIST(
            root=self.data_dir, train=False, transform=self.trans, download=False)

    def _make_loader(self, dataset, shuffle):
        """Create a DataLoader over ``dataset`` using the module's settings."""
        return data.DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=shuffle,
            num_workers=self.num_workers,
            # DataLoader rejects persistent_workers=True when num_workers == 0.
            persistent_workers=self.num_workers > 0,
        )

    def train_dataloader(self):
        """Return the shuffled dataloader over the training subset."""
        return self._make_loader(self.train_ds, shuffle=True)

    def val_dataloader(self):
        """Return the unshuffled dataloader over the validation subset."""
        return self._make_loader(self.val_ds, shuffle=False)

    def test_dataloader(self):
        """Return the unshuffled dataloader over the test split."""
        return self._make_loader(self.test_ds, shuffle=False)

In [3]:
class AlexNet(L.LightningModule):
    """AlexNet-style CNN for single-channel images and 10 output classes.

    The first linear layer expects 6400 flattened features, which is what the
    convolutional stack produces (256 channels x 5 x 5) for 224x224 inputs.
    Training uses cross-entropy loss and plain SGD with a learning rate of 0.01.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.net = nn.Sequential(
            nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=1), nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(96, 256, kernel_size=5, padding=2), nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(256, 384, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv2d(384, 384, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Flatten(),
            nn.Linear(6400, 4096), nn.ReLU(), nn.Dropout(p=0.5),
            nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(p=0.5),
            nn.Linear(4096, 10)
        )

        self.loss = nn.CrossEntropyLoss()
        # One metric object per phase: a shared instance would accumulate
        # training and validation batches into the same internal state.
        # `accuracy` keeps its original name and tracks the training phase.
        self.accuracy = torchmetrics.Accuracy(
            task="multiclass", num_classes=10)
        self.val_accuracy = torchmetrics.Accuracy(
            task="multiclass", num_classes=10)
        self.test_accuracy = torchmetrics.Accuracy(
            task="multiclass", num_classes=10)

    def forward(self, X):
        """Return the class logits for the image batch ``X``."""
        return self.net(X)

    def _loss_and_update(self, batch, metric):
        """Run the forward pass, update ``metric`` and return the batch loss."""
        X, y = batch
        output = self(X)
        metric.update(output, y)
        return self.loss(output, y)

    def training_step(self, batch, batch_idx):
        """Compute the training loss and log epoch-level loss and accuracy."""
        loss = self._loss_and_update(batch, self.accuracy)
        # Logging the Metric object (not a tensor) lets Lightning call
        # compute() and reset() on it at the end of every epoch.
        self.log_dict({"training_loss": loss, "train_accuracy": self.accuracy},
                      on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute the validation loss and log loss and accuracy."""
        loss = self._loss_and_update(batch, self.val_accuracy)
        self.log_dict({"validation_loss": loss,
                       "validation_accuracy": self.val_accuracy})
        return loss

    def test_step(self, batch, batch_idx):
        """Compute the test loss and log loss and accuracy."""
        loss = self._loss_and_update(batch, self.test_accuracy)
        self.log_dict({"test_loss": loss, "test_accuracy": self.test_accuracy})
        return loss

    def configure_optimizers(self):
        """Return an SGD optimizer over all parameters with lr=0.01."""
        return torch.optim.SGD(self.parameters(), lr=0.01)

In [4]:
# Seed the Python, NumPy and PyTorch RNGs (and dataloader workers) so that
# weight initialisation, shuffling and dropout are repeatable across runs.
L.seed_everything(42, workers=True)

model = AlexNet()
data_module = MnistDataModule()

# "auto" selects the GPU when one is available and falls back to the CPU
# otherwise, so the notebook also runs on machines without CUDA.
trainer = L.Trainer(accelerator="auto", max_epochs=10)
# Pass the data module by keyword; the second positional slot of fit() is
# `train_dataloaders`.
trainer.fit(model, datamodule=data_module)

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
f:\code\deep-learning\.venv\Lib\site-packages\pytorch_lightning\trainer\connectors\logger_connector\logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti SUPER') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


In [6]:
# Evaluate the trained model on the data module's validation dataloader;
# the returned list of metric dicts is displayed as the cell output.
trainer.validate(model=model, datamodule=data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   validation_accuracy      0.7077000141143799
     validation_loss        0.7692745327949524
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'validation_loss': 0.7692745327949524,
  'validation_accuracy': 0.7077000141143799}]