In [1]:
import os

import pytorch_lightning as pl
import torch
from dotenv import load_dotenv

# Load environment variables from the file at ../.env (path is relative to the
# current working directory).
load_dotenv("../.env")

# Dataset root folder; indexing os.environ raises KeyError if DATASET_FOLDER is unset.
dataset_folder = os.environ["DATASET_FOLDER"]

In [3]:
from pl_bolts.datamodules.vision_datamodule import VisionDataModule

In [7]:
from typing import Any, Callable, Optional, Union

In [48]:
from typing import Any, Callable, List, Optional, Tuple, Union

import pytorch_lightning as pl
import torch
from torch.utils.data import DataLoader, Dataset, Subset, random_split
from torchvision import transforms
from torchvision.datasets import ImageFolder


class Logo2kDataModule(pl.LightningDataModule):
    """Serve an image-folder dataset as seeded train/val/test splits.

    The folder at ``data_dir`` is read with ``ImageFolder`` and partitioned
    with ``random_split`` driven by a generator seeded with ``seed``, so the
    partition is the same on every ``setup`` call.

    Args:
        data_dir: Root folder handed to ``ImageFolder``.
        train_val_test_split: Three floats (fractions of the dataset) or three
            ints (absolute sample counts), in train/val/test order.
        num_workers: Worker count passed to every ``DataLoader``.
        batch_size: Batch size passed to every ``DataLoader``.
        seed: Seed of the generator used by ``random_split``.
        shuffle: Stored on the instance only. The loaders built by this class
            do not read it: the train loader always shuffles and the val/test
            loaders never do.
        *args, **kwargs: Forwarded to ``pl.LightningDataModule.__init__``.
    """

    name = "Logo2k+"
    dims = (3, 256, 256)

    def __init__(
        self,
        data_dir: str,
        train_val_test_split: Union[
            Tuple[float, float, float], Tuple[int, int, int]
        ] = (0.7, 0.2, 0.1),
        num_workers: int = 16,
        batch_size: int = 32,
        seed: int = 42,
        shuffle: bool = False,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        super().__init__(
            *args,
            **kwargs,
        )
        self.data_dir = data_dir
        self.train_val_test_split = train_val_test_split
        self.num_workers = num_workers
        self.batch_size = batch_size
        self.seed = seed
        self.shuffle = shuffle

    @property
    def num_classes(self) -> int:
        """Number of target classes.

        NOTE(review): hard-coded to 10 rather than derived from the folder
        contents -- confirm it matches the class folders under ``data_dir``.
        """
        return 10

    def setup(self, stage: Optional[str] = None) -> None:
        """Create the split datasets required for ``stage``.

        Args:
            stage: ``"fit"`` builds the train and val datasets, ``"test"``
                builds the test dataset, ``None`` builds all three.
        """
        # No transform is given to ImageFolder: each split is wrapped in
        # SubsetTransform below, which applies the transform per sample.
        dataset = ImageFolder(self.data_dir)
        splits = self._get_splits(dataset)
        # A freshly seeded generator makes the partition identical across
        # calls, so the "fit" and "test" stages see consistent splits.
        train, val, test = random_split(
            dataset, splits, generator=torch.Generator().manual_seed(self.seed)
        )
        if stage == "fit" or stage is None:
            # The training split uses the same (augmentation-free) transforms
            # as evaluation; this class defines no separate train transforms.
            self.train_dataset = SubsetTransform(
                train,
                self.test_transforms,
            )
            self.val_dataset = SubsetTransform(val, self.test_transforms)

        if stage == "test" or stage is None:
            self.test_dataset = SubsetTransform(test, self.test_transforms)

    def train_dataloader(self):
        """Return the shuffled loader over the training split."""
        return self._data_loader(self.train_dataset, shuffle=True)

    def val_dataloader(self):
        """Return the unshuffled loader over the validation split."""
        return self._data_loader(self.val_dataset)

    def test_dataloader(self):
        """Return the unshuffled loader over the test split."""
        return self._data_loader(self.test_dataset)

    @property
    def test_transforms(self) -> Callable:
        """Tensor conversion followed by per-channel normalisation.

        NOTE(review): the mean/std values look like the usual ImageNet
        statistics -- confirm. No resizing is performed here.
        """
        return transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)
                ),
            ]
        )

    def _get_splits(self, dataset: Dataset) -> List[int]:
        """Translate ``train_val_test_split`` into absolute split lengths.

        Args:
            dataset: Dataset whose length the fractions are applied to.

        Returns:
            ``[train_len, val_len, test_len]``. Integer splits are returned
            unchanged. For fractional splits the train and val lengths are
            truncated products and the test split receives the remainder, so
            the three lengths always sum to ``len(dataset)``.

        Raises:
            ValueError: If the first split entry is neither int nor float.
        """
        train, val, test = self.train_val_test_split
        if isinstance(train, int):
            return [train, val, test]  # type: ignore
        elif isinstance(train, float):
            dataset_len = len(dataset)  # type: ignore[arg-type]
            train_len = int(train * dataset_len)
            # Use the *validation* fraction here; the test split is whatever
            # remains, which absorbs the truncation of the two products.
            val_len = int(val * dataset_len)
            test_len = dataset_len - train_len - val_len

            return [train_len, val_len, test_len]
        else:
            raise ValueError(f"Unsupported type {type(self.train_val_test_split[0])}")

    def _data_loader(self, dataset: Dataset, shuffle: bool = False) -> DataLoader:
        """Wrap ``dataset`` in a ``DataLoader`` using the module's batch settings."""
        return DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=shuffle,
            num_workers=self.num_workers,
        )


class SubsetTransform(Dataset):
    """Dataset view that applies a transform to the samples of a subset.

    Args:
        subset: Indexable collection whose items unpack to ``(sample, target)``.
        transforms: Optional callable applied to the sample only; the target
            is passed through untouched.
    """

    def __init__(self, subset: Subset, transforms: Optional[Callable] = None):
        self.subset = subset
        self.transforms = transforms

    def __len__(self):
        """Length of the wrapped subset."""
        return len(self.subset)

    def __getitem__(self, index):
        """Return ``(sample, target)`` at ``index``, transforming the sample if configured."""
        sample, target = self.subset[index]
        # Truthiness (not an ``is None`` test) decides whether to transform.
        transformed = self.transforms(sample) if self.transforms else sample
        return transformed, target


In [49]:
# Data module over the folder named by DATASET_FOLDER; every other setting
# (split, batch size, workers, seed) keeps its default.
dm = Logo2kDataModule(dataset_folder)

In [62]:
from torchvision import models
import pytorch_lightning as pl

import torch.nn.functional as F

from torch import nn, optim


class Logo2kTransferModel(pl.LightningModule):
    """Transfer-learning classifier: frozen pretrained ResNet-50 + linear head.

    Only the linear head has trainable parameters; every backbone parameter
    has ``requires_grad`` switched off.

    Args:
        output_classes: Number of classes produced by the linear head.
        lr: Learning rate handed to the Adam optimizer.
    """

    def __init__(self, output_classes: int, lr: float = 1e-4):
        super().__init__()

        self.save_hyperparameters()
        self.output_classes = output_classes
        self.lr = lr

        # Load pretrained model (except for final layer), and freeze it
        backbone = models.resnet50(pretrained=True)
        backbone.eval()
        for param in backbone.parameters():
            param.requires_grad = False

        num_filters = backbone.fc.in_features
        # Drop the final fully connected layer; everything before it becomes
        # the feature extractor.
        layers = list(backbone.children())[:-1]
        self.feature_extractor = nn.Sequential(*layers)

        self.classifier = nn.Linear(num_filters, self.output_classes)

    def _logits(self, x):
        """Return the raw (unnormalised) class scores for a batch of images."""
        # The extractor is a registered submodule, so a ``.train()`` call on
        # this module (as happens during fitting) switches it back to training
        # mode and its BatchNorm running statistics would start drifting.
        # Re-assert eval mode before every pass so the frozen backbone stays
        # truly frozen.
        self.feature_extractor.eval()
        features = self.feature_extractor(x)
        features = features.view(features.size(0), -1)
        return self.classifier(features)

    def forward(self, x):
        """Return class probabilities (softmax over dim 1) for ``x``."""
        return F.softmax(self._logits(x), dim=1)

    def _shared_step(self, batch, log_name):
        """Compute and log the cross-entropy loss of one ``(x, y)`` batch.

        The loss is taken on raw logits with ``F.cross_entropy``. Feeding the
        softmax output of ``forward`` to ``F.nll_loss`` would be incorrect
        because ``nll_loss`` expects log-probabilities.
        """
        x, y = batch
        loss = F.cross_entropy(self._logits(x), y)
        self.log(log_name, loss, on_step=True, on_epoch=True, logger=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Return the training loss for ``batch``; logged as ``train_loss``."""
        return self._shared_step(batch, "train_loss")

    def validation_step(self, batch, batch_idx):
        """Return the validation loss for ``batch``; logged as ``val_loss``."""
        return self._shared_step(batch, "val_loss")

    def test_step(self, batch, batch_idx):
        """Return the test loss for ``batch``; logged as ``test_loss``."""
        return self._shared_step(batch, "test_loss")

    def configure_optimizers(self):
        """Return an Adam optimizer over the module parameters at ``self.lr``."""
        return optim.Adam(self.parameters(), lr=self.lr)

In [59]:
# Size the classification head from the data module's class count.
model = Logo2kTransferModel(dm.num_classes)

In [60]:
# gpus=-1 requests every available GPU; training stops after at most 10 epochs.
trainer = pl.Trainer(gpus=-1, max_epochs=10)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores


In [61]:
# Fit the model on the loaders supplied by the data module.
# NOTE(review): the saved output of this cell is from execution 61, while the
# model class cell was last executed as 62 -- re-run top to bottom to confirm
# whether the recorded error still applies.
trainer.fit(model, dm)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type       | Params
-------------------------------------------------
0 | feature_extractor | Sequential | 23.5 M
1 | classifier        | Linear     | 20.5 K
-------------------------------------------------
20.5 K    Trainable params
23.5 M    Non-trainable params
23.5 M    Total params
94.114    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

AttributeError: 'NoneType' object has no attribute 'dim'