In [6]:
from pathlib import Path
from datasets import load_dataset, Image

DATA_DIR = Path("/kaggle/input/asl-alphabet")
TRAIN_DIR = DATA_DIR.joinpath("asl_alphabet_train", "asl_alphabet_train")

TRAIN_DATASET = load_dataset("imagefolder", data_dir=TRAIN_DIR, split="train")
TRAIN_DATASET = TRAIN_DATASET.cast_column("image", Image(mode="RGB"))
TRAIN_DATASET = TRAIN_DATASET.with_format("torch")
TRAIN_DATASET.save_to_disk("data/train")

  from .autonotebook import tqdm as notebook_tqdm
Downloading data: 100%|██████████| 87000/87000 [00:03<00:00, 28420.37files/s] 
Computing checksums: 100%|██████████| 87000/87000 [00:23<00:00, 3694.84it/s] 
Generating train split: 87000 examples [00:08, 10238.76 examples/s]
Saving the dataset (3/3 shards): 100%|██████████| 87000/87000 [09:56<00:00, 145.88 examples/s]


In [17]:
import torch
import torchmetrics
import lightning as L
from torch import nn
from torch.nn import functional as F
from lightning.pytorch import callbacks
from torchvision.transforms import v2 as transforms


class AslTranslator(L.LightningModule):
    def __init__(
        self,
        input_dim: tuple[int, int, int],
        output_dim: int,
        lr=5e-5,
        optimizer_cls: type[torch.optim.Optimizer] = torch.optim.SGD,
        lr_scheduler_cls: type[
            torch.optim.lr_scheduler.LRScheduler
        ] = torch.optim.lr_scheduler.CosineAnnealingLR,
        lr_scheduler_kwargs: dict[str, any] = {},
    ):
        super().__init__()
        self.save_hyperparameters()

        channels, _, _ = input_dim
        decoy_tensor = torch.zeros(input_dim)

        conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=channels,
                out_channels=channels * 2,
                kernel_size=3,
            ),
            nn.AvgPool2d(kernel_size=3, stride=2),
            nn.GELU(),
            nn.Dropout(),
        )

        conv2 = nn.Sequential(
            nn.Conv2d(
                in_channels=channels * 2,
                out_channels=channels * 4,
                kernel_size=3,
            ),
            nn.AvgPool2d(kernel_size=3, stride=2),
            nn.GELU(),
            nn.Dropout(),
        )

        conv3 = nn.Sequential(
            nn.Conv2d(
                in_channels=channels * 4,
                out_channels=channels * 8,
                kernel_size=3,
            ),
            nn.AvgPool2d(kernel_size=3, stride=2),
            nn.GELU(),
            nn.Dropout(),
        )

        self.conv = nn.Sequential(conv1, conv2, conv3)
        decoy_tensor: torch.Tensor = self.conv(decoy_tensor)
        decoy_tensor = torch.flatten(decoy_tensor, start_dim=1)
        hidden_size = decoy_tensor.size(1) // 2

        linear1 = nn.Sequential(
            nn.Linear(in_features=decoy_tensor.size(1), out_features=hidden_size),
            nn.GELU(),
            nn.Dropout(),
        )

        linear2 = nn.Sequential(
            nn.Linear(in_features=hidden_size, out_features=hidden_size),
            nn.GELU(),
            nn.Dropout(),
        )

        self.linear = nn.Sequential(linear1, linear2)
        decoy_tensor: torch.Tensor = self.linear(decoy_tensor)

        self.classifier = nn.Linear(
            in_features=decoy_tensor.size(1), out_features=output_dim
        )

    def forward(self, inputs):
        x = self.conv(inputs)
        x = self.linear(x)
        logits = self.classifier(x)
        return logits

    def training_step(self, examples, _):
        features: torch.Tensor = examples["images"]
        targets: torch.Tensor = examples["labels"]

        logits: torch.Tensor = self(features)

        loss = F.cross_entropy(logits, targets)

        preds = logits.argmax(1)
        f1 = torchmetrics.functional.f1_score(preds, targets, task="multiclass")
        acc = torchmetrics.functional.accuracy(preds, targets, task="multiclass")
        recall = torchmetrics.functional.recall(preds, targets, task="multiclass")

        self.log_dict(
            {
                "train_loss": loss.item(),
                "train_f1": f1.item(),
                "train_acc": acc.item(),
                "train_recall": recall.item(),
            },
            prog_bar=True,
            batch_size=targets.size(0),
        )

        return loss

    def validation_step(self, examples, _):
        features: torch.Tensor = examples["images"]
        targets: torch.Tensor = examples["labels"]
        logits: torch.Tensor = self(features)

        loss = F.cross_entropy(logits, targets)

        preds = logits.argmax(1)
        f1 = torchmetrics.functional.f1_score(preds, targets, task="multiclass")
        acc = torchmetrics.functional.accuracy(preds, targets, task="multiclass")
        recall = torchmetrics.functional.recall(preds, targets, task="multiclass")

        self.log_dict(
            {
                "val_loss": loss.item(),
                "val_f1": f1.item(),
                "val_acc": acc.item(),
                "val_recall": recall.item(),
            },
            prog_bar=True,
            batch_size=targets.size(0),
        )

        return loss

    def predict_step(self, examples, index):
        raise NotImplementedError()

    def configure_optimizers(self):
        optimizer_cls: type[torch.optim.Optimizer] = self.hparams.get("optimizer_cls")
        lr_scheduler_cls: type[torch.optim.lr_scheduler.LRScheduler] = self.hparams.get(
            "lr_scheduler_cls"
        )

        lr: float = self.hparams.get("lr")
        lr_scheduler_kwargs: dict[str, any] = self.hparams.get("lr_scheduler_kwargs")

        optimizer = optimizer_cls(self.parameters(), lr=lr)
        lr_scheduler = lr_scheduler_cls(optimizer, **lr_scheduler_kwargs)

        return {"optimizer": optimizer, "lr_scheduler": lr_scheduler}

    def configure_callbacks(self):
        swa = callbacks.StochasticWeightAveraging(swa_lrs=1e-2)
        checkpoint = callbacks.ModelCheckpoint(
            monitor="val_loss", mode="min", save_top_k=5
        )
        return [swa, checkpoint]


In [18]:
import torch
import lightning as L


trainer = L.Trainer()
model = AslTranslator(input_dim=(3, 256, 256), output_dim=29)

INFO: GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs


In [19]:
model

AslTranslator(
  (conv): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1))
      (1): GELU(approximate='none')
      (2): Dropout(p=0.5, inplace=False)
    )
    (1): Sequential(
      (0): Conv2d(6, 12, kernel_size=(3, 3), stride=(1, 1))
      (1): GELU(approximate='none')
      (2): Dropout(p=0.5, inplace=False)
    )
    (2): Sequential(
      (0): Conv2d(12, 24, kernel_size=(3, 3), stride=(1, 1))
      (1): GELU(approximate='none')
      (2): Dropout(p=0.5, inplace=False)
    )
  )
  (linear): Sequential(
    (0): Sequential(
      (0): Linear(in_features=62500, out_features=31250, bias=True)
      (1): GELU(approximate='none')
      (2): Dropout(p=0.5, inplace=False)
    )
    (1): Sequential(
      (0): Linear(in_features=31250, out_features=31250, bias=True)
      (1): GELU(approximate='none')
      (2): Dropout(p=0.5, inplace=False)
    )
    (2): Sequential(
      (0): Linear(in_features=31250, out_features=31250, bias=True)
      (1):

In [9]:
train_dataset = TRAIN_DATASET.train_test_split(0.2, stratify_by_column="label")
train_dataloader = torch.utils.data.DataLoader(train_dataset["train"])
eval_dataloader = torch.utils.data.DataLoader(train_dataset["test"])

trainer.fit(model, train_dataloader, eval_dataloader)

/opt/conda/lib/python3.10/site-packages/lightning/pytorch/loops/utilities.py:72: `max_epochs` was not set. Setting it to 1000 epochs. To train without an epoch limit, set `max_epochs=-1`.
INFO: The following callbacks returned in `LightningModule.configure_callbacks` will override existing callbacks passed to Trainer: ModelCheckpoint


TypeError: CosineAnnealingLR.__init__() missing 1 required positional argument: 'T_max'