In [2]:
from pathlib import Path
from datasets import Dataset

# Location of the on-disk dataset and the basic batching / split settings.
DATA_DIR = Path("/kaggle/input/asl-cv/data")
BATCH_SIZE = 64
VAL_COEFF = 0.2  # fraction of the data held out for validation

# Load the "train" dataset fully into memory and have it return torch tensors.
TRAIN_DATASET = Dataset.load_from_disk(
    DATA_DIR / "train", keep_in_memory=True
).with_format("torch")

# Split sizes derived from the dataset length and the validation fraction.
DATA_LEN = len(TRAIN_DATASET)
VAL_LEN = int(VAL_COEFF * DATA_LEN)
TRAIN_LEN = DATA_LEN - VAL_LEN


In [4]:
import torch
import torchmetrics
import lightning as L
from torch import nn
from torch.nn import functional as F
from lightning.pytorch import callbacks
from torchvision.transforms.v2 import functional as transforms

EPOCHS = 64
SEED = 42

# Seed the random number generators (including DataLoader workers) so that
# weight initialisation, dropout masks and batch shuffling are repeatable
# across "Restart Kernel -> Run All".
L.seed_everything(SEED, workers=True)

# Keep the three checkpoints with the lowest logged "val_loss" and show a
# rich progress bar while training.
CALLBACKS = [
    callbacks.ModelCheckpoint(monitor="val_loss", mode="min", save_top_k=3),
    callbacks.RichProgressBar(),
]

# Train for EPOCHS epochs, clipping the gradient norm to 1.0.
TRAINER = L.Trainer(
    max_epochs=EPOCHS,
    callbacks=CALLBACKS,
    gradient_clip_val=1.0,
    gradient_clip_algorithm="norm",
)


class AslTranslator(L.LightningModule):
    """Convolutional image classifier trained with SGD and cosine LR annealing.

    Architecture: two ``Conv2d -> GELU -> AvgPool2d -> Dropout`` stages, two
    ``Linear -> GELU -> Dropout`` stages that halve the feature width each
    time, and a final linear classifier. Batches are dictionaries with an
    ``"image"`` tensor and a ``"label"`` tensor.

    Args:
        lr: Initial learning rate for SGD.
        num_classes: Number of output classes (size of the logit vector).
        image_size: Side length every input image is resized to before the
            convolutional stack.
        swa_lr: Learning rate handed to ``StochasticWeightAveraging``.
            NOTE(review): the default of 2.0 is 20x the default base ``lr``
            of 0.1 -- confirm this is intended and not a typo.
    """

    def __init__(
        self,
        lr: float = 0.1,
        num_classes: int = 29,
        image_size: int = 64,
        swa_lr: float = 2.0,
    ):
        super().__init__()
        self.lr = lr
        self.num_classes = num_classes
        self.image_size = image_size
        self.swa_lr = swa_lr

        self.conv = nn.Sequential(
            self._conv_stage(3, 32),
            self._conv_stage(32, 64),
        )

        # Push a dummy image through the conv stack to discover the flattened
        # feature size instead of hard-coding it (12544 for 64x64 inputs).
        with torch.no_grad():
            probe = self.conv(torch.zeros((1, 3, image_size, image_size)))
        hidden_size = torch.flatten(probe, start_dim=1).size(1)

        self.linear = nn.Sequential(
            self._linear_stage(hidden_size, hidden_size // 2),
            self._linear_stage(hidden_size // 2, hidden_size // 4),
        )

        # The last linear stage emits hidden_size // 4 features, so there is no
        # need to run a second probe through the (large) linear stack.
        self.classifier = nn.Linear(
            in_features=hidden_size // 4, out_features=num_classes
        )

    @staticmethod
    def _conv_stage(in_channels: int, out_channels: int) -> nn.Sequential:
        """Build one 3x3 conv -> GELU -> 2x2 average pool -> dropout stage."""
        return nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=3,
            ),
            nn.GELU(),
            nn.AvgPool2d(kernel_size=2),
            nn.Dropout(),
        )

    @staticmethod
    def _linear_stage(in_features: int, out_features: int) -> nn.Sequential:
        """Build one linear -> GELU -> dropout stage."""
        return nn.Sequential(
            nn.Linear(in_features=in_features, out_features=out_features),
            nn.GELU(),
            nn.Dropout(),
        )

    def forward(self, inputs):
        """Return class logits for a batch of images.

        The input is cast to the module's dtype with value rescaling
        (``scale=True``) and resized to ``image_size`` x ``image_size``.
        NOTE(review): the conv stack expects 3 channels in dim 1 -- confirm
        the dataset yields channel-first images.
        """
        x = transforms.to_dtype(inputs, dtype=self.dtype, scale=True)
        x = transforms.resize(x, [self.image_size, self.image_size])
        x = self.conv(x)
        x = torch.flatten(x, start_dim=1)
        x = self.linear(x)
        logits = self.classifier(x)
        return logits

    def _shared_step(self, examples, stage: str) -> torch.Tensor:
        """Compute the loss for one batch and log ``<stage>_*`` metrics.

        Args:
            examples: Batch dict with ``"image"`` and ``"label"`` entries.
            stage: Prefix for the logged metric names (``"train"``/``"val"``).

        Returns:
            The cross-entropy loss for the batch.
        """
        features: torch.Tensor = examples["image"]
        targets: torch.Tensor = examples["label"]

        logits: torch.Tensor = self(features)
        loss = F.cross_entropy(logits, targets)
        preds = logits.argmax(1)

        metric_kwargs = {"task": "multiclass", "num_classes": self.num_classes}
        # With micro averaging, single-label multiclass F1, accuracy and recall
        # are the same number. Macro-average F1 and recall so the three logged
        # metrics carry distinct information. These are per-batch values, so
        # treat aggregated figures as approximate.
        f1 = torchmetrics.functional.f1_score(
            preds, targets, average="macro", **metric_kwargs
        )
        acc = torchmetrics.functional.accuracy(
            preds, targets, average="micro", **metric_kwargs
        )
        recall = torchmetrics.functional.recall(
            preds, targets, average="macro", **metric_kwargs
        )

        # Log tensors directly: calling .item() would force a device-to-host
        # synchronisation on every step.
        self.log_dict(
            {
                f"{stage}_loss": loss.detach(),
                f"{stage}_f1": f1,
                f"{stage}_acc": acc,
                f"{stage}_recall": recall,
            },
            prog_bar=True,
            batch_size=targets.size(0),
        )

        return loss

    def training_step(self, examples, _):
        """Run one training batch; logs ``train_*`` metrics and returns the loss."""
        return self._shared_step(examples, "train")

    def validation_step(self, examples, _):
        """Run one validation batch; logs ``val_*`` metrics and returns the loss."""
        return self._shared_step(examples, "val")

    def predict_step(self, examples, index):
        """Return the predicted class index for every image in the batch."""
        features: torch.Tensor = examples["image"]
        logits: torch.Tensor = self(features)

        preds = logits.argmax(1)
        return preds

    def configure_optimizers(self):
        """Create the SGD optimizer and a per-step cosine annealing schedule.

        ``T_max`` is expressed in optimizer steps, so the scheduler must be
        stepped after every batch (``interval="step"``). Lightning's default
        is once per epoch, which would leave the learning rate almost
        unchanged over the whole run.
        """
        optimizer = torch.optim.SGD(self.parameters(), lr=self.lr)
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            # Total number of optimizer steps the attached Trainer will run.
            T_max=self.trainer.estimated_stepping_batches,
        )

        return {
            "optimizer": optimizer,
            "lr_scheduler": {"scheduler": lr_scheduler, "interval": "step"},
        }

    def configure_callbacks(self):
        """Attach stochastic weight averaging using ``self.swa_lr``."""
        swa = callbacks.StochasticWeightAveraging(swa_lrs=self.swa_lr)
        return [swa]


INFO: GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs


In [5]:
import torch
import multiprocessing as mp


# Build the network first; its repr is shown as this cell's output below.
model = AslTranslator()

# Number of CPUs reported by the machine, used to size the DataLoader workers.
NUM_WORKERS = mp.cpu_count()
print(f"NUM_WORKERS={NUM_WORKERS}")

model

NUM_WORKERS=4


AslTranslator(
  (conv): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
      (1): GELU(approximate='none')
      (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
      (3): Dropout(p=0.5, inplace=False)
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
      (1): GELU(approximate='none')
      (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
      (3): Dropout(p=0.5, inplace=False)
    )
  )
  (linear): Sequential(
    (0): Sequential(
      (0): Linear(in_features=12544, out_features=6272, bias=True)
      (1): GELU(approximate='none')
      (2): Dropout(p=0.5, inplace=False)
    )
    (1): Sequential(
      (0): Linear(in_features=6272, out_features=3136, bias=True)
      (1): GELU(approximate='none')
      (2): Dropout(p=0.5, inplace=False)
    )
  )
  (classifier): Linear(in_features=3136, out_features=29, bias=True)
)

In [6]:
# Fixed seed so the stratified train/validation split is identical on every
# run; otherwise "val_loss" (and the checkpoints selected on it) are not
# comparable between runs.
SPLIT_SEED = 42

split_dataset = TRAIN_DATASET.train_test_split(
    test_size=VAL_COEFF,
    stratify_by_column="label",
    seed=SPLIT_SEED,
)

# Training batches are reshuffled every epoch; one CPU is left free for the
# main process.
train_dataloader = torch.utils.data.DataLoader(
    split_dataset["train"],
    batch_size=BATCH_SIZE,
    shuffle=True,
    pin_memory=True,
    num_workers=NUM_WORKERS - 1,
)

# Validation batches keep a fixed order.
eval_dataloader = torch.utils.data.DataLoader(
    split_dataset["test"],
    batch_size=BATCH_SIZE,
    shuffle=False,
    pin_memory=False,
    num_workers=NUM_WORKERS - 1,
)

In [None]:
# Train on the training loader and validate on the held-out loader.
TRAINER.fit(
    model,
    train_dataloaders=train_dataloader,
    val_dataloaders=eval_dataloader,
)