In [1]:
!pip install einops

Collecting einops
  Downloading einops-0.8.0-py3-none-any.whl.metadata (12 kB)
Downloading einops-0.8.0-py3-none-any.whl (43 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.2/43.2 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: einops
Successfully installed einops-0.8.0


In [2]:
import os
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms
from PIL import Image
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor
from einops import rearrange


In [3]:
# Dataset Class
class CustomDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path, label = self.data[idx]
        try:
            if not os.path.exists(img_path):
                print(f"Image not found: {img_path}. Skipping.")
                return self.__getitem__((idx + 1) % len(self.data))
            image = Image.open(img_path).convert('RGB')
        except Exception as e:
            print(f"Skipping corrupted image: {img_path}, Error: {e}")
            return self.__getitem__((idx + 1) % len(self.data))
        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(label, dtype=torch.long)



In [4]:
# Data Module
class CustomDataModule(pl.LightningDataModule):
    def __init__(self, dataset_dir, batch_size=64):
        super().__init__()
        self.dataset_dir = dataset_dir
        self.batch_size = batch_size
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def prepare_data(self):
        self.data_list = []
        metadata_dir = os.path.join(self.dataset_dir, 'metadata')
        for file_name in os.listdir(metadata_dir):
            metadata_path = os.path.join(metadata_dir, file_name)
            with open(metadata_path, 'r') as file:
                for line in file:
                    tokens = line.strip().split()
                    img_rel_path = tokens[0] + "/" + tokens[1]
                    label = int(tokens[3])
                    img_path = os.path.join(self.dataset_dir, img_rel_path)
                    self.data_list.append((img_path, label))
        print(f"Total valid images loaded: {len(self.data_list)}")

    def setup(self, stage=None):
        dataset = CustomDataset(self.data_list, transform=self.transform)
        train_size = int(0.8 * len(dataset))
        val_size = int(0.1 * len(dataset))
        test_size = len(dataset) - train_size - val_size
        self.train_set, self.val_set, self.test_set = random_split(
            dataset, [train_size, val_size, test_size])

    def train_dataloader(self):
        return DataLoader(self.train_set, batch_size=self.batch_size, shuffle=True, num_workers=4, pin_memory=True)

    def val_dataloader(self):
        return DataLoader(self.val_set, batch_size=self.batch_size, num_workers=4, pin_memory=True)

    def test_dataloader(self):
        return DataLoader(self.test_set, batch_size=self.batch_size, num_workers=4, pin_memory=True)


In [5]:
# MobileViT Model
class MobileViT(pl.LightningModule):
    def __init__(self, num_classes=2, learning_rate=1e-4):
        super().__init__()
        self.learning_rate = learning_rate

        # Convolutional Stem
        self.stem = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
        )

        # MobileViT Block
        self.mobilevit_block = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )

        # Transformer Encoder
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=64, nhead=4, dim_feedforward=128, activation='gelu'
        )
        self.transformer = nn.TransformerEncoder(self.encoder_layer, num_layers=4)

        # Classification Head
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x):
        x = self.stem(x)
        x = self.mobilevit_block(x)
        x = rearrange(x, "b c h w -> b (h w) c")  # Flatten spatial dimensions
        x = self.transformer(x)
        x = x.mean(dim=1)  # Global Average Pooling
        return self.fc(x)

    def training_step(self, batch, batch_idx):
        images, labels = batch
        logits = self(images)
        loss = F.cross_entropy(logits, labels)
        preds = torch.argmax(logits, dim=1)
        acc = (preds == labels).float().mean()
        self.log('train_loss', loss)
        self.log('train_acc', acc, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        images, labels = batch
        logits = self(images)
        loss = F.cross_entropy(logits, labels)
        preds = torch.argmax(logits, dim=1)
        acc = (preds == labels).float().mean()
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)

    def test_step(self, batch, batch_idx):
        images, labels = batch
        logits = self(images)
        loss = F.cross_entropy(logits, labels)
        preds = torch.argmax(logits, dim=1)
        acc = (preds == labels).float().mean()
        self.log('test_loss', loss)
        self.log('test_acc', acc, prog_bar=True)

    def configure_optimizers(self):
        return torch.optim.AdamW(self.parameters(), lr=self.learning_rate)

    def on_train_epoch_end(self):
        train_loss = self.trainer.callback_metrics.get('train_loss', None)
        train_acc = self.trainer.callback_metrics.get('train_acc', None)
        if train_loss is not None and train_acc is not None:
            print(f"Epoch {self.current_epoch + 1}: Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}")

    def on_validation_epoch_end(self):
        val_loss = self.trainer.callback_metrics.get('val_loss', None)
        val_acc = self.trainer.callback_metrics.get('val_acc', None)
        if val_loss is not None and val_acc is not None:
            print(f"Epoch {self.current_epoch + 1}: Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.4f}")



In [6]:
# Main Execution
dataset_dir = '/kaggle/input/lra-pathfinder-32/pathfinder32/curv_contour_length_14'
data_module = CustomDataModule(dataset_dir, batch_size=64)
model = MobileViT(learning_rate=1e-4)

logger = TensorBoardLogger("tb_logs", name="mobilevit")

# Callbacks
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",
    dirpath="checkpoints",
    filename="mobilevit-{epoch:02d}-{val_loss:.2f}",
    save_top_k=-1,
    mode="min",
    save_last=True,
)
lr_monitor = LearningRateMonitor(logging_interval='step')

trainer = pl.Trainer(
    max_epochs=10,
    logger=logger,
    accelerator="gpu" if torch.cuda.is_available() else "cpu",
    devices=1,
    callbacks=[checkpoint_callback, lr_monitor],
    precision=16,
)

# Training
print("Trainer setup complete. Starting data preparation...")
data_module.prepare_data()
print("Data preparation complete.")

/opt/conda/lib/python3.10/site-packages/lightning_fabric/connector.py:571: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!


Trainer setup complete. Starting data preparation...
Total valid images loaded: 200000
Data preparation complete.


In [7]:
start_time = time.time()
print("Starting training...")
trainer.fit(model, data_module)
end_time = time.time()
training_time = end_time - start_time
print(f"Training complete. Time taken: {training_time:.2f} seconds")


Starting training...
Total valid images loaded: 200000


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

  self.pid = os.fork()


Epoch 1: Validation Loss: 0.9375, Validation Accuracy: 0.4531


Training: |          | 0/? [00:00<?, ?it/s]

  self.pid = os.fork()


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 1: Validation Loss: 0.6935, Validation Accuracy: 0.4972
Epoch 1: Train Loss: 0.6976, Train Accuracy: 0.4062


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 2: Validation Loss: 0.6932, Validation Accuracy: 0.5027
Epoch 2: Train Loss: 0.6931, Train Accuracy: 0.5000


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 3: Validation Loss: 0.6931, Validation Accuracy: 0.5037
Epoch 3: Train Loss: 0.6937, Train Accuracy: 0.4688


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 4: Validation Loss: 0.6931, Validation Accuracy: 0.5038
Epoch 4: Train Loss: 0.6933, Train Accuracy: 0.5625


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 5: Validation Loss: 0.6931, Validation Accuracy: 0.5020
Epoch 5: Train Loss: 0.6935, Train Accuracy: 0.5000


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 6: Validation Loss: 0.6932, Validation Accuracy: 0.4956
Epoch 6: Train Loss: 0.6936, Train Accuracy: 0.4531


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 7: Validation Loss: 0.6934, Validation Accuracy: 0.4972
Epoch 7: Train Loss: 0.6924, Train Accuracy: 0.5312


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 8: Validation Loss: 0.6931, Validation Accuracy: 0.5012
Epoch 8: Train Loss: 0.6925, Train Accuracy: 0.5312


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 9: Validation Loss: 0.6932, Validation Accuracy: 0.4970
Epoch 9: Train Loss: 0.6936, Train Accuracy: 0.4844


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 10: Validation Loss: 0.6932, Validation Accuracy: 0.4972
Epoch 10: Train Loss: 0.6941, Train Accuracy: 0.3906
Training complete. Time taken: 7571.28 seconds


In [8]:
# Testing
print("Testing the model...")
start_time = time.time()
trainer.test(model, data_module)
end_time = time.time()
testing_time = end_time - start_time
print(f"Testing complete. Time taken: {testing_time:.2f} seconds")

# Calculate Metrics
test_result = trainer.callback_metrics
test_acc = test_result.get('test_acc', None)
if test_acc is not None:
    test_acc = test_acc.item()
else:
    test_acc = 0.0

# Additional Metrics
num_params = sum(p.numel() for p in model.parameters())
efficiency = test_acc / (torch.log(torch.tensor(training_time + testing_time + 1)) * torch.log(torch.tensor(num_params + 1)))

# Print Metrics
print("\nTraining Metrics:")
print(f"Test Accuracy: {test_acc:.4f}")
print(f"Number of Parameters: {num_params}")
print(f"Efficiency Score: {efficiency:.4f}")

Testing the model...
Total valid images loaded: 200000


Testing: |          | 0/? [00:00<?, ?it/s]

Testing complete. Time taken: 29.02 seconds

Training Metrics:
Test Accuracy: 0.4975
Number of Parameters: 191298
Efficiency Score: 0.0046
