In [1]:
import pandas as pd
import time
from sklearn.metrics import classification_report
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from pathlib import Path
import time
from tqdm import tqdm

In [2]:
# Settings
train_dir = Path('./data/train')
val_dir   = Path('./data/test')  # the "test" folder is what this notebook validates on
img_size = 48
batch_size = 64

# Transforms
# Training pipeline: resize, light geometric augmentation, then scale to [-1, 1].
train_transforms = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(5),
    transforms.RandomAffine(degrees=0, shear=10, translate=(0.2, 0.2)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)  # normalize to [-1, 1]
])

# Validation pipeline: same resize/normalisation, no random augmentation.
val_transforms = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
])

# Datasets
train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transforms)
val_dataset   = datasets.ImageFolder(root=val_dir, transform=val_transforms)
print(f"Train images: {len(train_dataset)}")
print(f"Val images: {len(val_dataset)}")

# ImageFolder assigns label indices per root directory from its sub-folder
# names. If the two roots do not contain the same class folders, the integer
# labels would mean different things in each split and every validation
# metric would be silently wrong, so fail loudly here instead.
assert train_dataset.classes == val_dataset.classes, (
    f"Class folders differ between {train_dir} and {val_dir}: "
    f"{train_dataset.classes} vs {val_dataset.classes}"
)

# DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader   = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

# Class names
class_names = train_dataset.classes
print("Classes:", class_names)

Train images: 33595
Val images: 7178
Classes: ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']


In [3]:
class InvertedResidualBlock(nn.Module):
    """Inverted residual block: optional 1x1 expansion, 3x3 depthwise, 1x1 projection.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by the projection conv.
        stride: stride of the 3x3 depthwise convolution.
        expand_ratio: multiplier applied to ``in_channels`` to get the hidden
            width; when it is 1 the expansion stage is omitted entirely.

    The identity shortcut is used only when ``stride == 1`` and the channel
    count is unchanged; otherwise the block output is returned as is.
    """

    def __init__(self, in_channels, out_channels, stride, expand_ratio):
        super().__init__()
        expanded = in_channels * expand_ratio
        # A skip connection needs matching spatial size and channel count.
        self.use_res_connect = (stride == 1) and (in_channels == out_channels)

        stages = []

        # Stage 1 (optional): pointwise expansion to the hidden width
        if expand_ratio != 1:
            stages += [
                nn.Conv2d(in_channels, expanded, kernel_size=1, bias=False),
                nn.BatchNorm2d(expanded),
                nn.ReLU6(inplace=True),
            ]

        # Stage 2: depthwise 3x3 (groups == channels), carries the stride
        stages += [
            nn.Conv2d(
                expanded,
                expanded,
                kernel_size=3,
                stride=stride,
                padding=1,
                groups=expanded,
                bias=False,
            ),
            nn.BatchNorm2d(expanded),
            nn.ReLU6(inplace=True),
        ]

        # Stage 3: pointwise projection, no activation afterwards
        stages += [
            nn.Conv2d(expanded, out_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channels),
        ]

        self.block = nn.Sequential(*stages)

    def forward(self, x):
        """Run the block, adding the input back when the shortcut is enabled."""
        if not self.use_res_connect:
            return self.block(x)
        return x + self.block(x)

class MobileNetV2_48x48(nn.Module):
    """Small MobileNetV2-style classifier built from ``InvertedResidualBlock``s.

    Layout: 3x3 stem conv (3 -> 32 channels, stride 1), a stack of inverted
    residual blocks, a 1x1 head conv to 128 channels followed by dropout,
    global average pooling and a linear classifier.

    Args:
        num_classes: size of the output logit vector.
        dropout_rate: probability used by the dropout layer at the end of
            ``last_conv`` (and by the standalone ``self.dropout`` module).

    ``forward`` returns raw logits of shape ``(batch, num_classes)``.
    """

    def __init__(self, num_classes=7, dropout_rate=0.0):
        super().__init__()
        self.dropout_rate = dropout_rate
        # Note: this module is registered but never called in forward();
        # the dropout that actually runs is the one inside `last_conv`.
        self.dropout = nn.Dropout(dropout_rate)

        stem_width = 32
        head_width = 128

        # Stem: keeps the input resolution (stride 1, padding 1)
        self.init_conv = nn.Sequential(
            nn.Conv2d(3, stem_width, kernel_size=3, stride=1, padding=1, bias=False),  # 48x48 input
            nn.BatchNorm2d(stem_width),
            nn.ReLU6(inplace=True),
        )

        # One row per stage:
        # (expand ratio t, output channels c, number of blocks n, stride of first block s)
        bottlenecks_cfg = (
            (1, 16, 1, 1),
            (6, 24, 1, 1),  # no downsampling yet
            (6, 32, 2, 2),
            (6, 64, 1, 2),
            (6, 96, 1, 1),
        )

        stages = []
        width = stem_width
        for t, c, n, s in bottlenecks_cfg:
            for rep in range(n):
                # Only the first block of a stage may downsample.
                first_stride = s if rep == 0 else 1
                stages.append(InvertedResidualBlock(width, c, first_stride, t))
                width = c
        self.bottlenecks = nn.Sequential(*stages)

        # Head: 1x1 conv to a fixed width, then dropout on the feature map
        self.last_conv = nn.Sequential(
            nn.Conv2d(width, head_width, kernel_size=1, bias=False),
            nn.BatchNorm2d(head_width),
            nn.ReLU6(inplace=True),
            nn.Dropout(dropout_rate),
        )

        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Linear(head_width, num_classes)

    def forward(self, x):
        """Map an image batch to class logits."""
        features = self.last_conv(self.bottlenecks(self.init_conv(x)))
        pooled = self.pool(features)
        # Collapse the (C, 1, 1) pooled map to a flat vector per sample
        flat = pooled.view(pooled.size(0), -1)
        return self.classifier(flat)

In [4]:
# Build the MobileNetV2 variant once and report how many parameters it has
mobilenet_model = MobileNetV2_48x48(num_classes=len(class_names))
total_params = sum(map(torch.numel, mobilenet_model.parameters()))
print(f"Total parameters: {total_params}")

Total parameters: 132935


In [None]:
# Pick the compute device: Apple MPS first, then CUDA, then CPU.
# `torch.backends.mps` is looked up with getattr so that PyTorch builds
# without that backend module fall through to CUDA/CPU instead of raising
# AttributeError.
mps_backend = getattr(torch.backends, "mps", None)
if mps_backend is not None and mps_backend.is_available() and mps_backend.is_built():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Using device: {device}")
mobilenet_model = MobileNetV2_48x48(num_classes=len(class_names)).to(device)

# Default loss / optimizer for `mobilenet_model`
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(mobilenet_model.parameters(), lr=1e-3,  weight_decay=5e-5)

Using device: mps


In [6]:
def train(model, train_loader, val_loader, optimizer, criterion, num_epochs=15, model_name='mobv2_model.pth'):
    """Train ``model`` and evaluate it on ``val_loader`` after every epoch.

    Relies on two notebook-level globals: ``device`` (batches are moved there)
    and ``class_names`` (labels for the per-class validation report).

    Args:
        model: network to optimise; put in ``train()`` mode for the training
            pass and ``eval()`` mode for the validation pass of each epoch.
        train_loader: iterable of ``(images, labels)`` batches used for updates.
        val_loader: iterable of ``(images, labels)`` batches, evaluated under
            ``torch.no_grad()``.
        optimizer: optimizer stepped once per training batch.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
            The running loss multiplies it by the batch size, i.e. it is
            assumed to return a per-batch mean (the ``nn.CrossEntropyLoss``
            default used by the callers in this notebook).
        num_epochs: number of epochs to run.
        model_name: file name under ``results/`` for the saved ``state_dict``.

    Returns:
        list[dict]: one dict per epoch with keys ``epoch``, ``train_loss``,
        ``val_loss``, ``train_acc``, ``val_acc``, ``val_f1_macro``,
        ``epoch_time_sec`` and ``<class>_<metric>`` for every class in
        ``class_names`` and metric in ``precision``/``recall``/``f1-score``.

    Side effects:
        Prints progress and a per-epoch report, and writes the weights of the
        LAST epoch (not the best-validation epoch) to ``results/<model_name>``.
    """
    history = []  # collect metrics per epoch

    # Create the output directory before training so a missing folder cannot
    # discard a finished run at the final torch.save().
    save_path = f"results/{model_name}"
    Path(save_path).parent.mkdir(parents=True, exist_ok=True)

    # Explicit label ids keep the report aligned with `class_names` even if a
    # class is absent from both the targets and the predictions of an epoch.
    label_ids = list(range(len(class_names)))

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        start_time = time.time()

        # Training phase
        model.train()
        train_loss, correct, total = 0, 0, 0
        for images, labels in tqdm(train_loader, desc="Training"):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight by batch size so the epoch value is a per-sample average
            train_loss += loss.item() * images.size(0)
            _, predicted = outputs.max(1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

        train_acc = correct / total
        train_loss /= total

        # Validation phase
        model.eval()
        val_loss, correct, total = 0, 0, 0
        y_true, y_pred = [], []

        with torch.no_grad():
            for images, labels in tqdm(val_loader, desc="Validation"):
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * images.size(0)

                _, predicted = outputs.max(1)
                correct += (predicted == labels).sum().item()
                total += labels.size(0)

                y_true.extend(labels.cpu().numpy())
                y_pred.extend(predicted.cpu().numpy())

        val_acc = correct / total
        val_loss /= total

        # Classification report
        # zero_division=0 yields the same 0.0 scores as the default for
        # never-predicted classes, but without the UndefinedMetricWarning spam.
        report = classification_report(
            y_true,
            y_pred,
            labels=label_ids,
            target_names=class_names,
            output_dict=True,
            zero_division=0,
        )
        val_f1_macro = report['macro avg']['f1-score']

        epoch_time = time.time() - start_time

        # Collect metrics
        epoch_data = {
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'val_loss': val_loss,
            'train_acc': train_acc,
            'val_acc': val_acc,
            'val_f1_macro': val_f1_macro,
            'epoch_time_sec': epoch_time
        }

        # Add per-class metrics
        for cls in class_names:
            for metric in ['precision', 'recall', 'f1-score']:
                key = f'{cls}_{metric}'
                epoch_data[key] = report[cls][metric]

        history.append(epoch_data)

        # Console output
        print("\nValidation Report:")
        for cls in class_names:
            cls_metrics = report[cls]
            print(f"{cls}: Prec={cls_metrics['precision']:.3f} | Rec={cls_metrics['recall']:.3f} | F1={cls_metrics['f1-score']:.3f}")

        print(f"\nEpoch Summary: Train Loss={train_loss:.4f} | Val Loss={val_loss:.4f} | Train Acc={train_acc:.3f} | Val Acc={val_acc:.3f}")
        print(f"Epoch time: {epoch_time:.2f} seconds")

    torch.save(model.state_dict(), save_path)
    print(f"Saved model to 'results/{model_name}'")

    return history

In [None]:
def build_model(dropout):
    """Create a fresh ``MobileNetV2_48x48`` with the given dropout rate on ``device``.

    The number of output classes comes from the notebook-level ``class_names``.
    """
    net = MobileNetV2_48x48(dropout_rate=dropout, num_classes=len(class_names))
    return net.to(device)

# Define settings to test
configs = [
    # No dropout, no weight decay
    {'name': 'baseline_adam_drop0_wd0', 'optimizer': 'adam', 'lr': 1e-3, 'dropout': 0.0, 'weight_decay': 0.0},
    # Dropout only
    {'name': 'drop15_wd0', 'optimizer': 'adam', 'lr': 1e-3, 'dropout': 0.15, 'weight_decay': 0.0},
    # Weight decay only
    {'name': 'drop0_wd5e5', 'optimizer': 'adam', 'lr': 1e-3, 'dropout': 0.0, 'weight_decay': 5e-5},
    # Combined dropout + weight decay
    {'name': 'drop15_wd5e5', 'optimizer': 'adam', 'lr': 1e-3, 'dropout': 0.15, 'weight_decay': 5e-5},
    # Same but using SGD to test optimizer difference
    {'name': 'drop15_wd5e5_sgd', 'optimizer': 'sgd', 'lr': 1e-2, 'dropout': 0.15, 'weight_decay': 5e-5},
]

# Re-seeding before each run puts every configuration on the same torch RNG
# state, so differences between runs are not just different initial weights.
SEED = 42

# Both the checkpoints and the history CSVs are written under results/.
Path('results').mkdir(parents=True, exist_ok=True)

for config in configs:
    print(f"\nTraining config: {config['name']}")
    torch.manual_seed(SEED)
    model = build_model(config['dropout'])

    if config['optimizer'] == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config['lr'], weight_decay=config['weight_decay'])
    elif config['optimizer'] == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config['lr'], momentum=0.9, weight_decay=config['weight_decay'])
    else:
        # A typo in the config must not silently fall back to SGD.
        raise ValueError(f"Unknown optimizer {config['optimizer']!r} in config {config['name']!r}")

    criterion = nn.CrossEntropyLoss()
    # NOTE(review): the config name is passed as the file name unchanged, so
    # these checkpoints are saved without a '.pth' extension.
    history = train(model, train_loader, val_loader, optimizer, criterion, num_epochs=15, model_name=config['name'])
    pd.DataFrame(history).to_csv(f'results/history_{config["name"]}.csv', index=False)


Training config: mobv2_adam_lr3_dropout0_bn

Epoch 1/15


Training: 100%|██████████| 525/525 [02:23<00:00,  3.65it/s]
Validation: 100%|██████████| 113/113 [00:20<00:00,  5.59it/s]



Validation Report:
angry: Prec=0.288 | Rec=0.033 | F1=0.060
disgust: Prec=0.015 | Rec=0.090 | F1=0.026
fear: Prec=0.231 | Rec=0.023 | F1=0.043
happy: Prec=0.392 | Rec=0.805 | F1=0.527
neutral: Prec=0.333 | Rec=0.079 | F1=0.128
sad: Prec=0.321 | Rec=0.381 | F1=0.348
surprise: Prec=0.555 | Rec=0.587 | F1=0.571

Epoch Summary: Train Loss=1.6617 | Val Loss=1.6650 | Train Acc=0.350 | Val Acc=0.356
Epoch time: 164.02 seconds

Epoch 2/15


Training: 100%|██████████| 525/525 [02:17<00:00,  3.82it/s]
Validation: 100%|██████████| 113/113 [00:19<00:00,  5.66it/s]



Validation Report:
angry: Prec=0.444 | Rec=0.013 | F1=0.024
disgust: Prec=0.017 | Rec=0.459 | F1=0.032
fear: Prec=0.304 | Rec=0.014 | F1=0.026
happy: Prec=0.794 | Rec=0.414 | F1=0.544
neutral: Prec=0.422 | Rec=0.257 | F1=0.320
sad: Prec=0.308 | Rec=0.330 | F1=0.319
surprise: Prec=0.507 | Rec=0.650 | F1=0.570

Epoch Summary: Train Loss=1.4176 | Val Loss=2.1831 | Train Acc=0.460 | Val Acc=0.290
Epoch time: 157.36 seconds

Epoch 3/15


Training: 100%|██████████| 525/525 [02:13<00:00,  3.94it/s]
Validation: 100%|██████████| 113/113 [00:19<00:00,  5.82it/s]



Validation Report:
angry: Prec=0.358 | Rec=0.124 | F1=0.184
disgust: Prec=0.020 | Rec=0.477 | F1=0.038
fear: Prec=0.214 | Rec=0.126 | F1=0.158
happy: Prec=0.810 | Rec=0.487 | F1=0.608
neutral: Prec=0.625 | Rec=0.160 | F1=0.255
sad: Prec=0.350 | Rec=0.378 | F1=0.363
surprise: Prec=0.625 | Rec=0.610 | F1=0.618

Epoch Summary: Train Loss=1.3140 | Val Loss=2.0100 | Train Acc=0.500 | Val Acc=0.326
Epoch time: 152.65 seconds

Epoch 4/15


Training: 100%|██████████| 525/525 [02:16<00:00,  3.84it/s]
Validation: 100%|██████████| 113/113 [00:19<00:00,  5.78it/s]



Validation Report:
angry: Prec=0.442 | Rec=0.194 | F1=0.270
disgust: Prec=0.018 | Rec=0.045 | F1=0.025
fear: Prec=0.291 | Rec=0.142 | F1=0.190
happy: Prec=0.586 | Rec=0.875 | F1=0.702
neutral: Prec=0.493 | Rec=0.461 | F1=0.477
sad: Prec=0.410 | Rec=0.391 | F1=0.400
surprise: Prec=0.603 | Rec=0.715 | F1=0.654

Epoch Summary: Train Loss=1.2424 | Val Loss=1.3618 | Train Acc=0.530 | Val Acc=0.493
Epoch time: 156.40 seconds

Epoch 5/15


Training: 100%|██████████| 525/525 [02:15<00:00,  3.89it/s]
Validation: 100%|██████████| 113/113 [00:20<00:00,  5.56it/s]



Validation Report:
angry: Prec=0.432 | Rec=0.270 | F1=0.332
disgust: Prec=0.037 | Rec=0.171 | F1=0.061
fear: Prec=0.302 | Rec=0.083 | F1=0.130
happy: Prec=0.537 | Rec=0.869 | F1=0.664
neutral: Prec=0.504 | Rec=0.322 | F1=0.393
sad: Prec=0.468 | Rec=0.172 | F1=0.252
surprise: Prec=0.412 | Rec=0.829 | F1=0.551

Epoch Summary: Train Loss=1.1950 | Val Loss=1.5640 | Train Acc=0.548 | Val Acc=0.447
Epoch time: 155.39 seconds

Epoch 6/15


Training: 100%|██████████| 525/525 [02:10<00:00,  4.04it/s]
Validation: 100%|██████████| 113/113 [00:19<00:00,  5.84it/s]



Validation Report:
angry: Prec=0.423 | Rec=0.347 | F1=0.381
disgust: Prec=0.034 | Rec=0.117 | F1=0.053
fear: Prec=0.268 | Rec=0.233 | F1=0.249
happy: Prec=0.655 | Rec=0.823 | F1=0.729
neutral: Prec=0.574 | Rec=0.292 | F1=0.387
sad: Prec=0.504 | Rec=0.217 | F1=0.304
surprise: Prec=0.400 | Rec=0.833 | F1=0.540

Epoch Summary: Train Loss=1.1622 | Val Loss=1.4717 | Train Acc=0.560 | Val Acc=0.469
Epoch time: 149.46 seconds

Epoch 7/15


Training: 100%|██████████| 525/525 [02:24<00:00,  3.63it/s]
Validation: 100%|██████████| 113/113 [00:19<00:00,  5.91it/s]



Validation Report:
angry: Prec=0.512 | Rec=0.240 | F1=0.327
disgust: Prec=0.057 | Rec=0.216 | F1=0.090
fear: Prec=0.345 | Rec=0.157 | F1=0.216
happy: Prec=0.654 | Rec=0.821 | F1=0.728
neutral: Prec=0.500 | Rec=0.480 | F1=0.490
sad: Prec=0.443 | Rec=0.369 | F1=0.403
surprise: Prec=0.488 | Rec=0.817 | F1=0.611

Epoch Summary: Train Loss=1.1412 | Val Loss=1.3396 | Train Acc=0.570 | Val Acc=0.502
Epoch time: 163.75 seconds

Epoch 8/15


Training: 100%|██████████| 525/525 [02:22<00:00,  3.70it/s]
Validation: 100%|██████████| 113/113 [00:19<00:00,  5.79it/s]



Validation Report:
angry: Prec=0.486 | Rec=0.350 | F1=0.407
disgust: Prec=0.065 | Rec=0.135 | F1=0.087
fear: Prec=0.350 | Rec=0.269 | F1=0.304
happy: Prec=0.829 | Rec=0.740 | F1=0.782
neutral: Prec=0.478 | Rec=0.606 | F1=0.534
sad: Prec=0.438 | Rec=0.492 | F1=0.463
surprise: Prec=0.626 | Rec=0.696 | F1=0.659

Epoch Summary: Train Loss=1.1159 | Val Loss=1.2482 | Train Acc=0.581 | Val Acc=0.540
Epoch time: 161.63 seconds

Epoch 9/15


Training: 100%|██████████| 525/525 [02:49<00:00,  3.10it/s]
Validation: 100%|██████████| 113/113 [00:23<00:00,  4.83it/s]



Validation Report:
angry: Prec=0.450 | Rec=0.286 | F1=0.350
disgust: Prec=0.051 | Rec=0.514 | F1=0.093
fear: Prec=0.404 | Rec=0.128 | F1=0.194
happy: Prec=0.727 | Rec=0.781 | F1=0.753
neutral: Prec=0.533 | Rec=0.427 | F1=0.474
sad: Prec=0.437 | Rec=0.383 | F1=0.408
surprise: Prec=0.565 | Rec=0.782 | F1=0.656

Epoch Summary: Train Loss=1.0979 | Val Loss=1.4291 | Train Acc=0.583 | Val Acc=0.488
Epoch time: 192.93 seconds

Epoch 10/15


Training: 100%|██████████| 525/525 [02:40<00:00,  3.27it/s]
Validation: 100%|██████████| 113/113 [00:19<00:00,  5.81it/s]



Validation Report:
angry: Prec=0.647 | Rec=0.113 | F1=0.192
disgust: Prec=0.033 | Rec=0.586 | F1=0.062
fear: Prec=0.314 | Rec=0.233 | F1=0.268
happy: Prec=0.839 | Rec=0.566 | F1=0.676
neutral: Prec=0.601 | Rec=0.195 | F1=0.295
sad: Prec=0.455 | Rec=0.192 | F1=0.270
surprise: Prec=0.336 | Rec=0.869 | F1=0.484

Epoch Summary: Train Loss=1.0840 | Val Loss=2.1926 | Train Acc=0.591 | Val Acc=0.365
Epoch time: 180.24 seconds

Epoch 11/15


Training: 100%|██████████| 525/525 [02:30<00:00,  3.49it/s]
Validation: 100%|██████████| 113/113 [00:19<00:00,  5.80it/s]



Validation Report:
angry: Prec=0.533 | Rec=0.246 | F1=0.337
disgust: Prec=0.051 | Rec=0.649 | F1=0.094
fear: Prec=0.334 | Rec=0.175 | F1=0.229
happy: Prec=0.819 | Rec=0.723 | F1=0.768
neutral: Prec=0.574 | Rec=0.388 | F1=0.463
sad: Prec=0.460 | Rec=0.283 | F1=0.351
surprise: Prec=0.438 | Rec=0.847 | F1=0.577

Epoch Summary: Train Loss=1.0653 | Val Loss=1.6030 | Train Acc=0.600 | Val Acc=0.460
Epoch time: 170.13 seconds

Epoch 12/15


Training: 100%|██████████| 525/525 [03:15<00:00,  2.68it/s]
Validation: 100%|██████████| 113/113 [00:20<00:00,  5.54it/s]



Validation Report:
angry: Prec=0.500 | Rec=0.251 | F1=0.334
disgust: Prec=0.053 | Rec=0.532 | F1=0.097
fear: Prec=0.404 | Rec=0.131 | F1=0.198
happy: Prec=0.677 | Rec=0.828 | F1=0.745
neutral: Prec=0.481 | Rec=0.524 | F1=0.501
sad: Prec=0.583 | Rec=0.163 | F1=0.255
surprise: Prec=0.493 | Rec=0.832 | F1=0.619

Epoch Summary: Train Loss=1.0551 | Val Loss=1.5317 | Train Acc=0.602 | Val Acc=0.479
Epoch time: 216.29 seconds

Epoch 13/15


Training: 100%|██████████| 525/525 [02:42<00:00,  3.24it/s]
Validation: 100%|██████████| 113/113 [00:19<00:00,  5.68it/s]



Validation Report:
angry: Prec=0.494 | Rec=0.314 | F1=0.384
disgust: Prec=0.088 | Rec=0.505 | F1=0.150
fear: Prec=0.332 | Rec=0.271 | F1=0.299
happy: Prec=0.765 | Rec=0.778 | F1=0.772
neutral: Prec=0.606 | Rec=0.181 | F1=0.279
sad: Prec=0.587 | Rec=0.149 | F1=0.238
surprise: Prec=0.284 | Rec=0.890 | F1=0.431

Epoch Summary: Train Loss=1.0381 | Val Loss=1.7415 | Train Acc=0.612 | Val Acc=0.441
Epoch time: 182.15 seconds

Epoch 14/15


Training: 100%|██████████| 525/525 [02:39<00:00,  3.28it/s]
Validation: 100%|██████████| 113/113 [00:19<00:00,  5.70it/s]



Validation Report:
angry: Prec=0.527 | Rec=0.247 | F1=0.337
disgust: Prec=0.039 | Rec=0.802 | F1=0.074
fear: Prec=0.372 | Rec=0.110 | F1=0.170
happy: Prec=0.770 | Rec=0.714 | F1=0.741
neutral: Prec=0.657 | Rec=0.143 | F1=0.235
sad: Prec=0.489 | Rec=0.129 | F1=0.204
surprise: Prec=0.383 | Rec=0.865 | F1=0.531

Epoch Summary: Train Loss=1.0281 | Val Loss=2.1480 | Train Acc=0.612 | Val Acc=0.385
Epoch time: 179.85 seconds

Epoch 15/15


Training: 100%|██████████| 525/525 [02:55<00:00,  2.99it/s]
Validation: 100%|██████████| 113/113 [00:19<00:00,  5.67it/s]



Validation Report:
angry: Prec=0.550 | Rec=0.246 | F1=0.340
disgust: Prec=0.052 | Rec=0.766 | F1=0.098
fear: Prec=0.468 | Rec=0.108 | F1=0.176
happy: Prec=0.818 | Rec=0.689 | F1=0.748
neutral: Prec=0.577 | Rec=0.320 | F1=0.412
sad: Prec=0.479 | Rec=0.211 | F1=0.293
surprise: Prec=0.334 | Rec=0.866 | F1=0.483

Epoch Summary: Train Loss=1.0190 | Val Loss=1.8732 | Train Acc=0.618 | Val Acc=0.423
Epoch time: 195.71 seconds
Saved model to 'results/mobv2_adam_lr3_dropout0_bn'

Training config: mobv2_adam_lr3_dropout15_bn

Epoch 1/15


Training: 100%|██████████| 525/525 [02:37<00:00,  3.33it/s]
Validation: 100%|██████████| 113/113 [00:19<00:00,  5.72it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Validation Report:
angry: Prec=0.000 | Rec=0.000 | F1=0.000
disgust: Prec=0.017 | Rec=0.658 | F1=0.033
fear: Prec=0.000 | Rec=0.000 | F1=0.000
happy: Prec=0.458 | Rec=0.220 | F1=0.298
neutral: Prec=0.438 | Rec=0.032 | F1=0.059
sad: Prec=0.305 | Rec=0.086 | F1=0.134
surprise: Prec=0.358 | Rec=0.670 | F1=0.467

Epoch Summary: Train Loss=1.6801 | Val Loss=3.5882 | Train Acc=0.340 | Val Acc=0.163
Epoch time: 177.32 seconds

Epoch 2/15


Training: 100%|██████████| 525/525 [02:37<00:00,  3.33it/s]
Validation: 100%|██████████| 113/113 [00:19<00:00,  5.88it/s]



Validation Report:
angry: Prec=0.239 | Rec=0.011 | F1=0.022
disgust: Prec=0.032 | Rec=0.153 | F1=0.053
fear: Prec=0.188 | Rec=0.246 | F1=0.213
happy: Prec=0.785 | Rec=0.573 | F1=0.663
neutral: Prec=0.483 | Rec=0.272 | F1=0.348
sad: Prec=0.332 | Rec=0.439 | F1=0.378
surprise: Prec=0.406 | Rec=0.792 | F1=0.537

Epoch Summary: Train Loss=1.4294 | Val Loss=1.5734 | Train Acc=0.455 | Val Acc=0.395
Epoch time: 176.85 seconds

Epoch 3/15


Training: 100%|██████████| 525/525 [02:27<00:00,  3.56it/s]
Validation: 100%|██████████| 113/113 [00:19<00:00,  5.77it/s]



Validation Report:
angry: Prec=0.305 | Rec=0.189 | F1=0.233
disgust: Prec=0.023 | Rec=0.586 | F1=0.044
fear: Prec=0.294 | Rec=0.034 | F1=0.061
happy: Prec=0.776 | Rec=0.446 | F1=0.567
neutral: Prec=0.582 | Rec=0.118 | F1=0.196
sad: Prec=0.473 | Rec=0.097 | F1=0.161
surprise: Prec=0.340 | Rec=0.859 | F1=0.488

Epoch Summary: Train Loss=1.3071 | Val Loss=2.6365 | Train Acc=0.506 | Val Acc=0.286
Epoch time: 167.29 seconds

Epoch 4/15


Training:   2%|▏         | 11/525 [00:06<05:26,  1.57it/s]

: 

In [None]:
# Train best model for 40 epochs
best_dropout = 0.15  # CHANGE DEPENDING ON YOUR BEST CONFIG
best_lr = 1e-3
best_weight_decay = 5e-5
best_optimizer_name = 'adam'  # or 'sgd'

model = build_model(best_dropout)
if best_optimizer_name == 'adam':
    optimizer = optim.Adam(model.parameters(), lr=best_lr, weight_decay=best_weight_decay)
elif best_optimizer_name == 'sgd':
    optimizer = optim.SGD(model.parameters(), lr=best_lr, momentum=0.9, weight_decay=best_weight_decay)
else:
    # Reject typos instead of silently training with SGD.
    raise ValueError(f"Unknown optimizer {best_optimizer_name!r}; expected 'adam' or 'sgd'")
criterion = nn.CrossEntropyLoss()

# The checkpoint and the history CSV both go under results/; make sure it exists
# before starting a 40-epoch run.
Path('results').mkdir(parents=True, exist_ok=True)

history = train(model, train_loader, val_loader, optimizer, criterion, num_epochs=40, model_name='mobv2_final_model.pth')

# Build the metrics frame once and reuse it for both the CSV and the preview.
df_metrics = pd.DataFrame(history)
df_metrics.to_csv('results/history_mobv2_final_model.csv', index=False)
display(df_metrics.head())