# EfficientNet-V2-M Hair Type Classifier

This notebook trains a hair type classifier using EfficientNet-V2-M with mixed precision training.

## Setup Instructions
1. **Enable GPU**: Go to `Runtime` ‚Üí `Change runtime type` ‚Üí Select `GPU` (T4 or better)
2. **Upload your data** to Google Drive in this structure:
```
MyDrive/
‚îî‚îÄ‚îÄ hair_data/
    ‚îî‚îÄ‚îÄ segmented/
        ‚îú‚îÄ‚îÄ 1/
        ‚îú‚îÄ‚îÄ 2a/
        ‚îú‚îÄ‚îÄ 2b/
        ‚îî‚îÄ‚îÄ ...
```

## 1. Mount Google Drive & Check GPU

In [12]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [13]:
# Check GPU availability
import torch

if torch.cuda.is_available():
    gpu_name = torch.cuda.get_device_name(0)
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"‚úÖ GPU Available: {gpu_name}")
    print(f"   Memory: {gpu_memory:.1f} GB")
else:
    print("‚ùå No GPU detected! Go to Runtime ‚Üí Change runtime type ‚Üí GPU")
    raise RuntimeError("GPU required for training")

‚úÖ GPU Available: Tesla T4
   Memory: 15.8 GB


## 2. Configuration

‚ö†Ô∏è **Update `DATA_DIR` to match your Google Drive path!**

In [14]:
import os
import time
import shutil
import numpy as np
from torch import nn
from torch.optim import AdamW
from torch.amp import autocast, GradScaler
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision import transforms, datasets
from torchvision.models import efficientnet_v2_m, EfficientNet_V2_M_Weights
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, f1_score
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict
from tqdm.notebook import tqdm

# ============================================================
# CONFIGURATION - UPDATE THESE PATHS!
# ============================================================
DATA_DIR = "/content/drive/MyDrive/hair_data/segmented/"  # ‚Üê Update this!
OUTPUT_DIR = "/content/split/"                            # Local split folder
CHECKPOINT_DIR = "/content/drive/MyDrive/hair_data/checkpoints/"  # Save to Drive

# Training parameters (optimized for T4 16GB with V2-M)
IMG_SIZE = 600
BATCH_SIZE = 8
ACCUMULATION_STEPS = 4  # Effective batch size = 32
EPOCHS = 20
LR = 3e-4

# Data split ratios
TRAIN_RATIO = 0.7
VAL_RATIO = 0.15
TEST_RATIO = 0.15

# Resume training from checkpoint?
RESUME_FROM_CHECKPOINT = True

# Create directories
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Using device: cuda


In [15]:
# Verify data directory exists
if not os.path.exists(DATA_DIR):
    print(f"‚ùå Data directory not found: {DATA_DIR}")
    print("\nPlease update DATA_DIR in the cell above to match your Google Drive path.")
    raise FileNotFoundError(f"Data directory not found: {DATA_DIR}")
else:
    classes = [d for d in os.listdir(DATA_DIR) if os.path.isdir(os.path.join(DATA_DIR, d))]
    print(f"‚úÖ Found data directory with {len(classes)} classes: {sorted(classes)}")

‚úÖ Found data directory with 10 classes: ['1', '2a', '2b', '2c', '3a', '3b', '3c', '4a', '4b', '4c']


## 3. Split Dataset into Train/Val/Test

In [16]:
def split_dataset(source_dir, output_dir, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15):
    """
    Split images from class folders into train/val/test structure.
    """
    assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-5, "Ratios must sum to 1"

    # Create output directories
    for split in ['train', 'val', 'test']:
        split_path = os.path.join(output_dir, split)
        if os.path.exists(split_path):
            shutil.rmtree(split_path)
        os.makedirs(split_path)

    # Get class folders
    classes = [d for d in os.listdir(source_dir)
               if os.path.isdir(os.path.join(source_dir, d))]
    classes.sort()
    print(f"Found {len(classes)} classes: {classes}")

    stats = defaultdict(lambda: defaultdict(int))

    for cls in classes:
        cls_path = os.path.join(source_dir, cls)
        images = [f for f in os.listdir(cls_path)
                  if f.lower().endswith(('.jpg', '.jpeg', '.png'))]

        if len(images) < 3:
            print(f"‚ö†Ô∏è  Warning: Class '{cls}' has only {len(images)} images")
            continue

        # Split: first into train and temp, then temp into val and test
        train_imgs, temp_imgs = train_test_split(
            images, train_size=train_ratio, random_state=42, shuffle=True
        )
        relative_val = val_ratio / (val_ratio + test_ratio)
        val_imgs, test_imgs = train_test_split(
            temp_imgs, train_size=relative_val, random_state=42, shuffle=True
        )

        # Copy images to respective folders
        for split, img_list in [('train', train_imgs), ('val', val_imgs), ('test', test_imgs)]:
            split_cls_path = os.path.join(output_dir, split, cls)
            os.makedirs(split_cls_path, exist_ok=True)
            for img in img_list:
                src = os.path.join(cls_path, img)
                dst = os.path.join(split_cls_path, img)
                shutil.copy2(src, dst)
            stats[cls][split] = len(img_list)

    # Print statistics
    print("\n" + "="*50)
    print("Dataset Split Statistics")
    print("="*50)
    print(f"{'Class':<10} {'Train':<10} {'Val':<10} {'Test':<10} {'Total':<10}")
    print("-" * 50)
    total_train, total_val, total_test = 0, 0, 0
    for cls in classes:
        total = stats[cls]['train'] + stats[cls]['val'] + stats[cls]['test']
        total_train += stats[cls]['train']
        total_val += stats[cls]['val']
        total_test += stats[cls]['test']
        print(f"{cls:<10} {stats[cls]['train']:<10} {stats[cls]['val']:<10} {stats[cls]['test']:<10} {total:<10}")
    print("-" * 50)
    print(f"{'TOTAL':<10} {total_train:<10} {total_val:<10} {total_test:<10} {total_train+total_val+total_test:<10}")

    return classes

In [17]:
# Run the split
print("=== Splitting Dataset ===")
CLASS_NAMES = split_dataset(DATA_DIR, OUTPUT_DIR, TRAIN_RATIO, VAL_RATIO, TEST_RATIO)

=== Splitting Dataset ===
Found 10 classes: ['1', '2a', '2b', '2c', '3a', '3b', '3c', '4a', '4b', '4c']

Dataset Split Statistics
Class      Train      Val        Test       Total     
--------------------------------------------------
1          1297       278        278        1853      
2a         1511       324        324        2159      
2b         839        180        180        1199      
2c         838        180        180        1198      
3a         836        179        180        1195      
3b         848        182        182        1212      
3c         969        208        208        1385      
4a         1209       259        260        1728      
4b         1402       301        301        2004      
4c         1614       346        347        2307      
--------------------------------------------------
TOTAL      11363      2437       2440       16240     


## 4. Create Data Loaders

In [18]:
# Transforms (minimal augmentation since data is already augmented)
train_tfms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

val_tfms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Create datasets
train_ds = datasets.ImageFolder(os.path.join(OUTPUT_DIR, "train"), train_tfms)
val_ds = datasets.ImageFolder(os.path.join(OUTPUT_DIR, "val"), val_tfms)
test_ds = datasets.ImageFolder(os.path.join(OUTPUT_DIR, "test"), val_tfms)

num_classes = len(train_ds.classes)
print(f"\n‚úÖ Detected {num_classes} classes: {train_ds.classes}")
print(f"   Train: {len(train_ds)} images")
print(f"   Val:   {len(val_ds)} images")
print(f"   Test:  {len(test_ds)} images")

# Create data loaders
train_loader = torch.utils.data.DataLoader(
    train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
val_loader = torch.utils.data.DataLoader(
    val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)
test_loader = torch.utils.data.DataLoader(
    test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)


‚úÖ Detected 10 classes: ['1', '2a', '2b', '2c', '3a', '3b', '3c', '4a', '4b', '4c']
   Train: 11363 images
   Val:   2437 images
   Test:  2440 images


## 5. Model Setup

In [19]:
# Load pretrained EfficientNet-V2-M
print("Loading EfficientNet-V2-M with ImageNet weights...")
weights = EfficientNet_V2_M_Weights.IMAGENET1K_V1
model = efficientnet_v2_m(weights=weights)

# Replace classifier head for our number of classes
model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
model.to(device)

# Count parameters
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"\n‚úÖ Model loaded successfully")
print(f"   Total parameters: {total_params:,}")
print(f"   Trainable parameters: {trainable_params:,}")

# Check memory usage
torch.cuda.empty_cache()
print(f"   GPU Memory allocated: {torch.cuda.memory_allocated()/1e9:.2f} GB")

Loading EfficientNet-V2-M with ImageNet weights...
Downloading: "https://download.pytorch.org/models/efficientnet_v2_m-dc08266a.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_v2_m-dc08266a.pth


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 208M/208M [00:01<00:00, 169MB/s]



‚úÖ Model loaded successfully
   Total parameters: 52,871,166
   Trainable parameters: 52,871,166
   GPU Memory allocated: 0.22 GB


## 6. Training Functions

In [20]:
def validate(loader, desc="Validating"):
    """Compute loss and accuracy on a data loader."""
    model.eval()
    total, correct = 0, 0
    running_loss = 0
    all_preds = []
    all_labels = []

    pbar = tqdm(loader, desc=desc, leave=False)

    with torch.no_grad():
        for imgs, labels in pbar:
            imgs, labels = imgs.to(device), labels.to(device)
            with autocast('cuda'):
                outputs = model(imgs)
                loss = criterion(outputs, labels)

            running_loss += loss.item()
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += len(labels)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            pbar.set_postfix({'acc': f'{correct/total:.4f}'})

    accuracy = correct / total
    avg_loss = running_loss / len(loader)
    f1 = f1_score(all_labels, all_preds, average='weighted')

    return avg_loss, accuracy, f1, all_preds, all_labels


def train_one_epoch(epoch):
    """Train for one epoch with gradient accumulation."""
    model.train()
    running_loss = 0
    optimizer.zero_grad()

    pbar = tqdm(train_loader, desc=f"Epoch {epoch:02d} Training", leave=False)

    for i, (imgs, labels) in enumerate(pbar):
        imgs, labels = imgs.to(device), labels.to(device)

        with autocast('cuda'):
            outputs = model(imgs)
            loss = criterion(outputs, labels) / ACCUMULATION_STEPS

        scaler.scale(loss).backward()

        if (i + 1) % ACCUMULATION_STEPS == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        running_loss += loss.item() * ACCUMULATION_STEPS

        pbar.set_postfix({'loss': f'{running_loss/(i+1):.4f}'})

    return running_loss / len(train_loader)

In [21]:
def save_checkpoint(epoch, model, optimizer, scaler, scheduler, best_val_acc, history):
    """Save training checkpoint."""
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scaler_state_dict': scaler.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'best_val_acc': best_val_acc,
        'history': history,
    }
    torch.save(checkpoint, os.path.join(CHECKPOINT_DIR, 'latest_checkpoint.pth'))


def load_checkpoint(model, optimizer, scaler, scheduler):
    """Load training checkpoint if it exists."""
    checkpoint_path = os.path.join(CHECKPOINT_DIR, 'latest_checkpoint.pth')
    if os.path.exists(checkpoint_path) and RESUME_FROM_CHECKPOINT:
        print(f"Found checkpoint at {checkpoint_path}")
        checkpoint = torch.load(checkpoint_path, weights_only=False)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scaler.load_state_dict(checkpoint['scaler_state_dict'])
        if 'scheduler_state_dict' in checkpoint:
            scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
        best_val_acc = checkpoint['best_val_acc']
        history = checkpoint['history']
        print(f"‚úÖ Resumed from epoch {checkpoint['epoch']} (best val acc: {best_val_acc:.4f})")
        return start_epoch, best_val_acc, history

    print("Starting fresh training (no checkpoint found or resume disabled)")
    return 1, 0.0, {'train_loss': [], 'val_loss': [], 'val_acc': [], 'val_f1': [], 'lr': []}

## 7. Training Loop

In [22]:
# Setup optimizer, scaler, scheduler
criterion = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=LR)
scaler = GradScaler('cuda')
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS, eta_min=1e-6)

# Load checkpoint if available
start_epoch, best_val_acc, history = load_checkpoint(model, optimizer, scaler, scheduler)

print(f"\n{'='*60}")
print("Starting Training")
print(f"{'='*60}")
print(f"Model: EfficientNet-V2-M")
print(f"Epochs: {start_epoch} to {EPOCHS}")
print(f"Batch size: {BATCH_SIZE} (effective: {BATCH_SIZE * ACCUMULATION_STEPS})")
print(f"Learning rate: {LR} (with cosine annealing)")
print(f"Image size: {IMG_SIZE}x{IMG_SIZE}")
print(f"{'='*60}\n")

Starting fresh training (no checkpoint found or resume disabled)

Starting Training
Model: EfficientNet-V2-M
Epochs: 1 to 20
Batch size: 8 (effective: 32)
Learning rate: 0.0003 (with cosine annealing)
Image size: 600x600



In [23]:
for epoch in range(start_epoch, EPOCHS + 1):
    start = time.time()

    # Get current learning rate
    current_lr = optimizer.param_groups[0]['lr']

    # Train
    train_loss = train_one_epoch(epoch)

    # Validate
    val_loss, val_acc, val_f1, _, _ = validate(val_loader)

    # Step scheduler
    scheduler.step()

    duration = time.time() - start

    # Save history
    history['train_loss'].append(train_loss)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)
    history['val_f1'].append(val_f1)
    history['lr'].append(current_lr)

    # Print epoch summary
    print(f"Epoch {epoch:02d}/{EPOCHS} | "
          f"Train Loss: {train_loss:.4f} | "
          f"Val Loss: {val_loss:.4f} | "
          f"Val Acc: {val_acc:.4f} | "
          f"Val F1: {val_f1:.4f} | "
          f"LR: {current_lr:.2e} | "
          f"Time: {duration:.1f}s")

    # Save best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        ckpt_path = os.path.join(CHECKPOINT_DIR, "best_model.pth")
        torch.save(model.state_dict(), ckpt_path)
        print(f"  ‚úÖ New best model saved! (acc: {best_val_acc:.4f})")

    # Save checkpoint
    save_checkpoint(epoch, model, optimizer, scaler, scheduler, best_val_acc, history)
    print(f"  üíæ Checkpoint saved")

    # Print GPU memory usage
    print(f"  üìä GPU Memory: {torch.cuda.max_memory_allocated()/1e9:.1f}GB / {gpu_memory:.1f}GB")

print(f"\n{'='*60}")
print(f"Training Complete!")
print(f"Best validation accuracy: {best_val_acc:.4f}")
print(f"{'='*60}")

Epoch 01 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 01/20 | Train Loss: 1.6956 | Val Loss: 1.5616 | Val Acc: 0.4103 | Val F1: 0.3795 | LR: 3.00e-04 | Time: 837.9s
  ‚úÖ New best model saved! (acc: 0.4103)
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB


Epoch 02 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 02/20 | Train Loss: 1.5649 | Val Loss: 1.5513 | Val Acc: 0.3874 | Val F1: 0.3441 | LR: 2.98e-04 | Time: 829.2s
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB


Epoch 03 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 03/20 | Train Loss: 1.4863 | Val Loss: 1.5337 | Val Acc: 0.3964 | Val F1: 0.3736 | LR: 2.93e-04 | Time: 831.3s
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB


Epoch 04 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 04/20 | Train Loss: 1.4252 | Val Loss: 1.5638 | Val Acc: 0.3943 | Val F1: 0.3550 | LR: 2.84e-04 | Time: 832.2s
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB


Epoch 05 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 05/20 | Train Loss: 1.3528 | Val Loss: 1.5234 | Val Acc: 0.4218 | Val F1: 0.3730 | LR: 2.71e-04 | Time: 833.8s
  ‚úÖ New best model saved! (acc: 0.4218)
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB


Epoch 06 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 06/20 | Train Loss: 1.2782 | Val Loss: 1.4817 | Val Acc: 0.4177 | Val F1: 0.3933 | LR: 2.56e-04 | Time: 834.6s
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB


Epoch 07 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 07/20 | Train Loss: 1.1889 | Val Loss: 1.5499 | Val Acc: 0.4144 | Val F1: 0.3958 | LR: 2.38e-04 | Time: 835.5s
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB


Epoch 08 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 08/20 | Train Loss: 1.0838 | Val Loss: 1.5886 | Val Acc: 0.4091 | Val F1: 0.4039 | LR: 2.18e-04 | Time: 834.2s
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB


Epoch 09 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 09/20 | Train Loss: 0.9692 | Val Loss: 1.6488 | Val Acc: 0.4099 | Val F1: 0.3907 | LR: 1.97e-04 | Time: 834.4s
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB


Epoch 10 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 10/20 | Train Loss: 0.8678 | Val Loss: 1.7500 | Val Acc: 0.4091 | Val F1: 0.3837 | LR: 1.74e-04 | Time: 834.1s
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB


Epoch 11 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 11/20 | Train Loss: 0.7689 | Val Loss: 1.7956 | Val Acc: 0.4149 | Val F1: 0.4034 | LR: 1.50e-04 | Time: 834.2s
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB


Epoch 12 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 12/20 | Train Loss: 0.6740 | Val Loss: 1.9441 | Val Acc: 0.4021 | Val F1: 0.3997 | LR: 1.27e-04 | Time: 834.2s
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB


Epoch 13 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 13/20 | Train Loss: 0.5862 | Val Loss: 2.0538 | Val Acc: 0.4066 | Val F1: 0.4067 | LR: 1.04e-04 | Time: 834.8s
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB


Epoch 14 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 14/20 | Train Loss: 0.5253 | Val Loss: 2.2629 | Val Acc: 0.4042 | Val F1: 0.4028 | LR: 8.26e-05 | Time: 834.2s
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB


Epoch 15 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 15/20 | Train Loss: 0.4645 | Val Loss: 2.4180 | Val Acc: 0.3943 | Val F1: 0.3946 | LR: 6.26e-05 | Time: 834.0s
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB


Epoch 16 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 16/20 | Train Loss: 0.4248 | Val Loss: 2.4774 | Val Acc: 0.3952 | Val F1: 0.3982 | LR: 4.48e-05 | Time: 832.7s
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB


Epoch 17 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 17/20 | Train Loss: 0.3816 | Val Loss: 2.6881 | Val Acc: 0.3989 | Val F1: 0.3996 | LR: 2.96e-05 | Time: 833.8s
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB


Epoch 18 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 18/20 | Train Loss: 0.3618 | Val Loss: 2.7387 | Val Acc: 0.4021 | Val F1: 0.4012 | LR: 1.73e-05 | Time: 833.3s
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB


Epoch 19 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 19/20 | Train Loss: 0.3369 | Val Loss: 2.8737 | Val Acc: 0.4013 | Val F1: 0.3987 | LR: 8.32e-06 | Time: 823.4s
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB


Epoch 20 Training:   0%|          | 0/1421 [00:00<?, ?it/s]

Validating:   0%|          | 0/305 [00:00<?, ?it/s]

Epoch 20/20 | Train Loss: 0.3328 | Val Loss: 2.9650 | Val Acc: 0.4001 | Val F1: 0.3982 | LR: 2.84e-06 | Time: 823.9s
  üíæ Checkpoint saved
  üìä GPU Memory: 8.5GB / 15.8GB

Training Complete!
Best validation accuracy: 0.4218


## 8. Evaluate on Test Set

In [24]:
print("=== Evaluating on Test Set ===")

# Load best model
best_model_path = os.path.join(CHECKPOINT_DIR, "best_model.pth")
if os.path.exists(best_model_path):
    model.load_state_dict(torch.load(best_model_path, weights_only=True))
    print(f"Loaded best model from {best_model_path}")
else:
    print("Using current model (no best_model.pth found)")

test_loss, test_acc, test_f1, test_preds, test_labels = validate(test_loader, desc="Testing")

print(f"\n{'='*40}")
print(f"Test Results")
print(f"{'='*40}")
print(f"Test Loss:     {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.4f} ({test_acc*100:.1f}%)")
print(f"Test F1 Score: {test_f1:.4f}")
print(f"{'='*40}")

=== Evaluating on Test Set ===


RuntimeError: Error(s) in loading state_dict for EfficientNet:
	Missing key(s) in state_dict: "classifier.1.weight", "classifier.1.bias". 
	Unexpected key(s) in state_dict: "classifier.1.fc.weight", "classifier.1.fc.bias". 

In [None]:
# Classification Report
print("\n=== Classification Report ===")
print(classification_report(test_labels, test_preds, target_names=CLASS_NAMES))

In [None]:
# Confusion Matrix
cm = confusion_matrix(test_labels, test_preds)
plt.figure(figsize=(12, 10))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=CLASS_NAMES, yticklabels=CLASS_NAMES)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix - Hair Type Classification')
plt.tight_layout()
plt.savefig(os.path.join(CHECKPOINT_DIR, 'confusion_matrix.png'), dpi=150)
plt.show()
print(f"Saved to {CHECKPOINT_DIR}/confusion_matrix.png")

## 9. Plot Training History

In [None]:
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

epochs_range = range(1, len(history['train_loss']) + 1)

# Loss
axes[0, 0].plot(epochs_range, history['train_loss'], label='Train Loss', marker='o')
axes[0, 0].plot(epochs_range, history['val_loss'], label='Val Loss', marker='o')
axes[0, 0].set_xlabel('Epoch')
axes[0, 0].set_ylabel('Loss')
axes[0, 0].set_title('Training vs Validation Loss')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

# Accuracy
axes[0, 1].plot(epochs_range, history['val_acc'], label='Val Accuracy', color='green', marker='o')
axes[0, 1].set_xlabel('Epoch')
axes[0, 1].set_ylabel('Accuracy')
axes[0, 1].set_title('Validation Accuracy')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

# F1 Score
axes[1, 0].plot(epochs_range, history['val_f1'], label='Val F1', color='orange', marker='o')
axes[1, 0].set_xlabel('Epoch')
axes[1, 0].set_ylabel('F1 Score')
axes[1, 0].set_title('Validation F1 Score')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)

# Learning Rate
axes[1, 1].plot(epochs_range, history['lr'], label='Learning Rate', color='red', marker='o')
axes[1, 1].set_xlabel('Epoch')
axes[1, 1].set_ylabel('Learning Rate')
axes[1, 1].set_title('Learning Rate Schedule')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)
axes[1, 1].set_yscale('log')

plt.tight_layout()
plt.savefig(os.path.join(CHECKPOINT_DIR, 'training_history.png'), dpi=150)
plt.show()
print(f"Saved to {CHECKPOINT_DIR}/training_history.png")

## 10. Inference Helper

In [None]:
from PIL import Image

def predict_hair_type(image_path, model, class_names, device='cuda'):
    """Predict hair type from an image path."""
    model.eval()

    # Load and transform image
    img = Image.open(image_path).convert('RGB')
    transform = transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    img_tensor = transform(img).unsqueeze(0).to(device)

    # Predict
    with torch.no_grad():
        with autocast('cuda'):
            outputs = model(img_tensor)
            probs = torch.softmax(outputs, dim=1)
            pred_idx = outputs.argmax(dim=1).item()

    pred_class = class_names[pred_idx]
    confidence = probs[0, pred_idx].item()

    return pred_class, confidence, probs[0].cpu().numpy()


# Example usage:
# pred_class, conf, all_probs = predict_hair_type("/path/to/image.jpg", model, CLASS_NAMES)
# print(f"Predicted: {pred_class} (confidence: {conf:.2%})")