In [1]:
# 1. Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# 2. Install necessary packages (YOLOv8 and torchsummary for model summary)
!pip install -q ultralytics torchsummary

# 3. Imports and Device Setup
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchsummary import summary
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report, confusion_matrix
from tqdm import tqdm
from PIL import Image
import random
import os

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)

# 4. Load Pretrained YOLOv8s‑CLS v8.0 via ultralytics
from ultralytics import YOLO

# 'yolov8s-cls.pt' will automatically download v8.0’s official classification checkpoint.
hub_model = YOLO('yolov8s-cls.pt').to(device)
hub_model.model.eval()
print("Successfully loaded YOLOv8s‑CLS via ultralytics.\n")

# 5. Inspect the ClassificationModel to find its final Linear layer
print("--- hub_model.model (ClassificationModel) structure ---")
print(hub_model.model)

# 6. Replace the final “Linear(in_features=1280, out_features=1000)” with “Linear(1280→4)”
#    In YOLOv8, hub_model.model is a ClassificationModel. Its last module is a Classify(...) block.
#    We locate that final .linear and swap it out for nn.Linear(1280, 4).
classify_block = hub_model.model.model[-1]         # final Classify(...) module
in_features    = classify_block.linear.in_features  # should be 1280
print(f"\nReplacing final Linear: in_features = {in_features}, out_features = 4")
classify_block.linear = nn.Linear(in_features, 4).to(device)

# Now grab the raw nn.Sequential that does exactly “backbone → head → final 4‐way linear”
classifier = hub_model.model.model.to(device)

# 7. Display a summary to confirm ~ 7 M params and final head output = 4
print("\n--- Model summary (YOLOv8s‑CLS backbone + new 4‑class head) ---")
# summary() expects a single nn.Module. We pass classifier, which is the core nn.Sequential.
summary(classifier, input_size=(3, 224, 224))

# 8. Data Paths and Transforms
data_dir  = "/content/drive/MyDrive/spectrograms_split"
train_dir = os.path.join(data_dir, "train")
val_dir   = os.path.join(data_dir, "val")
test_dir  = os.path.join(data_dir, "test")

for path in [train_dir, val_dir, test_dir]:
    assert os.path.isdir(path), f"Directory not found: {path}"

# Resize → ToTensor → Normalize (ImageNet stats)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std =[0.229, 0.224, 0.225]
    )
])

train_dataset = datasets.ImageFolder(train_dir, transform=transform)
val_dataset   = datasets.ImageFolder(val_dir,   transform=transform)
test_dataset  = datasets.ImageFolder(test_dir,  transform=transform)

print("\nClasses:", train_dataset.classes)  # e.g. ['mild','moderate','normal','severe']
num_classes = len(train_dataset.classes)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True,  num_workers=2)
val_loader   = DataLoader(val_dataset,   batch_size=32, shuffle=False, num_workers=2)
test_loader  = DataLoader(test_dataset,  batch_size=32, shuffle=False, num_workers=2)

# 9. Ensure all parameters in `classifier` are trainable
for param in classifier.parameters():
    param.requires_grad = True

# 10. Set up Loss, Optimizer, and Learning‑Rate Scheduler
criterion    = nn.CrossEntropyLoss()
optimizer    = torch.optim.Adam(classifier.parameters(), lr=1e-4)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# 11. Training + Validation Loop (20 Epochs)
num_epochs = 20
for epoch in range(num_epochs):
    # — Train Phase —
    classifier.train()
    running_loss = 0.0
    for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]"):
        imgs, labels = imgs.to(device), labels.to(device)

        outputs = classifier(imgs)        # shape = (batch_size, 4)
        loss    = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    avg_train_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1:2d} Train Loss: {avg_train_loss:.4f}")
    lr_scheduler.step()

    # — Validation Phase —
    classifier.eval()
    val_loss    = 0.0
    correct_val = 0
    total_val   = 0
    with torch.no_grad():
        for imgs, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Val]"):
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = classifier(imgs)    # shape = (batch_size, 4)
            loss    = criterion(outputs, labels)
            val_loss += loss.item()

            _, preds = torch.max(outputs, dim=1)
            correct_val += (preds == labels).sum().item()
            total_val   += labels.size(0)

    avg_val_loss = val_loss / len(val_loader)
    val_acc      = 100.0 * correct_val / total_val
    print(f"Epoch {epoch+1:2d} Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc:.2f}%\n")

print("Training complete")

# 12. Test‑Set Evaluation
classifier.eval()
all_preds    = []
all_labels   = []
correct_test = 0
total_test   = 0

with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc="Testing"):
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = classifier(imgs)      # shape = (batch_size, 4)
        _, preds = torch.max(outputs, dim=1)
        correct_test += (preds == labels).sum().item()
        total_test   += labels.size(0)
        all_preds.append(preds.cpu())
        all_labels.append(labels.cpu())

test_acc = 100.0 * correct_test / total_test
print(f"\nTest Accuracy: {test_acc:.2f}%")

all_preds  = torch.cat(all_preds).numpy()
all_labels = torch.cat(all_labels).numpy()

precision_test = precision_score(all_labels, all_preds, average="macro", zero_division=0)
recall_test    = recall_score(all_labels, all_preds, average="macro", zero_division=0)
f1_test        = f1_score(all_labels, all_preds, average="macro", zero_division=0)

print(f"Test Precision (macro): {precision_test:.4f}")
print(f"Test Recall    (macro): {recall_test:.4f}")
print(f"Test F1‑Score  (macro): {f1_test:.4f}\n")

print("Test: Per‑class Precision / Recall / F1:\n")
print(classification_report(
    all_labels,
    all_preds,
    target_names=test_dataset.classes,
    zero_division=0
))

cm = confusion_matrix(all_labels, all_preds)
print("Test Confusion Matrix (rows=true, cols=predicted):\n", cm)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Using device: cuda
Successfully loaded YOLOv8s‑CLS via ultralytics.

--- hub_model.model (ClassificationModel) structure ---
ClassificationModel(
  (model): Sequential(
    (0): Conv(
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act): SiLU(inplace=True)
    )
    (1): Conv(
      (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act): SiLU(inplace=True)
    )
    (2): C2f(
      (cv1): Conv(
        (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): SiLU(inpl

Epoch 1/20 [Train]: 100%|██████████| 453/453 [05:04<00:00,  1.49it/s]


Epoch  1 Train Loss: 1.0399


Epoch 1/20 [Val]:   0%|          | 0/97 [00:01<?, ?it/s]


TypeError: cross_entropy_loss(): argument 'input' (position 1) must be Tensor, not tuple