In [1]:
# Attach Google Drive to the Colab filesystem so files stored there are
# reachable under /content/drive.
from google.colab import drive

mount_point = '/content/drive'
drive.mount(mount_point)

Mounted at /content/drive


In [3]:
# Install necessary packages (YOLOv8 and torchsummary for model summary)
!pip install -q ultralytics torchsummary

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.5/207.5 MB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.1/21.1 MB[0m [31m104.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [4]:
# Imports and Device Setup
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchsummary import summary
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report, confusion_matrix
from tqdm import tqdm
from PIL import Image
import random
import os

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print("Using device:", device)

Using device: cuda


In [5]:
# Load the pretrained YOLOv8s classification model through the ultralytics package.
from ultralytics import YOLO

# Passing the checkpoint file name lets ultralytics download the official
# classification weights automatically when they are not available locally.
checkpoint_name = 'yolov8s-cls.pt'
hub_model = YOLO(checkpoint_name)
hub_model = hub_model.to(device)

# Keep the wrapped network in inference mode until fine-tuning starts.
hub_model.model.eval()
print("Successfully loaded YOLOv8s‑CLS via ultralytics.")

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s-cls.pt to 'yolov8s-cls.pt'...


100%|██████████| 12.3M/12.3M [00:00<00:00, 48.7MB/s]


Successfully loaded YOLOv8s‑CLS via ultralytics.


In [6]:
# Print the full module tree of the loaded network so the classification head
# (the last entry of the inner Sequential) and its final Linear layer can be located.
# That Linear(in_features=1280, out_features=1000) layer is replaced further down
# by a 4-output Linear so the network predicts the four dataset classes.
print(hub_model.model)

ClassificationModel(
  (model): Sequential(
    (0): Conv(
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act): SiLU(inplace=True)
    )
    (1): Conv(
      (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act): SiLU(inplace=True)
    )
    (2): C2f(
      (cv1): Conv(
        (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (cv2): Conv(
        (conv): Conv2d(96, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
   

In [7]:
# Swap the pretrained 1000-way classifier for a 4-way one.
# hub_model.model is the ClassificationModel; the last module of its inner
# Sequential is the Classify() head, and that head's `.linear` attribute is the
# final fully-connected layer.
classify_block = hub_model.model.model[-1]
in_features = classify_block.linear.in_features  # expected to be 1280
print(f"Replacing final Linear: in_features = {in_features}, out_features = 4")

# Freshly initialised output layer with one unit per target class.
replacement_linear = nn.Linear(in_features, 4)
classify_block.linear = replacement_linear

# Move the model again so the newly created layer ends up on the same device.
hub_model = hub_model.to(device)


Replacing final Linear: in_features = 1280, out_features = 4


In [8]:
# Print a layer-by-layer summary for a 3x224x224 input to check the parameter
# count and confirm that the head now produces 4 outputs.
summary_input_shape = (3, 224, 224)
summary(hub_model.model, input_size=summary_input_shape)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 112, 112]             864
       BatchNorm2d-2         [-1, 32, 112, 112]              64
              SiLU-3         [-1, 32, 112, 112]               0
              SiLU-4         [-1, 32, 112, 112]               0
              SiLU-5         [-1, 32, 112, 112]               0
              SiLU-6         [-1, 32, 112, 112]               0
              SiLU-7         [-1, 32, 112, 112]               0
              SiLU-8         [-1, 32, 112, 112]               0
              SiLU-9         [-1, 32, 112, 112]               0
             SiLU-10         [-1, 32, 112, 112]               0
             SiLU-11         [-1, 32, 112, 112]               0
             SiLU-12         [-1, 32, 112, 112]               0
             SiLU-13         [-1, 32, 112, 112]               0
             SiLU-14         [-1, 32, 1

In [9]:
# Dataset location on the mounted Drive: one sub-folder per split.
data_dir = "/content/drive/MyDrive/spectrograms_split"
train_dir, val_dir, test_dir = (
    os.path.join(data_dir, split) for split in ("train", "val", "test")
)

# Fail fast if any of the split folders is missing.
for path in (train_dir, val_dir, test_dir):
    assert os.path.isdir(path), f"Directory not found: {path}"

In [10]:
# Preprocessing shared by every split: resize to 224x224, convert to a tensor,
# then normalize each channel with the ImageNet mean / std.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
]
transform = transforms.Compose(preprocessing_steps)

In [11]:
# Build one ImageFolder dataset per split; labels are derived from the
# sub-folder names inside each split directory.
train_dataset = datasets.ImageFolder(train_dir, transform=transform)
val_dataset   = datasets.ImageFolder(val_dir,   transform=transform)
test_dataset  = datasets.ImageFolder(test_dir,  transform=transform)

print("Classes:", train_dataset.classes)  # e.g. ['mild','moderate','normal','severe']
num_classes = len(train_dataset.classes)

# ImageFolder assigns class indices independently for each split, so a missing
# or extra class folder in val/test would silently shift the label indices
# relative to training. Fail fast instead of producing meaningless metrics.
for split_name, split_dataset in (("val", val_dataset), ("test", test_dataset)):
    assert split_dataset.class_to_idx == train_dataset.class_to_idx, (
        f"Class folders of the {split_name} split do not match the train split: "
        f"{split_dataset.classes} vs {train_dataset.classes}"
    )

# The replaced classifier head must emit exactly one output per dataset class;
# otherwise the loss computation fails (or trains against the wrong label space).
head_out_features = hub_model.model.model[-1].linear.out_features
assert head_out_features == num_classes, (
    f"Model head has {head_out_features} outputs but the dataset has {num_classes} classes"
)

Classes: ['mild', 'moderate', 'normal', 'severe']


In [12]:
# Batch loaders for the three splits. Only the training split is shuffled;
# validation and test are iterated in a fixed order.
loader_options = dict(batch_size=32, num_workers=2)

train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

In [13]:
# Work directly with the underlying torch module and fine-tune every layer:
# mark each of its parameters as trainable.
model = hub_model.model
for param in model.parameters():
    param.requires_grad_(True)

In [14]:
# Optimisation setup: cross-entropy loss, Adam over all model parameters, and a
# step schedule that multiplies the learning rate by 0.1 after every 10
# scheduler steps (the training loop steps it once per epoch).
initial_lr = 1e-4
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=initial_lr)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

In [15]:
# Training + Validation Loop (20 Epochs)
# Each epoch runs one optimisation pass over train_loader, advances the LR
# schedule, then measures average loss and accuracy on val_loader.
num_epochs = 20
for epoch in range(num_epochs):
    # Train Phase
    model.train()
    running_loss = 0.0
    for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]"):
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = model(imgs)       # training mode: logits of shape (batch, 4)
        loss    = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Mean of the per-batch losses (not weighted by batch size).
    avg_train_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1:2d} Train Loss: {avg_train_loss:.4f}")
    lr_scheduler.step()

    # Validation Phase
    model.eval()
    val_loss    = 0.0
    correct_val = 0
    total_val   = 0
    with torch.no_grad():
        for imgs, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Val]"):
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            # In eval mode the Ultralytics Classify head does not return plain
            # logits: current releases return a (softmax_probs, logits) tuple,
            # which CrossEntropyLoss and torch.max cannot consume. Pick the raw
            # logits so the loss matches the one used during training.
            # NOTE(review): older releases return only softmax probabilities in
            # eval mode; the accuracy is still right there, but the val loss is
            # then computed on probabilities - confirm the installed version.
            logits = outputs[-1] if isinstance(outputs, (tuple, list)) else outputs
            loss = criterion(logits, labels)
            val_loss += loss.item()
            preds = logits.argmax(dim=1)
            correct_val += (preds == labels).sum().item()
            total_val   += labels.size(0)
    avg_val_loss = val_loss / len(val_loader)
    val_acc      = 100.0 * correct_val / total_val
    print(f"Epoch {epoch+1:2d} Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc:.2f}%\n")

print("Training complete.")

Epoch 1/20 [Train]:  31%|███       | 140/453 [28:30<1:03:43, 12.22s/it]


KeyboardInterrupt: 

In [None]:
# Test‑Set Evaluation
# Runs the model over test_loader without gradients, then reports accuracy,
# macro-averaged precision / recall / F1, a per-class report and the confusion matrix.
model.eval()
all_preds    = []
all_labels   = []
correct_test = 0
total_test   = 0

with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc="Testing"):
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = model(imgs)
        # In eval mode the Ultralytics Classify head may return a
        # (softmax_probs, logits) tuple instead of a single tensor, which
        # torch.max cannot consume. The arg-max is the same for probabilities
        # and logits, so either tensor yields the same predictions.
        scores = outputs[-1] if isinstance(outputs, (tuple, list)) else outputs
        preds = scores.argmax(dim=1)
        correct_test += (preds == labels).sum().item()
        total_test   += labels.size(0)
        # Collect on the CPU so the metrics below can work on NumPy arrays.
        all_preds.append(preds.cpu())
        all_labels.append(labels.cpu())

test_acc = 100.0 * correct_test / total_test
print(f"\nTest Accuracy: {test_acc:.2f}%")

all_preds  = torch.cat(all_preds).numpy()
all_labels = torch.cat(all_labels).numpy()

# Macro averaging weights every class equally regardless of its sample count.
precision_test = precision_score(all_labels, all_preds, average="macro", zero_division=0)
recall_test    = recall_score(all_labels, all_preds, average="macro", zero_division=0)
f1_test        = f1_score(all_labels, all_preds, average="macro", zero_division=0)

print(f"Test Precision (macro): {precision_test:.4f}")
print(f"Test Recall    (macro): {recall_test:.4f}")
print(f"Test F1‑Score  (macro): {f1_test:.4f}\n")

print("Test: Per‑class Precision / Recall / F1:\n")
print(classification_report(
    all_labels,
    all_preds,
    target_names=test_dataset.classes,
    zero_division=0
))

cm = confusion_matrix(all_labels, all_preds)
print("Test Confusion Matrix (rows=true, cols=predicted):\n", cm)