In [None]:
import os
import glob
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# ===============================
# 1. Custom Dataset
# ===============================
class SpectrogramDataset(Dataset):
    """Dataset of single-channel spectrograms stored as header-less CSV files.

    Expected layout is ``root_dir/<class_name>/*.csv``: every immediate
    sub-directory of ``root_dir`` is one class, and class indices follow the
    sorted order of the directory names.

    Args:
        root_dir: Directory that holds one sub-directory per class.
        transform: Optional callable applied to the ``[1, H, W]`` tensor
            right before it is returned from ``__getitem__``.

    Attributes:
        samples: CSV file paths, grouped by class and sorted within a class.
        labels: Integer class index for each entry of ``samples``.
        classes: Sorted class (directory) names.
        class_to_idx: Mapping from class name to integer index.
    """

    def __init__(self, root_dir, transform=None):
        self.samples = []
        self.labels = []
        # Only directories are classes. Stray files in the root (README,
        # .DS_Store, ...) would otherwise become empty phantom classes and
        # inflate the number of model outputs.
        self.classes = sorted(
            entry
            for entry in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, entry))
        )
        self.class_to_idx = {c: i for i, c in enumerate(self.classes)}
        self.transform = transform

        for label in self.classes:
            # Escape the directory part so characters such as "[" in a path
            # are matched literally instead of being read as glob syntax.
            class_dir = glob.escape(os.path.join(root_dir, label))
            # glob order depends on the filesystem; sorting keeps the
            # index -> file mapping (and any seeded split) reproducible.
            csv_files = sorted(glob.glob(os.path.join(class_dir, "*.csv")))
            self.samples.extend(csv_files)
            self.labels.extend([self.class_to_idx[label]] * len(csv_files))

    def __len__(self):
        """Return the number of spectrogram files found."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load, min-max normalize and return ``(spectrogram, label)``.

        Returns:
            A tuple of a float32 tensor with a leading channel dimension
            (``[1, H, W]``, before any ``transform``) and the integer class
            index of the sample.
        """
        path = self.samples[idx]
        label = self.labels[idx]

        # Load spectrogram from CSV (no header row).
        spec = pd.read_csv(path, header=None).values.astype(np.float32)

        # Normalize to 0..1; the epsilon avoids division by zero for a
        # constant-valued spectrogram.
        spec = (spec - spec.min()) / (spec.max() - spec.min() + 1e-8)

        # Add channel dimension for CNN: [1, H, W]
        spec = torch.from_numpy(spec).unsqueeze(0)

        # The transform used to be stored but silently ignored.
        if self.transform is not None:
            spec = self.transform(spec)

        return spec, label

# ===============================
# 2. Simple CNN model
# ===============================
class CNNClassifier(nn.Module):
    """Small convolutional classifier for single-channel 2-D inputs.

    Three ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2, 2)`` stages widen
    the channels 1 -> 16 -> 32 -> 64. An adaptive average pool then squeezes
    the feature map to 4x4 so the linear head always sees ``64 * 4 * 4``
    features.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=3):
        super().__init__()

        # Layers are appended in conv/relu/pool order so the Sequential
        # indices (0, 3, 6 for the convolutions) stay stable.
        stages = []
        in_channels = 1
        for out_channels in (16, 32, 64):
            stages.append(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
            )
            stages.append(nn.ReLU())
            stages.append(nn.MaxPool2d(2, 2))
            in_channels = out_channels

        # Compress to a fixed spatial size before the linear head.
        stages.append(nn.AdaptiveAvgPool2d((4, 4)))

        self.net = nn.Sequential(*stages)
        self.fc = nn.Linear(64 * 4 * 4, num_classes)

    def forward(self, x):
        """Return class logits of shape ``[batch, num_classes]``."""
        features = self.net(x)
        flat = torch.flatten(features, start_dim=1)
        return self.fc(flat)

# ===============================
# 3. Training & Evaluation Loop
# ===============================
def train_model(data_root, epochs=10, batch_size=16, lr=1e-3):
    """Train a ``CNNClassifier`` on the spectrograms under ``data_root``.

    The dataset is split 80/20 into train/validation subsets, stratified by
    label with a fixed seed (42). The model is optimized with Adam on a
    cross-entropy loss, and one summary line is printed per epoch.

    Args:
        data_root: Root directory passed to ``SpectrogramDataset``.
        epochs: Number of passes over the training subset.
        batch_size: Batch size for both loaders.
        lr: Adam learning rate.

    Returns:
        Tuple ``(model, classes)``: the trained model and the dataset's
        class-name list.
    """
    # Dataset and stratified split
    dataset = SpectrogramDataset(data_root)
    train_idx, val_idx = train_test_split(
        range(len(dataset)),
        test_size=0.2,
        stratify=dataset.labels,
        random_state=42,
    )

    # Only the training loader shuffles.
    train_loader = DataLoader(
        torch.utils.data.Subset(dataset, train_idx),
        batch_size=batch_size,
        shuffle=True,
    )
    val_loader = DataLoader(
        torch.utils.data.Subset(dataset, val_idx),
        batch_size=batch_size,
    )

    # Model, loss, optimizer
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = CNNClassifier(num_classes=len(dataset.classes)).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        # ---- training pass ----
        model.train()
        train_loss = 0
        hits = 0
        seen = 0
        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()
            logits = model(inputs)
            batch_loss = criterion(logits, targets)
            batch_loss.backward()
            optimizer.step()

            train_loss += batch_loss.item()
            predicted = logits.max(1).indices
            seen += targets.size(0)
            hits += (predicted == targets).sum().item()

        acc = 100. * hits / seen

        # ---- validation pass (no gradients) ----
        model.eval()
        val_hits = 0
        val_seen = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                predicted = model(inputs).max(1).indices
                val_seen += targets.size(0)
                val_hits += (predicted == targets).sum().item()
        val_acc = 100. * val_hits / val_seen

        # Loss shown is the mean of the per-batch losses for this epoch.
        print(f"Epoch {epoch+1}/{epochs} - Loss: {train_loss/len(train_loader):.4f} - Train Acc: {acc:.2f}% - Val Acc: {val_acc:.2f}%")

    return model, dataset.classes

# ===============================
# 4. Run training
# ===============================
if __name__ == "__main__":
    # Train on the processed spectrogram folder with the script's settings.
    data_root = r"\processed_data"
    model, classes = train_model(
        data_root,
        epochs=15,
        batch_size=32,
        lr=1e-3,
    )

    # Persist the weights together with the class names so predictions can
    # be mapped back to labels after loading.
    torch.save(
        {"model": model.state_dict(), "classes": classes},
        "spectrogram_classifier.pth",
    )
    print("✅ Model saved to spectrogram_classifier.pth")


Epoch 1/15 - Loss: 1.0860 - Train Acc: 37.11% - Val Acc: 51.19%
Epoch 2/15 - Loss: 0.8570 - Train Acc: 60.99% - Val Acc: 80.00%
Epoch 3/15 - Loss: 0.4357 - Train Acc: 83.23% - Val Acc: 91.90%
Epoch 4/15 - Loss: 0.2570 - Train Acc: 91.66% - Val Acc: 94.17%
Epoch 5/15 - Loss: 0.1916 - Train Acc: 93.45% - Val Acc: 94.05%
Epoch 6/15 - Loss: 0.1495 - Train Acc: 94.88% - Val Acc: 92.62%
Epoch 7/15 - Loss: 0.1393 - Train Acc: 95.29% - Val Acc: 95.24%
Epoch 8/15 - Loss: 0.1067 - Train Acc: 96.81% - Val Acc: 91.90%
Epoch 9/15 - Loss: 0.1145 - Train Acc: 96.66% - Val Acc: 92.86%
Epoch 10/15 - Loss: 0.0878 - Train Acc: 97.38% - Val Acc: 93.10%
Epoch 11/15 - Loss: 0.0838 - Train Acc: 97.53% - Val Acc: 97.86%
Epoch 12/15 - Loss: 0.0692 - Train Acc: 97.92% - Val Acc: 97.50%
Epoch 13/15 - Loss: 0.0646 - Train Acc: 98.24% - Val Acc: 97.86%
Epoch 14/15 - Loss: 0.0657 - Train Acc: 98.03% - Val Acc: 98.21%
Epoch 15/15 - Loss: 0.0647 - Train Acc: 98.15% - Val Acc: 98.33%
✅ Model saved to spectrogram_class