In [2]:
import numpy as np
import os
import tensorflow as tf
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, models, transforms
import torch.nn.init as init
import matplotlib.pyplot as plt
from IPython.display import Audio
import librosa.display
import librosa
import zipfile
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from torchvision import datasets, models, transforms
from torch.optim import lr_scheduler
import time
from PIL import Image
import copy
from collections import Counter
import cv2
torch.cuda.empty_cache()

In [1]:
from melspecdataset import MelSpecDataset, normalize_by_255

In [12]:
%pip install opencv-python

Collecting opencv-python
  Downloading opencv_python-4.9.0.80-cp37-abi3-macosx_11_0_arm64.whl.metadata (20 kB)
Downloading opencv_python-4.9.0.80-cp37-abi3-macosx_11_0_arm64.whl (35.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.4/35.4 MB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: opencv-python
Successfully installed opencv-python-4.9.0.80
Note: you may need to restart the kernel to use updated packages.


In [3]:
transform = transforms.Compose([
    # transforms.Resize((224, 224)),  # Optionally resize images
    transforms.ToTensor(),            # Convert images to tensors
    normalize_by_255        # Normalize by dividing by 255
])


# Define the mapping from class names to class indices
class_mapping = {
    'car_horn': 1,
    'dog_barking': 2,
    'drilling': 3,
    'Fart': 4,
    'Guitar': 5,
    'Gunshot_and_gunfire': 6,
    'Hi-hat': 7,
    'Knock': 8,
    'Laughter': 9,
    'Shatter': 10,
    'siren': 11,
    'Snare_drum': 12,
    'Splash_and_splatter': 13
}

# Define the directories
train_directory = "train"
val_directory = "val"

# Create datasets
train_dataset = MelSpecDataset(train_directory, class_mapping, transform)
val_dataset = MelSpecDataset(val_directory, class_mapping, transform)

# Create dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
val_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


datasets = {"train": train_dataset, "val": val_dataset}
dataloaders = {"train": train_dataloader, "val": val_dataloader}
dataset_sizes = {x: len(datasets[x]) for x in ['train', 'val']}

In [4]:
train_dataset.class_data, sum(train_dataset.class_data.values())

({'Snare_drum': 1000,
  'siren': 1000,
  'Hi-hat': 1000,
  'Gunshot_and_gunfire': 1000,
  'car_horn': 1000,
  'drilling': 1000,
  'Guitar': 1000,
  'Fart': 1000,
  'Laughter': 1000,
  'Splash_and_splatter': 1000,
  'dog_barking': 1000,
  'Shatter': 1000,
  'Knock': 1000},
 13000)

In [5]:
class CNNModel(nn.Module):
    def __init__(self, input_shape, num_classes):
        super(CNNModel, self).__init__()
        self.input_shape = input_shape
        self.num_classes = num_classes

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Calculate input size for fully connected layers
        self.fc_input_size = self._calculate_fc_input_size()

        self.fc1 = nn.Linear(self.fc_input_size, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def _calculate_fc_input_size(self):
        # Calculate the size of the flattened output after convolution and pooling
        with torch.no_grad():
            x = torch.zeros(1, 3, self.input_shape[0], self.input_shape[1])  # Create dummy input tensor
            x = self.pool(nn.functional.relu(self.conv1(x)))
            x = self.pool(nn.functional.relu(self.conv2(x)))
            x = self.pool(nn.functional.relu(self.conv3(x)))
            return x.view(1, -1).shape[1]

    def forward(self, x):
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = self.pool(nn.functional.relu(self.conv3(x)))
        x = x.view(-1, self.fc_input_size)  # Flatten the output for fully connected layers
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [6]:
def save_model(model, model_name):
  torch.save(model.state_dict(), f'{model_name}_weights.pth')
  torch.save(model, f'{model_name}.pth')

In [7]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        print("HELLO")

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            index = 0
            print("STARTING ITERATION")
            for inputs, labels in dataloaders[phase]:
              print("BATCH NUMBER = ", index)
              # inputs = inputs.to(device)
              # labels = labels.to(device)

              index += 1
              optimizer.zero_grad()
              with torch.set_grad_enabled(phase == 'train'):
                  outputs = model(inputs)
                  _, preds = torch.max(outputs, 1)
                  loss = criterion(outputs, labels)

                  if phase == 'train':
                      loss.backward()
                      optimizer.step()

              running_loss += loss.item() * inputs.size(0)
              running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
              print("STEPPING SCEHEDULER")
              scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f'EPOCH: {epoch} {phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # deep copy the model weights for the model which has the highest acc.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [8]:
# Define input shape and number of classes
input_shape = (128, 345, 3)
num_classes = 13  # Assuming 14 output classes

# Instantiate the model
model_ft = CNNModel(input_shape, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.001)
exp_lr_scheduler = lr_scheduler.ExponentialLR(optimizer_ft, gamma=0.9)

In [23]:
model_ft = model_ft.to(device)

In [None]:
# [SimplifiedResNet] AUGMENTED [& ENSURED VAL HAS BEEN PREPROCESSED]
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=10)

Epoch 0/9
----------
HELLO
STARTING ITERATION
BATCH NUMBER =  0
BATCH NUMBER =  1
BATCH NUMBER =  2
BATCH NUMBER =  3
BATCH NUMBER =  4
BATCH NUMBER =  5
BATCH NUMBER =  6
BATCH NUMBER =  7
BATCH NUMBER =  8
BATCH NUMBER =  9
BATCH NUMBER =  10
BATCH NUMBER =  11
BATCH NUMBER =  12
BATCH NUMBER =  13
BATCH NUMBER =  14
BATCH NUMBER =  15
BATCH NUMBER =  16
BATCH NUMBER =  17
BATCH NUMBER =  18
BATCH NUMBER =  19
BATCH NUMBER =  20
BATCH NUMBER =  21
BATCH NUMBER =  22
BATCH NUMBER =  23
BATCH NUMBER =  24
BATCH NUMBER =  25
BATCH NUMBER =  26
BATCH NUMBER =  27
BATCH NUMBER =  28
BATCH NUMBER =  29
BATCH NUMBER =  30
BATCH NUMBER =  31
BATCH NUMBER =  32
BATCH NUMBER =  33
BATCH NUMBER =  34
BATCH NUMBER =  35
BATCH NUMBER =  36
BATCH NUMBER =  37
BATCH NUMBER =  38
BATCH NUMBER =  39
BATCH NUMBER =  40
BATCH NUMBER =  41
BATCH NUMBER =  42
BATCH NUMBER =  43
BATCH NUMBER =  44
BATCH NUMBER =  45
BATCH NUMBER =  46
BATCH NUMBER =  47
BATCH NUMBER =  48
BATCH NUMBER =  49
BATCH NUMBER =