## Install Dependencies

In [None]:
!pip install -q -r /kaggle/input/spectrogrand-size-reduction/requirements.txt

## Migrate code from HouseX repo

In [1]:
import numpy as np
import torch
import json

class log(object):
    def __init__(self) -> None:
        self.data = {
            'train_loss': [], 'val_loss': [], 'test_loss': [],
            'train_acc': [], 'val_acc': [], 'test_acc': []
        }
        
    def push(self, train_loss, train_acc, val_loss, val_acc, test_loss, test_acc):
        self.data['train_loss'] += [train_loss]
        self.data['val_loss'] += [val_loss]
        self.data['test_loss'] += [test_loss]
        self.data['train_acc'] += [train_acc]
        self.data['val_acc'] += [val_acc]
        self.data['test_acc'] += [test_acc]
        
    def save(self, tar_path):
        with open(tar_path, 'w') as f:
            json.dump(self.data, f)

In [2]:
import random
import os
import json
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, classification_report
from tqdm import tqdm, trange
import librosa
from librosa import display
import matplotlib.pyplot as plt

import torchvision
from torchvision import transforms, models
from torch.utils.tensorboard import SummaryWriter
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import argparse
from datetime import date
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import time 


DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

BATCH_SIZE = 8
data_path = "./"
song_types = ['future house', 'bass house', 'progressive house', 'melodic house']
EPOCHS = 20

2024-04-26 11:08:20.232455: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-26 11:08:20.232519: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-26 11:08:20.234120: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
def get_tensors(transform, path='./melspecgrams/', mode=None):
    # Collect data
    image_tensors = []
    label_tensors = []

    spec_dir = os.path.join(path, mode)
    img_list = [ele for ele in os.listdir(spec_dir) if '.jpg' in ele]
    for img in img_list:
        # song_type = img[:img.index('-')] # Expected file name example: bass house-1.jpg
        song_type = img.split('/')[-1].split("_")[0]
        # print(img, song_type)
        img_path = spec_dir + '/' + img
        img_tensor = transform(Image.open(img_path).convert('RGB'))
        image_tensors.append(img_tensor)
        label_tensors.append(song_types.index(song_type))

    return image_tensors, label_tensors


class MelSpectrogramDataset(Dataset):
    def __init__(self, images, labels):
        self.images = images
        self.labels = labels

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        # input, target
        return self.images[idx], torch.tensor(self.labels[idx], dtype=torch.long)

In [4]:
def one_epoch(model, loader, mode, device=DEVICE, epoch_id=None):
    def run(mode, device=device):
        criterion = nn.CrossEntropyLoss()
        losses = []
        correct_preds = 0
        length = 0
        for batch in tqdm(loader, desc='Epoch '+str(epoch_id+1)+' '+mode):
            images, labels = batch[0].to(device), batch[1].to(device)
            length += images.shape[0]

            outputs = model(images)
            preds = torch.argmax(outputs, dim=1)
            loss = criterion(outputs, labels)

            correct_preds += torch.sum(preds == labels).item()
            losses.append(loss.item())

            if mode == 'train':
                opt.zero_grad()
                loss.backward()
                opt.step()
        return {'loss': np.mean(losses), 'accuracy': correct_preds / length}

    if mode == 'train':
        opt = optim.SGD(model.parameters(), lr=LR)
        model = model.train()
        return run('train', device)
    else:
        model = model.eval()
        with torch.no_grad():
            return run('test', device)


def train(train_loader, val_loader, test_loader, model, epochs=EPOCHS, device=DEVICE, writer=None, eval_first=True):
    model = model.to(device)
    epoch = -1
    if eval_first:
        evaluate(model, device, val_loader, 'val', epoch)
        evaluate(model, device, test_loader, 'test', epoch)

    cur_log = log()
    for epoch in range(epochs):
        ret = one_epoch(model, train_loader, 'train', device, epoch)
        train_loss = ret['loss']
        train_acc = ret['accuracy']
        print('Epoch {}: '.format(epoch+1))
        print(f"train loss: {train_loss}")
        print(f"train accuracy: {train_acc}")

        val_loss, val_acc = evaluate(model, device, val_loader, 'val', epoch)
        test_loss, test_acc = evaluate(model, device, test_loader, 'test', epoch)
        if writer:
            writer.add_scalar('loss/train', train_loss, epoch)
            writer.add_scalar('accuracy/train', train_acc, epoch)
            writer.add_scalar('loss/val', val_loss, epoch)
            writer.add_scalar('accuracy/val', val_acc, epoch)
            writer.add_scalar('loss/test', test_loss, epoch)
            writer.add_scalar('accuracy/test', test_acc, epoch)
            
            cur_log.push(train_loss, train_acc, val_loss, val_acc, test_loss, test_acc)
    model = model.cpu()
    return cur_log


def evaluate(model, device=DEVICE, loader=None, comment='val', epoch_id=None):
    model = model.to(device)
    ret = one_epoch(model, loader, 'test', device, epoch_id)
    loss = ret['loss']
    accuracy = ret['accuracy']

    print(f"{comment} loss: {loss}")
    print(f"{comment} accuracy: {accuracy}")
    return loss, accuracy

## Load Data

In [5]:
train_transform = transforms.Compose([
            transforms.Resize((96, 96)),
            transforms.RandomPosterize(2, p = 0.25),
            transforms.ColorJitter(brightness = (0.50, 1.00)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

test_transform = transforms.Compose([
            transforms.Resize((96, 96)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [6]:
DATA_DIR="/kaggle/input/housex-spectrograms/melspecgrams_data"

train_set = MelSpectrogramDataset(*get_tensors(train_transform, DATA_DIR, mode='train'))
val_set = MelSpectrogramDataset(*get_tensors(test_transform, DATA_DIR, mode='val'))
test_set = MelSpectrogramDataset(*get_tensors(test_transform, DATA_DIR, mode='test'))

train_loader = DataLoader(train_set, BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_set, BATCH_SIZE, shuffle=True)

print('dataset length:', len(train_set), len(val_set), len(test_set))
print('dataloader length:', len(train_loader), len(val_loader), len(test_loader))


dataset length: 3440 537 337
dataloader length: 430 68 43


## Load Resnet101 model

In [7]:
BASE_MODEL_PATH = "/kaggle/input/spectrogrand-size-reduction/resnet_finetuned_full.pth"

base_model = torch.load(BASE_MODEL_PATH)
base_model.to(DEVICE)
base_model.eval()

Sequential(
  (0): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0)

## Benchmark performance of the base model

In [8]:
with torch.no_grad():
    loss, accuracy = evaluate(base_model, device=DEVICE, loader=val_loader, comment='val', epoch_id=0)
    print('='*90)
    loss, accuracy = evaluate(base_model, device=DEVICE, loader=test_loader, comment='test', epoch_id=0)

Epoch 1 test: 100%|██████████| 68/68 [00:01<00:00, 50.45it/s]


val loss: 1.3441875016600107
val accuracy: 0.7523277467411545


Epoch 1 test: 100%|██████████| 43/43 [00:00<00:00, 65.90it/s]

test loss: 1.5759864330622042
test accuracy: 0.7566765578635015





## Post-training dynamic quantisation

In [12]:
base_model.to("cpu")
model_int8 = torch.ao.quantization.quantize_dynamic(
    base_model,  # the original model
    {torch.nn.Linear},  # a set of layers to dynamically quantize
    dtype=torch.qint8)  # the target dtype for quantized weights
model_int8.eval()
model_int8.to("cpu")

Sequential(
  (0): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0)

In [13]:
with torch.no_grad():
    loss, accuracy = evaluate(model_int8, device="cpu", loader=val_loader, comment='val', epoch_id=0)
    print('='*90)
    loss, accuracy = evaluate(model_int8, device="cpu", loader=test_loader, comment='test', epoch_id=0)

Epoch 1 test: 100%|██████████| 68/68 [00:15<00:00,  4.46it/s]


val loss: 1.3355378502130728
val accuracy: 0.750465549348231


Epoch 1 test: 100%|██████████| 43/43 [00:09<00:00,  4.63it/s]

test loss: 1.5678785251545568
test accuracy: 0.7566765578635015





In [14]:
with torch.no_grad():
    loss, accuracy = evaluate(base_model, device="cpu", loader=val_loader, comment='val', epoch_id=0)
    print('='*90)
    loss, accuracy = evaluate(base_model, device="cpu", loader=test_loader, comment='test', epoch_id=0)

Epoch 1 test: 100%|██████████| 68/68 [00:14<00:00,  4.57it/s]


val loss: 1.337478140642976
val accuracy: 0.7523277467411545


Epoch 1 test: 100%|██████████| 43/43 [00:09<00:00,  4.37it/s]

test loss: 1.570918446272709
test accuracy: 0.7566765578635015





## Print model sizes for the base model and the quantised model

In [16]:
# Ref: https://pytorch.org/tutorials/recipes/recipes/dynamic_quantization.html
def print_size_of_model(model, label=""):
    torch.save(model.state_dict(), "temp.p")
    size=os.path.getsize("temp.p")
    print("model: ",label,' \t','Size (KB):', size/1e3)
    os.remove('temp.p')
    return size

In [18]:
_ = print_size_of_model(base_model, "base_model")

model:  base_model  	 Size (KB): 178798.05


In [19]:
_ = print_size_of_model(model_int8, "dynamic_quantisation")

model:  dynamic_quantisation  	 Size (KB): 172643.534


In [20]:
print(f"Percentage of model size: {172643.534/178798.05}")

Percentage of model size: 0.9655783941715249
