In [None]:
!conda install -y gdown
# Only needed on Kaggle: installs gdown, which the next cell uses to download the dataset.

Retrieving notices: ...working... done


In [2]:
# Download the dataset archive from Google Drive by file id. The recorded output
# of this cell shows it saved as /kaggle/working/IRMAS-TrainingData.zip (3.18G).
!gdown --id 1mOMQcc7CtbNAYQZLd81Nfy_BG9hCK2ui

Downloading...
From (original): https://drive.google.com/uc?id=1mOMQcc7CtbNAYQZLd81Nfy_BG9hCK2ui
From (redirected): https://drive.google.com/uc?id=1mOMQcc7CtbNAYQZLd81Nfy_BG9hCK2ui&confirm=t&uuid=6cb65312-5f53-4617-b06f-2a453c03308d
To: /kaggle/working/IRMAS-TrainingData.zip
100%|██████████████████████████████████████| 3.18G/3.18G [00:36<00:00, 87.3MB/s]


In [None]:
# Extract the archive into /working.
# NOTE(review): the download cell's recorded output shows the archive saved as
# IRMAS-TrainingData.zip (in /kaggle/working), not IRMAS.zip — confirm the file
# name, and that it unpacks to /working/IRMAS/, which the listing cell reads.
!unzip IRMAS.zip -d /working

In [4]:
import pandas as pd
import numpy as np
import librosa
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from skimage.transform import rescale, resize, downscale_local_mean
from sklearn.metrics import f1_score, roc_curve, precision_score, roc_auc_score, confusion_matrix
import random
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, WeightedRandomSampler
from torch.nn.parameter import Parameter
from torchvision import models
import torchaudio as ta
import copy
import os

from scipy.special import logit, expit

import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Root folder of the extracted dataset; each sub-folder name is used as the label
# of the files it contains.
classes_path = "/working/IRMAS/"
# os.listdir returns entries in arbitrary order. Sorting keeps the file order, and
# therefore the seeded train/val/test split built from it, identical between runs.
classes = sorted(os.listdir(classes_path))

train_dict = {'path': [], 'target': []}

for inst in classes:
    files_path = os.path.join(classes_path, inst)
    # Entries starting with 'R' were already excluded (presumably a README — verify);
    # also skip any other non-directory entry so listing it cannot fail.
    if inst.startswith('R') or not os.path.isdir(files_path):
        continue
    files = sorted(os.listdir(files_path))
    train_dict['path'].extend(os.path.join(files_path, file) for file in files)
    train_dict['target'].extend([inst] * len(files))

# One row per audio file: its path and the name of the folder it came from.
df = pd.DataFrame(train_dict)
df.head()

In [6]:
from sklearn.model_selection import train_test_split

# First hold out 20% of all files as the test set ...
x_train, x_test, y_train, y_test = train_test_split(
    train_dict['path'],
    train_dict['target'],
    test_size=0.20,
    random_state=42,
    shuffle=True,
)
# ... then take 12.5% of the remainder (10% of the full set) for validation.
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.125,
    random_state=42,
    shuffle=True,
)

# Wrap each split as a two-column frame (path, target).
train_df = pd.DataFrame(dict(path=x_train, target=y_train))
val_df = pd.DataFrame(dict(path=x_val, target=y_val))
test_df = pd.DataFrame(dict(path=x_test, target=y_test))

In [7]:
# --- Audio / spectrogram settings -------------------------------------------
SR = 44100        # sampling rate used when loading audio and computing spectrograms
len_audio = 3.0   # clip length; SR * len_audio is the number of samples kept per item
n_mels = 128      # mel bands of the spectrogram
fmin = 20         # lowest frequency passed to the mel filter bank
fmax = 16000      # highest frequency passed to the mel filter bank
n_fft = 1024      # FFT window size
hop_length = 512  # hop between successive frames

# --- Runtime settings --------------------------------------------------------
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): the training call further down passes 20 epochs explicitly, so
# `epochs` is not consumed by the visible cells.
epochs = 15
batch_size = 32
batch_size_val = 64

In [8]:
# Label name -> integer code, numbered in alphabetical order of the labels in df.
INS_CODE = {label: i for i, label in enumerate(sorted(df.target.unique()))}
# Inverse lookup: integer code -> label name.
INV_INS_CODE = {i: label for label, i in INS_CODE.items()}

In [9]:
def get_audio(path, sr=SR):
    """Load the audio file at ``path`` with librosa at sampling rate ``sr``.

    Only the sample array is returned; the sampling rate librosa reports
    alongside it is discarded.
    """
    samples, _ = librosa.load(path, sr=sr)
    return samples

def mono_to_color(X, eps=1e-6):
    """Turn a single-channel array into a 3-channel ``uint8`` image.

    ``X`` is stacked three times along a new last axis, standardised with the
    global mean/std, then min-max scaled to 0..255 and cast to ``uint8``.
    If the standardised values span no more than ``eps`` (e.g. constant input),
    an all-zero ``uint8`` array of the stacked shape is returned instead.
    """
    stacked = np.stack([X, X, X], axis=-1)
    standardized = (stacked - stacked.mean()) / (stacked.std() + eps)

    lo, hi = standardized.min(), standardized.max()
    span = hi - lo

    # Guard clause for the degenerate (flat) case: nothing to rescale.
    if not (span > eps):
        return np.zeros_like(standardized, dtype=np.uint8)

    # Values already lie within [lo, hi], so no clipping is required before scaling.
    scaled = 255 * (standardized - lo) / span
    return scaled.astype(np.uint8)

def normalize(image, mean=None, std=None):
    """Scale ``image`` by 1/255 and move its third axis to the front.

    Returns a ``float32`` array. ``mean`` and ``std`` are accepted but not used.
    """
    scaled = image / 255.0
    channels_first = np.moveaxis(scaled, 2, 0)
    return channels_first.astype(np.float32)

def compute_melspec(y):
    """Compute a mel spectrogram of ``y`` and convert it to decibels.

    Uses the module-level settings ``SR``, ``n_mels``, ``fmin``, ``fmax``,
    ``n_fft`` and ``hop_length``. The result is returned as ``float32``.
    """
    power_spec = librosa.feature.melspectrogram(
        y=y,
        sr=SR,
        n_mels=n_mels,
        fmin=fmin,
        fmax=fmax,
        n_fft=n_fft,
        hop_length=hop_length,
    )
    db_spec = librosa.power_to_db(power_spec)
    return db_spec.astype(np.float32)

In [10]:
class MusicDataset(Dataset):
    """Dataset yielding ``(image, target)`` pairs for the files listed in ``data``.

    ``data`` is a DataFrame with a ``path`` column (audio file) and a ``target``
    column (label name). Each item is loaded with ``get_audio``, cut or padded to
    ``effective_length`` samples, converted with ``compute_melspec`` ->
    ``mono_to_color`` -> ``normalize``, and paired with the integer code
    ``INS_CODE[target]``.
    """

    def __init__(self, data, test=False):
        super().__init__()
        self.data = data
        # Stored for callers; this class does not branch on it.
        self.test = test
        # Number of samples every waveform is cut or padded to.
        self.effective_length = int(SR * len_audio)

    def __len__(self):
        return len(self.data)

    def _fit_length(self, waveform):
        """Return ``waveform`` cut or zero-padded to ``effective_length`` samples.

        Longer (or exactly fitting) input is truncated from the start. Shorter
        input is copied into a zero buffer at a random offset and returned as
        ``float32``.
        """
        n_samples = len(waveform)
        # `>=` matters: with `>` an exactly fitting clip fell through to the
        # padding branch, where np.random.randint(0) raises ValueError.
        if n_samples >= self.effective_length:
            return waveform[:self.effective_length]

        padded = np.zeros(self.effective_length, dtype=waveform.dtype)
        start = np.random.randint(self.effective_length - n_samples)
        padded[start:start + n_samples] = waveform
        return padded.astype(np.float32)

    def __getitem__(self, idx: int):
        # The DataLoader hands over positions in range(len(self)), so index by
        # position rather than by label; this also works after frame filtering.
        sample = self.data.iloc[idx]

        waveform = self._fit_length(get_audio(sample['path']))

        melspec = compute_melspec(waveform)
        image = mono_to_color(melspec)
        image = normalize(image)

        targets = INS_CODE[sample['target']]
        return image, targets


In [15]:
class Net(nn.Module):
    """torchvision ResNet-50 whose final ``fc`` layer is replaced by a linear
    layer producing ``out_dim`` outputs.

    The backbone is created with ``weights='DEFAULT'``.
    """

    def __init__(self, out_dim):
        super().__init__()
        self.out_dim = out_dim

        backbone = models.resnet50(weights='DEFAULT')
        # Width of the features feeding the original classification layer.
        self.num_features = backbone.fc.in_features
        backbone.fc = nn.Linear(self.num_features, self.out_dim)
        self.resnet = backbone

    def forward(self, x):
        """Return the backbone's output for the batch ``x``."""
        return self.resnet(x)

In [16]:
# Reuse the split frames built earlier instead of rebuilding identical ones.
train_dataset = MusicDataset(train_df)
val_dataset = MusicDataset(val_df)

# Only the training data needs reshuffling each epoch. Evaluation loaders keep a
# fixed order so runs are comparable and no random numbers are consumed for them.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size_val, shuffle=False)

test_set = MusicDataset(test_df)
# batch_size stays 1: the evaluation cells collect one array per batch and pass
# the resulting lists straight to scikit-learn.
test_loader = DataLoader(test_set, batch_size=1, shuffle=False)

In [27]:
# Size the output layer from the label map rather than a hard-coded class count,
# so it always matches the integer targets the dataset produces.
model = Net(len(INS_CODE)).to(device)

In [29]:
# Loaders keyed by phase name, in the form train_model expects.
dataloader = {
    'train': train_loader,
    'val': val_loader
}
# `params_to_update` was never defined in this notebook (NameError on a fresh
# kernel). Optimise every parameter of the model that requires gradients.
optimizer = torch.optim.Adam(
    [param for param in model.parameters() if param.requires_grad],
    lr=0.0001,
)
criterion = nn.CrossEntropyLoss()

In [32]:
def train_model(model, dataloaders, optimizer, criterion=None, num_epochs=20):
    """Train ``model`` and keep the weights with the best validation accuracy.

    Args:
        model: the network to train; batches are moved to the device its
            parameters live on (CPU if it has no parameters).
        dataloaders: mapping with ``'train'`` and ``'val'`` DataLoaders.
        optimizer: optimizer stepping the model's parameters.
        criterion: loss taking ``(outputs, labels)``. ``None`` (the default)
            means ``nn.CrossEntropyLoss()``.
        num_epochs: number of passes over both phases.

    Returns:
        ``(model, val_acc_history, train_loss_history, val_loss_history)``.
        The model has the best-validation weights loaded. The histories are
        lists of Python floats, one entry per epoch.
    """
    if criterion is None:
        criterion = nn.CrossEntropyLoss()

    # Follow the model's own device instead of assuming a GPU is present.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    start = time.time()
    val_acc_history = []

    best_model_weights = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    train_loss_history = []
    val_loss_history = []
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-'*10)
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(run_device)
                labels = labels.to(run_device)
                optimizer.zero_grad()
                # Gradients are tracked only while training.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # loss is a batch mean; weight it by batch size so the epoch
                # figure is a per-sample average even with a short last batch.
                running_loss += loss.item() * inputs.size(0)
                # Accumulate as a plain int so histories hold floats, not tensors.
                running_corrects += int((preds == labels).sum().item())

            n_samples = len(dataloaders[phase].dataset)
            epoch_loss = running_loss / n_samples
            epoch_acc = running_corrects / n_samples

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            if phase == 'train':
                train_loss_history.append(epoch_loss)
            else:
                val_loss_history.append(epoch_loss)
                val_acc_history.append(epoch_acc)
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_weights = copy.deepcopy(model.state_dict())

    # Duration of the run, not the absolute timestamp.
    time_elapsed = time.time() - start

    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # Best model weights are loaded here
    model.load_state_dict(best_model_weights)
    return model, val_acc_history, train_loss_history, val_loss_history

In [None]:
# Train for 20 epochs and keep the histories for plotting.
model, val_acc_history, train_loss_history, val_loss_history = train_model(
    model,
    dataloader,
    optimizer,
    criterion=criterion,
    num_epochs=20,
)

In [34]:
import numpy as np

def plot_training_history(train_loss_history, val_loss_history):
    """Plot training and validation loss per epoch on one figure.

    Each series' mean is drawn as a dashed horizontal line, with its value shown
    in the legend.
    """
    # Mean of each series, drawn below as a reference line.
    average_train_loss = np.mean(train_loss_history)
    average_val_loss = np.mean(val_loss_history)

    fig, ax = plt.subplots(figsize=(10, 6))

    ax.plot(train_loss_history, label='Training Loss')
    ax.plot(val_loss_history, label='Validation Loss')

    ax.axhline(y=average_train_loss, color='r', linestyle='--', label=f'Average Training Loss: {average_train_loss:.4f}')
    ax.axhline(y=average_val_loss, color='b', linestyle='--', label=f'Average Validation Loss: {average_val_loss:.4f}')

    ax.set_title('Training and Validation Losses Over Epochs')
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss')
    ax.legend()
    ax.grid(True)
    plt.show()

In [None]:
# Show how both losses evolved over the run.
plot_training_history(
    train_loss_history=train_loss_history,
    val_loss_history=val_loss_history,
)

In [37]:
def test_model(model, dataloader):

    model.eval() 
    total_loss = 0.0
    total_samples = 0
    all_preds = []
    all_labels = []
    incorrect_samples = []
    incorrect_count = 0


    with torch.no_grad():
        for x_batch, y_batch in dataloader:
            inputs, labels = x_batch, y_batch
            inputs = inputs.to('cuda')
            labels = labels.to('cuda')
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.append(preds.cpu().numpy())
            all_labels.append(labels.cpu().numpy())
    return all_labels, all_preds

In [39]:
# Collect ground-truth labels and predictions for the held-out test split.
all_labels, all_preds = test_model(model=model, dataloader=test_loader)

In [None]:
from sklearn.metrics import classification_report, ConfusionMatrixDisplay
# Per-class precision / recall / F1 on the test split.
print(classification_report(y_true=all_labels, y_pred=all_preds))

In [None]:
from sklearn.metrics import accuracy_score
# Overall fraction of test samples predicted correctly.
print(accuracy_score(y_true=all_labels, y_pred=all_preds))

In [None]:
# Fix the set and order of classes explicitly. Without `labels=` the matrix only
# covers classes that occur in the data, so it can end up smaller than the list
# of display labels when a class is missing from the test split.
class_ids = sorted(INV_INS_CODE)
cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=[INV_INS_CODE[class_id] for class_id in class_ids],
)
disp.plot()
plt.savefig('confusion_matrix_resnet50nofreeze.png')
plt.show()

In [44]:
from sklearn.metrics import recall_score

In [None]:
# Macro-averaged metrics: every class contributes equally, whatever its size.
for metric_name, metric_fn in (
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1-score:", f1_score),
):
    print(metric_name, metric_fn(all_labels, all_preds, average='macro'))