# Imports

In [None]:
import keras
from keras.utils import Sequence

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import os
from scipy.io import wavfile
import pickle as pk
from sklearn.metrics import roc_curve, auc, roc_auc_score, accuracy_score
import matplotlib.pyplot as plt

###### Parameters

In [None]:
# Model variant to evaluate: 'raw', 'mel' or 'lstm'.
mod_type = 'lstm'
batch_size = 2  # number of files fetched per generator batch
n_splits = 4    # time chunks cut from each spectrogram by the LSTM generator
n_gpus = 1      # values > 1 wrap the loaded model with multi_gpu_model

# (dataset directory, checkpoint directory) for each variant.  Any mod_type
# other than 'mel' / 'lstm' falls back to the raw-waveform locations.
_RAW_LOCATIONS = ('../data/MagnaTagATune/rawwav_2/', './checkpoints_RAW/')
_LOCATIONS_BY_MOD_TYPE = {
    'mel': ('../data/MagnaTagATune/mel_default_hop/',
            './checkpoints_mel_32f_V3/'),
    'lstm': ('../data/MagnaTagATune/MEL_default_hop/',
             './checkpoints_MEL_LSTM_V2.save/'),
}
dataset_dir, checkpoint_dir = _LOCATIONS_BY_MOD_TYPE.get(mod_type, _RAW_LOCATIONS)

# Tab-separated annotation table (one row per clip).
annotations_path = '../data/MagnaTagATune/annotation_reduced_50.csv'

##### Functions

In [None]:
class MagnaTagATuneSequenceRaw(Sequence):
    """Batch generator that reads raw waveforms from ``.wav`` files.

    Each entry of ``train_set_paths`` has its last three characters replaced
    by ``'wav'`` and is resolved relative to the module-level ``dataset_dir``.
    """

    def __init__(self, train_set_paths, train_set_labels, batch_size):
        """Store the clip paths, their label rows and the batch size."""
        self.paths = train_set_paths
        self.y = train_set_labels
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches, counting a final partial one."""
        n_batches = np.ceil(len(self.paths) / float(self.batch_size))
        return int(n_batches)

    def __getitem__(self, idx):
        """Return ``(batch_x, batch_y)`` for batch number ``idx``."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        # wavfile.read returns (sample_rate, samples); only the samples are kept.
        waveforms = [
            wavfile.read(dataset_dir + name[:-3] + 'wav')[1]
            for name in self.paths[start:stop]
        ]
        # Stack to 2-D and append a trailing axis; this needs every waveform
        # in the batch to be 1-D and of equal length.
        stacked = np.array(waveforms)[:, :, np.newaxis]
        return (stacked, self.y[start:stop])
    
class MagnaTagATuneSequenceMEL(Sequence):
    """Batch generator that reads pickled 2-D arrays (one ``.p`` file per clip).

    Each entry of ``train_set_paths`` has its last three characters replaced
    by ``'p'`` and is resolved relative to the module-level ``dataset_dir``.
    """

    def __init__(self, train_set_paths, train_set_labels, batch_size):
        """Store the clip paths, their label rows and the batch size."""
        self.paths, self.y = train_set_paths, train_set_labels
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches, counting a final partial one."""
        return int(np.ceil(len(self.paths) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Return ``(batch_x, batch_y)`` for batch number ``idx``."""
        batch_x_paths = self.paths[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_x = []
        for value in batch_x_paths:
            path = dataset_dir + value[:-3] + 'p'
            # Close the handle deterministically instead of leaking one per clip.
            # NOTE: pickle can execute arbitrary code on load; only point
            # dataset_dir at files you produced yourself.
            with open(path, 'rb') as handle:
                batch_x.append(pk.load(handle))
        # Stack to 3-D and append a trailing axis; this needs every pickled
        # array in the batch to be 2-D and of identical shape.
        batch_x = np.array(batch_x)[:, :, :, np.newaxis]
        return (batch_x, batch_y)
    
class MagnaTagATuneSequenceLSTM(Sequence):
    """Batch generator that cuts each pickled 2-D array into time chunks.

    Every clip contributes ``n_splits`` consecutive chunks of ``split_size``
    rows taken from the transposed array, so one batch holds
    ``len(batch paths) * n_splits`` samples and each label row is repeated
    ``n_splits`` times to match.
    """

    def __init__(self, train_set_paths, train_set_labels, batch_size, n_splits):
        """Store the inputs and derive the chunk length from the first clip.

        The first file is loaded once; its ``shape[1]`` is taken as the number
        of time steps. Raises ``IndexError`` if ``train_set_paths`` is empty.
        """
        self.paths, self.y = train_set_paths, train_set_labels
        self.batch_size = batch_size

        timestamps = self._load(self.paths[0]).shape[1]
        self.n_splits = n_splits
        # Floor division: the trailing ``timestamps % n_splits`` time steps of
        # every clip are never served.
        self.split_size = timestamps // n_splits

    @staticmethod
    def _load(value):
        """Unpickle the array stored for annotation path ``value``."""
        path = dataset_dir + value[:-3] + 'p'
        # Close the handle deterministically instead of leaking one per clip.
        # NOTE: pickle can execute arbitrary code on load; only point
        # dataset_dir at files you produced yourself.
        with open(path, 'rb') as handle:
            return pk.load(handle)

    def __len__(self):
        """Return the number of batches, counting a final partial one."""
        return int(np.ceil(len(self.paths) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Return ``(batch_x, batch_y)`` for batch number ``idx``."""
        batch_x_paths = self.paths[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_x = []
        for value in batch_x_paths:
            # Transpose so that slicing the first axis cuts along time.
            frames = self._load(value).T
            for split in range(self.n_splits):
                batch_x.append(frames[split * self.split_size:(split + 1) * self.split_size])
        # The full slice is a no-op on a 3-D stack and raises if the chunks
        # could not be stacked into one (e.g. clips of differing length).
        batch_x = np.array(batch_x)[:, :, :]
        # One label row per chunk, in the same order as batch_x.
        batch_y = np.repeat(batch_y, self.n_splits, axis=0)
        return (batch_x, batch_y)

In [None]:
def find_best_checkpoint(prev_chkpts):
    """Pick the checkpoint file name with the lowest score encoded in its name.

    The name is parsed at fixed positions: characters 8-10 hold the epoch
    (integer) and characters 12-18 hold the score (float, presumably the
    monitored validation loss -- confirm against the training script).
    Names that do not parse at those positions (hidden files, sub-directories
    or other stray entries returned by ``os.listdir``) are skipped rather than
    aborting the search.

    Parameters
    ----------
    prev_chkpts : iterable of str
        Candidate file names (not full paths).

    Returns
    -------
    (str, int)
        Best file name and its epoch; ``('', 0)`` when nothing parses.
        On ties the first name encountered wins.
    """
    best_ratio = float('inf')
    best_chkpt = ''
    best_epoch = 0
    for chkpt in prev_chkpts:
        try:
            epoch = int(chkpt[8:11])
            ratio = float(chkpt[12:19])
        except ValueError:
            # Not a checkpoint name in the expected fixed-width layout.
            continue

        if ratio < best_ratio:
            best_ratio = ratio
            best_chkpt = chkpt
            best_epoch = epoch
    print('\n starting from model {} \n'.format(best_chkpt))
    return best_chkpt, best_epoch

In [None]:
def accuracy_opt_th(y_true, y_pred, th):
    """Return the accuracy of ``y_pred`` binarised at threshold ``th``.

    Scores greater than or equal to ``th`` count as positive; the resulting
    boolean predictions are scored against ``y_true`` with ``accuracy_score``.
    """
    return accuracy_score(y_true, y_pred >= th)
    

# Preparation

###### Dataset

In [None]:
# Annotation 'mp3_path' values to exclude from every split when urzi_pc is
# True (see the dataset cell, which filters with isin(to_drop)).
# Presumably these clips are missing or unreadable on that machine -- confirm.
to_drop = ['0/american_bach_soloists-j_s__bach__cantatas_volume_v-02-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_ii_recitative__gleichwie_der_regen_und_schnee-30-59.mp3',
          '0/american_bach_soloists-j_s__bach__cantatas_volume_v-03-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_iii_recitative_and_litany__mein_gott_hier_wird_mein_herze_sein-30-59.mp3',
          '0/american_bach_soloists-j_s__bach__cantatas_volume_v-03-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_iii_recitative_and_litany__mein_gott_hier_wird_mein_herze_sein-59-88.mp3',
          '0/american_bach_soloists-j_s__bach__cantatas_volume_v-03-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_iii_recitative_and_litany__mein_gott_hier_wird_mein_herze_sein-88-117.mp3',
          '0/american_bach_soloists-j_s__bach__cantatas_volume_v-03-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_iii_recitative_and_litany__mein_gott_hier_wird_mein_herze_sein-146-175.mp3',
          '0/american_bach_soloists-j_s__bach__cantatas_volume_v-03-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_iii_recitative_and_litany__mein_gott_hier_wird_mein_herze_sein-175-204.mp3',
          '0/american_bach_soloists-j_s__bach__cantatas_volume_v-03-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_iii_recitative_and_litany__mein_gott_hier_wird_mein_herze_sein-204-233.mp3',
          '0/american_bach_soloists-j_s__bach__cantatas_volume_v-03-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_iii_recitative_and_litany__mein_gott_hier_wird_mein_herze_sein-233-262.mp3',
          '0/american_bach_soloists-j_s__bach__cantatas_volume_v-03-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_iii_recitative_and_litany__mein_gott_hier_wird_mein_herze_sein-262-291.mp3',
          '0/american_bach_soloists-j_s__bach__cantatas_volume_v-03-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_iii_recitative_and_litany__mein_gott_hier_wird_mein_herze_sein-291-320.mp3',
          '6/norine_braun-now_and_zen-08-gently-117-146.mp3',
          '2/ensemble_sreteniye___three_holies_church_choristers-dont_cry_rachael-09-who_is_the__great_lord__a_byzantine_tradition_b_bulgarian_tradition_c_russian_tradition-0-29.mp3',
          # NOTE(review): the next entry repeats the previous one verbatim.
          # Harmless for isin(), but it may have been meant to name a
          # different segment of the same track -- confirm.
          '2/ensemble_sreteniye___three_holies_church_choristers-dont_cry_rachael-09-who_is_the__great_lord__a_byzantine_tradition_b_bulgarian_tradition_c_russian_tradition-0-29.mp3',
          '2/ensemble_sreteniye___three_holies_church_choristers-dont_cry_rachael-09-who_is_the__great_lord__a_byzantine_tradition_b_bulgarian_tradition_c_russian_tradition-117-146.mp3',
          '2/ensemble_sreteniye___three_holies_church_choristers-dont_cry_rachael-09-who_is_the__great_lord__a_byzantine_tradition_b_bulgarian_tradition_c_russian_tradition-175-204.mp3',
          '2/ensemble_sreteniye___three_holies_church_choristers-dont_cry_rachael-09-who_is_the__great_lord__a_byzantine_tradition_b_bulgarian_tradition_c_russian_tradition-204-233.mp3',
          '2/ensemble_sreteniye___three_holies_church_choristers-dont_cry_rachael-09-who_is_the__great_lord__a_byzantine_tradition_b_bulgarian_tradition_c_russian_tradition-233-262.mp3',
          '2/ensemble_sreteniye___three_holies_church_choristers-dont_cry_rachael-09-who_is_the__great_lord__a_byzantine_tradition_b_bulgarian_tradition_c_russian_tradition-262-291.mp3',
          '8/jacob_heringman-josquin_des_prez_lute_settings-19-gintzler__pater_noster-204-233.mp3',
          '9/american_baroque-dances_and_suites_of_rameau_and_couperin-25-le_petit_rien_xiveme_ordre_couperin-88-117.mp3']
# Switch that enables the to_drop filtering; left off by default.
urzi_pc=False

In [None]:
# Load the tab-separated annotation table: one row per clip, with an
# 'mp3_path' column, an 'Unnamed: 0' column and the label columns.
annotations = pd.read_csv(annotations_path, sep='\t')

# First split off the test set, then carve the validation set out of the
# remainder.  With these fractions the final proportions are roughly
# 0.79 train / 0.07 validation / 0.13 test.  The fixed random_state keeps the
# split reproducible from run to run.
tot_t_size = 0.866203
tot_train_set, test_set = train_test_split(annotations, train_size=tot_t_size, test_size=(1-tot_t_size), random_state=42)

t_size = 0.91429
train_set, val_set = train_test_split(tot_train_set, train_size=t_size, test_size=(1-t_size), random_state=42)


def _without_dropped(frame):
    """Return `frame` minus the rows whose mp3_path is in to_drop, re-indexed from 0."""
    return frame.drop(index=frame.loc[frame['mp3_path'].isin(to_drop)].index).reset_index(drop=True)


def _paths_and_labels(frame):
    """Split `frame` into its mp3_path array and its label matrix."""
    return frame['mp3_path'].values, frame.drop(columns=['mp3_path', 'Unnamed: 0']).values


if urzi_pc:
    annotations = _without_dropped(annotations)
    # Filter the training split itself.  The previous code rebuilt train_set
    # from the full `annotations` table, which replaced the training split
    # with (almost) every clip, including validation and test rows.
    train_set = _without_dropped(train_set)
    val_set = _without_dropped(val_set)
    test_set = _without_dropped(test_set)

data_set_paths, data_set_labels = _paths_and_labels(annotations)
train_set_paths, train_set_labels = _paths_and_labels(train_set)
val_set_paths, val_set_labels = _paths_and_labels(val_set)
test_set_paths, test_set_labels = _paths_and_labels(test_set)

###### Model

In [None]:
# Pick the best-scoring checkpoint in checkpoint_dir and load it.
previous_checkpoints = os.listdir(checkpoint_dir)
best_checkpoint, best_epoch = find_best_checkpoint(previous_checkpoints)
# os.path.join keeps the path valid whether or not checkpoint_dir ends with a
# separator; plain concatenation silently produced a wrong path without one.
model = keras.models.load_model(os.path.join(checkpoint_dir, best_checkpoint))
if n_gpus > 1:
    # Replicate the model across the requested number of GPUs.
    model = keras.utils.multi_gpu_model(model, gpus=n_gpus)

###### Dataset Portion Choice

In [None]:
# Evaluate on the test split; bind another split's paths/labels here to
# evaluate that portion of the dataset instead.
evaluation_paths, evaluation_labels = test_set_paths, test_set_labels

###### Prediction Calculation

In [None]:
# Build the batch generator that matches the model variant.  An unknown
# mod_type is rejected here instead of leaving `predictions` undefined (or
# stale from an earlier cell) for the evaluation cells below.
if mod_type == 'raw':
    evaluation_sequence = MagnaTagATuneSequenceRaw(evaluation_paths, evaluation_labels, batch_size)
elif mod_type == 'mel':
    evaluation_sequence = MagnaTagATuneSequenceMEL(evaluation_paths, evaluation_labels, batch_size)
elif mod_type == 'lstm':
    evaluation_sequence = MagnaTagATuneSequenceLSTM(evaluation_paths, evaluation_labels, batch_size, n_splits)
else:
    raise ValueError("unknown mod_type {!r}; expected 'raw', 'mel' or 'lstm'".format(mod_type))

predictions = model.predict_generator(evaluation_sequence, verbose=1)

if mod_type == 'lstm':
    # The LSTM generator emits n_splits consecutive rows per clip, so the
    # number of prediction rows is a multiple of n_splits.  Group them and
    # average within each group to get one prediction row per clip.
    predictions_averaged = predictions.reshape(-1, n_splits, predictions.shape[1]).mean(axis=1)
    predictions = predictions_averaged

# Evaluation

In [None]:
# ROC AUC computed over all label columns at once, using roc_auc_score's
# default averaging.
global_auc = roc_auc_score(y_true=evaluation_labels, y_score=predictions)
auc_message = "Global AUC score is : {}"
print(auc_message.format(global_auc))

In [None]:
# Label names: columns 1..50 of the annotation table (column 0 is skipped).
labels = annotations.columns.values[1:51]
# Per-label results, keyed by label column index.
fpr = dict()
tpr = dict()
roc_auc = dict()
thresholds = dict()
best_th = dict()   # label index -> (position on the ROC curve, threshold value)
accuracy = []
for i in range(evaluation_labels.shape[1]):
    fpr[i], tpr[i], thresholds[i] = roc_curve(evaluation_labels[:, i], predictions[:, i])
    # Operating point where the true-positive rate is closest to the
    # true-negative rate (1 - FPR); computed once and reused.
    best_idx = np.argmin(np.abs(tpr[i] - (1 - fpr[i])))
    best_th[i] = (best_idx, thresholds[i][best_idx])
    roc_auc[i] = auc(fpr[i], tpr[i])
    accuracy.append(accuracy_opt_th(evaluation_labels[:, i], predictions[:, i], best_th[i][1]))
# Single conversion instead of re-allocating the array on every iteration.
accuracy = np.array(accuracy, dtype=float)

# Persist the chosen thresholds so the prediction cells can reuse them.
with open('best_th_'+mod_type+'.p', 'wb') as handle:
    pk.dump(best_th, handle)

In [None]:
# Reload the per-label thresholds saved by the evaluation cell; the context
# manager closes the file handle that the bare open() call used to leak.
with open('best_th_'+mod_type+'.p', 'rb') as handle:
    best_th = pk.load(handle)

In [None]:
# Read each label's TPR / FPR at its selected operating point
# (best_th[label][0] is the position on that label's ROC curve).
n_labels = evaluation_labels.shape[1]

best_tpr_per_label = np.array(
    [tpr[label][best_th[label][0]] for label in range(n_labels)], dtype=float)
print('TPR : {}'.format(np.mean(best_tpr_per_label)))

best_fpr_per_label = np.array(
    [fpr[label][best_th[label][0]] for label in range(n_labels)], dtype=float)
print('FPR : {}'.format(np.mean(best_fpr_per_label)))

# Per-label AUC values in label order, for the bar chart below.
auc_per_label = np.array([roc_auc[label] for label in range(n_labels)], dtype=float)

%matplotlib qt

# Bar chart of per-label accuracy with a horizontal line at their mean.
ind = np.arange(50)  # one x position per label

fig, ax = plt.subplots()

# Dashed horizontal grid, drawn behind the bars.
ax.set_axisbelow(True)
ax.yaxis.grid(color='gray', linestyle='dashed')

# Constant line at the mean of the per-label accuracies.
accs = np.full(50, np.sum(accuracy) / 50)

rects1 = ax.bar(ind, accuracy, color='b')
rects2 = ax.plot(accs, color='r')

legend_handles = (rects1[0], rects2[0])
ax.legend(legend_handles, ('Label Accuracy', 'Global Accuracy'))
ax.set_xticks(ind)
ax.set_xticklabels(labels)

plt.xticks(rotation='vertical')
plt.show()



# Grouped bar chart: TPR and FPR of every label at its selected threshold.
fig, ax = plt.subplots()
width = 0.35

# align='center' is stated explicitly so the bar centres are exactly
# ind + width and ind + 2*width whatever the matplotlib default is.
rects1 = ax.bar(ind + width, best_tpr_per_label, width, color='g', align='center')
rects2 = ax.bar(ind + 2*width, best_fpr_per_label, width, color='r', align='center')

ax.legend( (rects1[0], rects2[0]), ('TP Rate', 'FP Rate'))
# Put each tick midway between the two bars of its label; the previous
# ind + width / 2 sat to the left of both bars.
ax.set_xticks(ind + 1.5 * width)
ax.set_xticklabels(labels)
ax.set_axisbelow(True)
ax.yaxis.grid(color='gray', linestyle='dashed')

plt.xticks(rotation='vertical')
plt.show()


# Bar chart of per-label AUC with a horizontal line at the global AUC.
# (`fig` was previously misspelled `fix`, leaving `fig` bound to the
# preceding figure.)
fig, ax = plt.subplots()

# Constant line at the global AUC, one value per x position.
aucs = np.full(len(ind), global_auc)

rects1 = ax.bar(ind, auc_per_label, color='b')
rects2 = ax.plot(aucs, color='r')

ax.legend( (rects1[0], rects2[0]), ('Label AUC', 'Global AUC'))
ax.set_xticks(ind)
ax.set_xticklabels(labels)
ax.set_axisbelow(True)
ax.yaxis.grid(color='gray', linestyle='dashed')

plt.xticks(rotation='vertical')
plt.show()

# Predict new song from .wav

In [None]:
# Read the waveform (sample rate, samples) and run the model on it.
wav_path = '../data/MagnaTagATune/songs/back.wav'
sr, song = wavfile.read(wav_path)
# Add a leading and a trailing axis around the samples; assumes the file is
# mono (1-D samples) -- TODO confirm.
wav_batch = song[np.newaxis, :, np.newaxis]
predictions = model.predict(wav_batch)

In [None]:
# Binarise the single prediction row with the per-label thresholds and print
# the names of the labels that fire.
th_predictions = np.zeros(len(best_th))  # best_th is keyed 0..n_labels-1
for key, (_, threshold) in best_th.items():
    # >= matches the rule in accuracy_opt_th, which is how the thresholds
    # were scored; the strict > used before disagreed at the boundary.
    th_predictions[key] = predictions[0][key] >= threshold
th_predictions_idx = np.where(th_predictions == 1)
# Label columns sit between the first and the last column of the table.
predicted_labels = annotations.columns.values[1:-1][th_predictions_idx]
print(predicted_labels)

# Predict new song from mel spectrogram

In [None]:
# Load the pickled array for the song; the context manager closes the file
# handle that the bare open() call used to leak.
# NOTE: pickle can execute arbitrary code on load; only open trusted files.
with open('../data/MagnaTagATune/MEL_songs/starwars.p', 'rb') as handle:
    song = pk.load(handle)
# Add a leading and a trailing axis around the 2-D array before predicting.
predictions = model.predict(song[np.newaxis, :, :, np.newaxis])

In [None]:
# Binarise the single prediction row with the per-label thresholds and print
# the names of the labels that fire.
th_predictions = np.zeros(len(best_th))  # best_th is keyed 0..n_labels-1
for key, (_, threshold) in best_th.items():
    # >= matches the rule in accuracy_opt_th, which is how the thresholds
    # were scored; the strict > used before disagreed at the boundary.
    th_predictions[key] = predictions[0][key] >= threshold
th_predictions_idx = np.where(th_predictions == 1)
# Label columns sit between the first and the last column of the table.
predicted_labels = annotations.columns.values[1:-1][th_predictions_idx]
print(predicted_labels)

# Predict new song from mel spectrogram (LSTM)

In [None]:
# Load the pickled array for the song; the context manager closes the file
# handle that the bare open() call used to leak.
# NOTE: pickle can execute arbitrary code on load; only open trusted files.
with open('../data/MagnaTagATune/MEL_LSTM_songs/starwars.p', 'rb') as handle:
    song = pk.load(handle)

# Cut the transposed array into n_splits consecutive chunks of equal length,
# the same way MagnaTagATuneSequenceLSTM does; leftover trailing rows are
# discarded by the floor division.
timestamps = song.shape[1]
split_size = timestamps // n_splits
split_song = [song.T[split * split_size:(split + 1) * split_size]
              for split in range(n_splits)]
# The full slice is a no-op on a 3-D stack and raises otherwise.
split_song = np.array(split_song)[:, :, :]

# One prediction row per chunk, averaged into a single row for the song.
predictions = model.predict(split_song)
predictions = np.mean(predictions, axis=0)

In [None]:
# Binarise the averaged prediction vector with the per-label thresholds and
# print the names of the labels that fire.
th_predictions = np.zeros(len(best_th))  # best_th is keyed 0..n_labels-1
for key, (_, threshold) in best_th.items():
    # >= matches the rule in accuracy_opt_th, which is how the thresholds
    # were scored; the strict > used before disagreed at the boundary.
    th_predictions[key] = predictions[key] >= threshold
th_predictions_idx = np.where(th_predictions == 1)
# Label columns sit between the first and the last column of the table.
predicted_labels = annotations.columns.values[1:-1][th_predictions_idx]
print(predicted_labels)

# Plot Spectrogram

In [None]:
# Load the pickled array to plot; the context manager closes the file handle
# that the bare open() call used to leak.
# NOTE: pickle can execute arbitrary code on load; only open trusted files.
with open('../data/MagnaTagATune/MEL_LSTM_songs/astronomia.p', 'rb') as handle:
    song = pk.load(handle)

In [None]:
import librosa.display as disp
%matplotlib qt
# Draw the loaded array with librosa's spectrogram display, using its default
# axes and colour map.  Presumably `song` is a mel spectrogram -- confirm.
disp.specshow(song)