# Imports

In [1]:
import keras
import numpy as np
import pandas as pd
from tqdm import tqdm
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from scipy.io import wavfile

from sklearn.metrics import accuracy_score, hamming_loss, zero_one_loss, auc

from bokeh.plotting import figure, show
from bokeh.io import output_notebook

from sklearn.metrics import roc_curve, auc, roc_auc_score
import matplotlib.pyplot as plt
import random

import librosa
import pickle as pk

from keras.layers import Bidirectional, Activation, Dense, Input, Dropout, LSTM, Flatten, Input, MaxPool1D, Conv1D
from keras.losses import binary_crossentropy
from keras.optimizers import SGD
from keras.utils import Sequence
import keras.backend as K
import tensorflow as tf

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


###### Parameters

In [2]:
#Hardware Parameters
n_gpus = 4

#Mel parameters
sr = 22050
n_sample_fft = 2048 
hop_length = 512

#Training Parameters
batch_size = 32
max_epochs = 200
max_trainings = 5
kernel_initializer = 'glorot_uniform'#'he_uniform'

if batch_size % n_gpus != 0:
    print("Batch size should be dividibile per n_gpus")

# SGD parameters
starting_learning_rate = 0.01
momentum = 0.9
global_decay = 0.2
local_decay = 1e-6

# EarlyStopping Parameters
min_improvement = 0
patience = 10

# Paths
dataset_dir = '../data/MagnaTagATune/MEL_LSTM/'
annotations_path = '../data/MagnaTagATune/annotation_reduced_50.csv'

checkpoint_dir = './checkpoints_MEL_LSTM/'
checkpoint_file_name = 'weights-{epoch:03d}-{val_loss:.5f}.hdf5'
log_dir ='./logs_MEL_LSTM/'

# Functions

###### Data reading during training

In [3]:
class MagnaTagATuneSequence(Sequence):

    def __init__(self, train_set_paths, train_set_labels, batch_size):
        self.paths, self.y = train_set_paths, train_set_labels
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.paths) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x_paths = self.paths[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_x = []
        for value in batch_x_paths:
            path = dataset_dir + value[:-3]+'p'
            S = pk.load(open(path,'rb'))
            batch_x.append(S.T)
        batch_x = np.array(batch_x)[:,:,:]        
        return (batch_x,batch_y)    

###### Performance Metrics (not used anymore)

In [4]:
def ratio_wrong_over_correct_ones(y_true, y_pred):
    op1 = K.sum(K.abs(K.cast(y_true - K.round(y_pred), dtype='float32')))
    op2 = K.sum(K.cast(K.equal(y_true,1.0),dtype='float32'))
    return op1/op2

def ratio_correct_ones(y_true, y_pred):
    op1 = K.sum(K.cast(K.equal(y_true + K.round(y_pred),2.0),dtype='float32'))
    op2 = K.sum(K.cast(K.equal(y_true,1.0),dtype='float32'))
    return op1/op2

def auc_roc(y_true, y_pred):
    # any tensorflow metric
    value, update_op = tf.metrics.auc(y_true, y_pred, summation_method='careful_interpolation')

    # find all variables created for this metric
    metric_vars = [i for i in tf.local_variables() if 'auc_roc' in i.name.split('/')[1]]

    # Add metric variables to GLOBAL_VARIABLES collection.
    # They will be initialized for new session.
    for v in metric_vars:
        tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v)

    # force to update metric values
    with tf.control_dependencies([update_op]):
        value = tf.identity(value)
        return value


###### Best checkpoint selection

In [5]:
def find_best_checkpoint(prev_chkpts):
    best_ratio = np.inf
    best_chkpt = ''
    best_epoch = 0
    for chkpt in prev_chkpts:
        epoch = int(chkpt[8:11])
        ratio = float(chkpt[12:19])
        
        if ratio < best_ratio:
            best_ratio = ratio
            best_chkpt = chkpt
            best_epoch = epoch
    print('\n starting from model {} \n'.format(best_chkpt))
    return best_chkpt, best_epoch

##### Align dataset split to batch size

In [6]:
def align_split(split, batch_size, num_songs):
    num_songs_split = split*num_songs
    return int(num_songs_split - num_songs_split%batch_size)/num_songs

# Preparation

###### Prepare Dataset

In [7]:
to_drop = ['0/american_bach_soloists-j_s__bach__cantatas_volume_v-02-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_ii_recitative__gleichwie_der_regen_und_schnee-30-59.mp3',
          '0/american_bach_soloists-j_s__bach__cantatas_volume_v-03-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_iii_recitative_and_litany__mein_gott_hier_wird_mein_herze_sein-30-59.mp3',
          '0/american_bach_soloists-j_s__bach__cantatas_volume_v-03-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_iii_recitative_and_litany__mein_gott_hier_wird_mein_herze_sein-59-88.mp3',
          '0/american_bach_soloists-j_s__bach__cantatas_volume_v-03-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_iii_recitative_and_litany__mein_gott_hier_wird_mein_herze_sein-88-117.mp3',
          '0/american_bach_soloists-j_s__bach__cantatas_volume_v-03-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_iii_recitative_and_litany__mein_gott_hier_wird_mein_herze_sein-146-175.mp3',
          '0/american_bach_soloists-j_s__bach__cantatas_volume_v-03-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_iii_recitative_and_litany__mein_gott_hier_wird_mein_herze_sein-175-204.mp3',
          '0/american_bach_soloists-j_s__bach__cantatas_volume_v-03-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_iii_recitative_and_litany__mein_gott_hier_wird_mein_herze_sein-204-233.mp3',
          '0/american_bach_soloists-j_s__bach__cantatas_volume_v-03-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_iii_recitative_and_litany__mein_gott_hier_wird_mein_herze_sein-233-262.mp3',
          '0/american_bach_soloists-j_s__bach__cantatas_volume_v-03-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_iii_recitative_and_litany__mein_gott_hier_wird_mein_herze_sein-262-291.mp3',
          '0/american_bach_soloists-j_s__bach__cantatas_volume_v-03-gleichwie_der_regen_und_schnee_vom_himmel_fallt_bwv_18_iii_recitative_and_litany__mein_gott_hier_wird_mein_herze_sein-291-320.mp3',
          '6/norine_braun-now_and_zen-08-gently-117-146.mp3',
          '2/ensemble_sreteniye___three_holies_church_choristers-dont_cry_rachael-09-who_is_the__great_lord__a_byzantine_tradition_b_bulgarian_tradition_c_russian_tradition-0-29.mp3',
          '2/ensemble_sreteniye___three_holies_church_choristers-dont_cry_rachael-09-who_is_the__great_lord__a_byzantine_tradition_b_bulgarian_tradition_c_russian_tradition-0-29.mp3',
          '2/ensemble_sreteniye___three_holies_church_choristers-dont_cry_rachael-09-who_is_the__great_lord__a_byzantine_tradition_b_bulgarian_tradition_c_russian_tradition-117-146.mp3',
          '2/ensemble_sreteniye___three_holies_church_choristers-dont_cry_rachael-09-who_is_the__great_lord__a_byzantine_tradition_b_bulgarian_tradition_c_russian_tradition-175-204.mp3',
          '2/ensemble_sreteniye___three_holies_church_choristers-dont_cry_rachael-09-who_is_the__great_lord__a_byzantine_tradition_b_bulgarian_tradition_c_russian_tradition-204-233.mp3',
          '2/ensemble_sreteniye___three_holies_church_choristers-dont_cry_rachael-09-who_is_the__great_lord__a_byzantine_tradition_b_bulgarian_tradition_c_russian_tradition-233-262.mp3',
          '2/ensemble_sreteniye___three_holies_church_choristers-dont_cry_rachael-09-who_is_the__great_lord__a_byzantine_tradition_b_bulgarian_tradition_c_russian_tradition-262-291.mp3',
          '8/jacob_heringman-josquin_des_prez_lute_settings-19-gintzler__pater_noster-204-233.mp3',
          '9/american_baroque-dances_and_suites_of_rameau_and_couperin-25-le_petit_rien_xiveme_ordre_couperin-88-117.mp3']


In [16]:
annotations = pd.read_csv(annotations_path, sep='\t')

tot_t_size = align_split(0.99, batch_size, len(annotations))
tot_train_set, test_set = train_test_split(annotations, train_size=tot_t_size, random_state=42) 

print("Complete Train set size: {}".format(tot_train_set.shape[0]))
print("Test set size: {} \n".format(test_set.shape[0]))

t_size = align_split(0.99, batch_size, tot_train_set.shape[0])
train_set, val_set = train_test_split(tot_train_set, train_size=t_size, random_state=42) 

print("Train set size: {}".format(train_set.shape[0]))
print("Validation set size: {} \n".format(val_set.shape[0]))

'''annotations = annotations.drop(index = annotations.loc[annotations['mp3_path'].isin(to_drop)].index.reset_index(drop=True)
train_set = annotations.drop(index = train_set.loc[train_set['mp3_path'].isin(to_drop)].index.reset_index(drop=True)
val_set = val_set.drop(index = val_set.loc[val_set['mp3_path'].isin(to_drop)].index.reset_index(drop=True)
test_set = test_set.drop(index = test_set.loc[test_set['mp3_path'].isin(to_drop)].index).reset_index(drop=True)'''

train_set_paths = train_set['mp3_path'].values
train_set_labels = train_set.drop(columns=['mp3_path','Unnamed: 0']).values

y_dimension = train_set_labels.shape[1]
S = pk.load(open(dataset_dir + annotations['mp3_path'][0][:-3]+ 'p','rb'))
x_dimension = S.shape

print("X dimension: {}\nY dimension: {} \n".format(x_dimension, y_dimension))

   
val_set_paths = val_set['mp3_path'].values
val_set_labels = val_set.drop(columns=['mp3_path','Unnamed: 0']).values

Complete Train set size: 25600
Test set size: 260 

Train set size: 25344
Validation set size: 256 

X dimension: (128, 228)
Y dimension: 50 





In [14]:
tot_train_set, test_set = train_test_split(annotations, train_size=tot_t_size, random_state=42) 
tot_train_set.shape[0]-len(annotations)



-260

In [301]:
print('\n* * * Loading Validation Set into Memory * * *\n')

val_set_data = []
for value in tqdm(val_set_paths):
    path = dataset_dir+value[:-3]+'p'
    S = pk.load(open(path,'rb'))
    val_set_data.append(S.T)  
val_set_data = np.array(val_set_data)[:,:,:] 

 18%|█▊        | 462/2592 [00:00<00:00, 4613.36it/s]


* * * Loading Validation Set into Memory * * *



100%|██████████| 2592/2592 [00:00<00:00, 4134.09it/s]


In [302]:
#pick up random song in training
#np.random.seed(0)
random_song = np.random.randint(0,train_set.shape[0],)
song_path = train_set.iloc[random_song]['mp3_path']
print('Tot train set shape(df): {}'.format(train_set.shape))
print('Tot train set paths: {}'.format(train_set_paths.shape))
print('Song from train set (df): {}'.format(train_set.iloc[random_song]['mp3_path']))
print('Song from train set paths: {}'.format(train_set_paths[random_song]))

labels_from_annotation = annotations.loc[annotations['mp3_path'] == song_path]
print(labels_from_annotation.values[0][1:-1])

print(train_set_labels[random_song])

Tot train set shape(df): (10336, 52)
Tot train set paths: (10336,)
Song from train set (df): 9/sitar-cd1_the_sowebo_concert-02-raga_maru_bihag_gatjay_kishor-146-175.mp3
Song from train set paths: 9/sitar-cd1_the_sowebo_concert-02-raga_maru_bihag_gatjay_kishor-146-175.mp3
[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0]


###### Modify session

In [303]:
K.clear_session()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)
keras.backend.set_session(session)

######  Building Model

In [304]:
n_filters = 100

model = keras.Sequential()
model.add(Bidirectional(LSTM(n_filters, dropout=0.2, return_sequences=True, kernel_regularizer=keras.regularizers.l2(0.01)), input_shape=(x_dimension[1],x_dimension[0])))
model.add(Bidirectional(LSTM(n_filters, dropout=0.2, return_sequences=True, kernel_regularizer=keras.regularizers.l2(0.001))))
model.add(Conv1D(filters=50,kernel_size=3,strides=1,activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)))
model.add(Conv1D(filters=50,kernel_size=3,strides=1,activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)))
model.add(MaxPool1D(2))
model.add(Flatten())
model.add(Dropout(0.3))
model.add(Dense(units=y_dimension, activation='sigmoid'))

In [305]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_1 (Bidirection (None, 228, 200)          183200    
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 226, 50)           30050     
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 224, 50)           7550      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 112, 50)           0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 5600)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 5600)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 50)                280050    
Total para

###### Callbacks definition

In [306]:
class MyCallBack(keras.callbacks.Callback):
    def __init__(self, callbacks, model, is_tb=False):
            super().__init__()
            self.callback = callbacks
            self.is_tb = is_tb
            if not self.is_tb:
                self.model = model
                self.model_original = model

    def on_epoch_begin(self,epoch,logs=None):
            if not self.is_tb:
                self.model = self.model_original
            self.callback.on_epoch_begin(epoch, logs=logs)

    def on_epoch_end(self,epoch,logs=None):
            if not self.is_tb:
                self.model = self.model_original
            else:
                y_pred = self.model.predict(self.validation_data[0])
                auc_skl = roc_auc_score(self.validation_data[1], y_pred)
                print('\nSKLearn validation auc: {}'.format(auc_skl))
            self.callback.on_epoch_end(epoch, logs=logs)

    def on_batch_end(self, batch, logs=None):
            if not self.is_tb:
                self.model = self.model_original
            self.callback.on_batch_end(batch, logs=logs)

    def on_batch_begin(self, batch, logs=None):
            if not self.is_tb:
                self.model = self.model_original
            self.callback.on_batch_begin(batch, logs=logs)
            
    def on_train_begin(self, logs=None):
            if not self.is_tb:
                self.model = self.model_original
            self.callback.set_model(self.model)
            self.callback.on_train_begin(logs=logs)


cbk_tb = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=0, batch_size=batch_size, write_graph=True,
                                         write_grads=False, write_images=False, embeddings_freq=0,
                                         embeddings_layer_names=None, embeddings_metadata=None)

cbk_es = keras.callbacks.EarlyStopping(monitor='val_loss', mode='min',
                                          min_delta=min_improvement, patience=patience, verbose=1)

cbk_mc = keras.callbacks.ModelCheckpoint(monitor='val_loss', mode='min', save_best_only=True, 
                                            filepath=checkpoint_dir+checkpoint_file_name, 
                                            verbose=1)

cbk = MyCallBack(cbk_tb, model, is_tb=True)
cbk2 = MyCallBack(cbk_mc, model)

callbacks = [cbk,cbk_es,cbk2]

### Training

In [307]:
initial_epoch = 0
training_nr = 0

parallel_model = keras.utils.multi_gpu_model(model, gpus=n_gpus)

if not os.path.exists(checkpoint_dir):
    os.mkdir(checkpoint_dir)

while (initial_epoch <= max_epochs) and (training_nr <= max_trainings):
    
    best_checkpoint = ''
    best_epoch = 0
    
    previous_checkpoints = os.listdir(checkpoint_dir)
    
    if previous_checkpoints != []:
        best_checkpoint, best_epoch = find_best_checkpoint(previous_checkpoints)
        initial_epoch = best_epoch             
    
    print('\n\n* * * * Starting training {0} from epoch {1} * * * * \n\n'.format(training_nr,  initial_epoch+1))
    
    #update lr
    decay = global_decay ** training_nr
    learning_rate = starting_learning_rate * decay
    
    
    training_nr = training_nr + 1
    
    optimizer = SGD(lr=learning_rate, momentum=momentum, decay=local_decay , nesterov=True)
    
    if len(previous_checkpoints)!=0:
        model.load_weights(checkpoint_dir + best_checkpoint)
        parallel_model = keras.utils.multi_gpu_model(model, gpus=n_gpus)
    
    
    parallel_model.compile(optimizer=optimizer, loss='binary_crossentropy')
    
    parallel_model.fit_generator(MagnaTagATuneSequence(train_set_paths, train_set_labels, batch_size),
                                 validation_data = (val_set_data, val_set_labels),
                                 epochs=max_epochs, callbacks = callbacks, initial_epoch = initial_epoch)



* * * * Starting training 0 from epoch 1 * * * * 


Epoch 1/200

SKLearn validation auc: 0.5347977440061455

Epoch 00001: val_loss improved from inf to 0.35724, saving model to ./checkpoints_MEL_LSTM/weights-001-0.35724.hdf5
Epoch 2/200

SKLearn validation auc: 0.6152819997245511

Epoch 00002: val_loss improved from 0.35724 to 0.33519, saving model to ./checkpoints_MEL_LSTM/weights-002-0.33519.hdf5
Epoch 3/200

SKLearn validation auc: 0.671663428507219

Epoch 00003: val_loss improved from 0.33519 to 0.30995, saving model to ./checkpoints_MEL_LSTM/weights-003-0.30995.hdf5
Epoch 4/200

SKLearn validation auc: 0.6750040002938458

Epoch 00004: val_loss improved from 0.30995 to 0.29222, saving model to ./checkpoints_MEL_LSTM/weights-004-0.29222.hdf5
Epoch 5/200

SKLearn validation auc: 0.6909693672443246

Epoch 00005: val_loss improved from 0.29222 to 0.27651, saving model to ./checkpoints_MEL_LSTM/weights-005-0.27651.hdf5
Epoch 6/200

SKLearn validation auc: 0.6877944128235781

Epoch 000

KeyboardInterrupt: 

###### Prepare Test Set

In [None]:
test_set_paths = test_set['mp3_path'].values
test_set_labels = test_set.drop(columns=['mp3_path','Unnamed: 0']).values
test_set_size = len(test_set_paths)
print("Test set size: {} ".format(test_set_size))

###### Load best Model

In [308]:
previous_checkpoints = os.listdir(checkpoint_dir)
best_checkpoint, best_epoch = find_best_checkpoint(previous_checkpoints)
#model.load_weights(checkpoint_dir + best_checkpoint)
model = keras.models.load_model(checkpoint_dir + best_checkpoint)
parallel_model = keras.utils.multi_gpu_model(model, gpus=n_gpus)


 starting from model weights-021-0.18294.hdf5 





In [310]:
model.get_weights()

[array([[-0.01143363,  0.01398   , -0.01894769, ...,  0.00137403,
         -0.02520425,  0.0145237 ],
        [-0.02781792, -0.00911206,  0.02293602, ...,  0.02248163,
          0.01056745, -0.01991016],
        [-0.02708839,  0.00275579, -0.00786875, ..., -0.01013751,
         -0.02602806,  0.00189592],
        ...,
        [-0.00244332, -0.00722779, -0.00467002, ..., -0.00727308,
         -0.01022487,  0.025536  ],
        [ 0.02427233,  0.01307735, -0.00551412, ...,  0.01467789,
         -0.0290134 , -0.00142406],
        [-0.01105003,  0.01469794, -0.00262264, ...,  0.01614231,
         -0.00745407, -0.02591244]], dtype=float32),
 array([[-0.01493038,  0.00555527,  0.04103458, ...,  0.01212055,
          0.05008824, -0.03967217],
        [ 0.01520943, -0.01771687, -0.01423614, ...,  0.0877684 ,
         -0.03586233, -0.0180759 ],
        [ 0.01192095,  0.03728289, -0.00301965, ..., -0.03486981,
         -0.0083151 , -0.04416079],
        ...,
        [ 0.06255458, -0.05715908,  0.0

###### Prediction and evaluation

In [None]:
predictions = parallel_model.predict_generator(MagnaTagATuneSequence(test_set_paths, test_set_labels, batch_size), verbose=1)
#predictions = parallel_model.predict(test_set_data,batch_size=batch_size,verbose=1)

In [None]:
try:
    roc_auc = roc_auc_score(test_set_labels, predictions)
    print("Test roc auc result: {} ".format(roc_auc))
except Error as e:
    print(e)

In [None]:
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(test_set_labels.shape[1]):
    fpr[i], tpr[i], _ = roc_curve(test_set_labels[:, i], predictions[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])
    
    
fpr["micro"], tpr["micro"], _ = roc_curve(test_set_labels.ravel(), predictions.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])



plt.figure()
lw = 3
label = 25
plt.plot(fpr[label], tpr[label], color='darkorange',
         lw=lw, label='ROC curve (area = %0.2f)' % roc_auc[label])
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()

In [None]:
test = predictions[0]
print("All the same: {}".format(all([all(i) for i in [test-i<np.finfo(np.float32).eps for i in predictions]])))

In [None]:
a=500
print(predictions[a])
print(test_set_labels[a])

In [None]:
import math
weights = np.concatenate([ i.flatten() for i in model.get_weights() ])
print('Are there NaN weights? {}'.format(any([ math.isnan(i) for i in weights])))