In [1]:
import keras
import numpy as np
import pandas as pd
from tqdm import tqdm
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from scipy.io import wavfile

from sklearn.metrics import accuracy_score, hamming_loss, zero_one_loss, auc

from bokeh.plotting import figure, show
from bokeh.io import output_notebook


Using TensorFlow backend.


In [2]:
from keras.layers import Conv1D, MaxPool1D, Activation, Dense, Input, Flatten, BatchNormalization
from keras.losses import binary_crossentropy
from keras.utils import Sequence
import keras.backend as K
import tensorflow as tf

# Prepare Training set

In [3]:
annotations_path = '../data/MagnaTagATune/annotation_reduced.csv'
# The annotation file is tab-separated.
annotations = pd.read_csv(annotations_path, sep='\t')
# Work on a small random subset of the clips: 5% for training, 1% for testing.
# A fixed random_state makes the split (and every number computed from it
# further down) reproducible across kernel restarts.
train_set, test_set = train_test_split(
    annotations['mp3_path'], train_size=0.05, test_size=0.01, random_state=42
)

#train_set= train_set.loc[train_set.str.len()<70]
#test_set= test_set.loc[test_set.str.len()<70]

In [4]:
train_set_paths = train_set.values
# Select the label rows through train_set's own index so that row i of the
# label matrix belongs to train_set_paths[i].  A boolean isin() mask would
# return the rows in annotation-file order, while train_set is in the shuffled
# order produced by train_test_split, silently pairing clips with wrong labels.
train_set_labels = (
    annotations.loc[train_set.index]
    .drop(columns=['mp3_path', 'Unnamed: 0'])
    .values
)
train_set_size = len(train_set_paths)
print("Train set size: {} ".format(train_set_size))

# Number of label columns left once the path and index columns are dropped.
y_dimension = train_set_labels.shape[1]

# Read the first annotated clip to obtain the length of one waveform; the
# '.mp3' suffix of the annotated path is swapped for 'wav'.
# NOTE(review): this assumes every clip has the same length -- confirm.
_, data = wavfile.read('../data/MagnaTagATune/rawwav/' + annotations['mp3_path'][0][:-3] + 'wav')
x_dimension = len(data)

print("X dimension: {}\nY dimension: {}".format(x_dimension, y_dimension))

Train set size: 1293 
X dimension: 465984
Y dimension: 40


In [18]:
class MagnaTagATuneSequence(Sequence):
    """Keras ``Sequence`` that reads wav clips from disk one batch at a time.

    Parameters
    ----------
    train_set_paths : sliceable collection of str
        Annotated file paths; the last three characters of each path are
        replaced by ``'wav'`` and the result is looked up under
        ``../data/MagnaTagATune/rawwav/``.
    train_set_labels : sliceable collection
        Targets, sliced with the same positions as the paths.
    batch_size : int
        Number of clips per batch.
    """

    def __init__(self, train_set_paths, train_set_labels, batch_size):
        self.paths = train_set_paths
        self.y = train_set_labels
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        n_batches = np.ceil(len(self.paths) / float(self.batch_size))
        return int(n_batches)

    def __getitem__(self, idx):
        """Load batch ``idx`` and return it as ``(batch_x, batch_y)``."""
        first = idx * self.batch_size
        window = slice(first, first + self.batch_size)
        selected_paths = self.paths[window]
        batch_y = self.y[window]

        # wavfile.read returns (sample_rate, samples); only the samples are kept.
        waveforms = [
            wavfile.read('../data/MagnaTagATune/rawwav/' + mp3_name[:-3] + 'wav')[1]
            for mp3_name in selected_paths
        ]
        # Append a trailing axis of size 1 so each clip matches the
        # (length, 1) input expected by Conv1D.
        batch_x = np.array(waveforms)[:, :, np.newaxis]
        return (batch_x, batch_y)

def hamming_loss(y_true, y_pred):
    """Return the fraction of entries where ``y_true`` equals the rounded ``y_pred``.

    Despite its name, the value grows with the number of *matching* entries
    (1.0 when every rounded prediction equals its label).  Defining it also
    rebinds the ``hamming_loss`` name imported from ``sklearn.metrics`` at the
    top of the notebook.

    The denominator counts the entries of ``y_true`` that are exactly 0 or 1.
    """
    agree = K.cast(K.equal(y_true, K.round(y_pred)), dtype='float32')
    zeros = K.cast(K.equal(y_true, 0), dtype='float32')
    ones = K.cast(K.equal(y_true, 1), dtype='float32')
    return K.sum(agree) / (K.sum(zeros) + K.sum(ones))

def ratio_wrong_over_correct_ones(y_true, y_pred):
    """Return sum(|y_true - round(y_pred)|) divided by the number of ones in ``y_true``.

    For 0/1 labels the numerator is the count of wrongly predicted entries.
    The result is not bounded by 1, and the denominator is zero when
    ``y_true`` contains no entry equal to 1.
    """
    abs_diff = K.abs(K.cast(y_true - K.round(y_pred), dtype='float32'))
    true_ones = K.cast(K.equal(y_true, 1), dtype='float32')
    return K.sum(abs_diff) / K.sum(true_ones)

def ratio_correct_ones(y_true, y_pred):
    """Return the share of ones in ``y_true`` whose rounded prediction is also one.

    An entry counts as a hit when ``y_true + round(y_pred)`` equals 2.  The
    denominator is the number of entries of ``y_true`` equal to 1, so it is
    zero when there are none.
    """
    both_one = K.equal(y_true + K.round(y_pred), 2)
    true_one = K.equal(y_true, 1)
    hits = K.sum(K.cast(both_one, dtype='float32'))
    total = K.sum(K.cast(true_one, dtype='float32'))
    return hits / total
    

In [19]:
# Let TensorFlow grow its GPU memory allocation on demand instead of
# reserving it up front, then make Keras use the session configured that way.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
session = tf.Session(config=config)
K.set_session(session)

# Small 1-D CNN over raw waveforms: two strided convolution stages followed by
# a dense sigmoid layer with one output per label column.
model = keras.Sequential()

# Stage 1.  Only the first layer of a Sequential model declares the input
# shape: x_dimension samples with a single channel.
model.add(Conv1D(filters=10, kernel_size=13, strides=7, input_shape=(x_dimension, 1)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPool1D(pool_size=3))

# Stage 2.  Its input shape is inferred from the previous layer, so no
# input_shape is passed here (one given on a non-first layer is ignored and
# only misleads the reader).
model.add(Conv1D(filters=8, kernel_size=7, strides=3))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPool1D(pool_size=3))

model.add(Flatten())
# Independent sigmoid per output so several labels can be active at once.
model.add(Dense(units=y_dimension, activation='sigmoid'))

In [20]:
# Print the per-layer output shapes and parameter counts of the model built above.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_3 (Conv1D)            (None, 66568, 10)         140       
_________________________________________________________________
batch_normalization_3 (Batch (None, 66568, 10)         40        
_________________________________________________________________
activation_3 (Activation)    (None, 66568, 10)         0         
_________________________________________________________________
max_pooling1d_3 (MaxPooling1 (None, 22189, 10)         0         
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 7395, 8)           568       
_________________________________________________________________
batch_normalization_4 (Batch (None, 7395, 8)           32        
_________________________________________________________________
activation_4 (Activation)    (None, 7395, 8)           0         
__________

In [21]:
# Train with Adam on binary cross-entropy and report the two custom
# ratio metrics alongside the loss.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[ratio_wrong_over_correct_ones, ratio_correct_ones],
)

In [22]:
batch_size = 32
epochs = 1

# Write TensorBoard logs for the run to ./logs.
callbacks = [
    keras.callbacks.TensorBoard(
        log_dir='./logs',
        histogram_freq=0,
        batch_size=batch_size,
        write_graph=True,
        write_grads=False,
        write_images=False,
        embeddings_freq=0,
        embeddings_layer_names=None,
        embeddings_metadata=None,
    )
]


# Feed batches through the Sequence so clips are read from disk per batch
# rather than loaded all at once.
model.fit_generator(
    MagnaTagATuneSequence(train_set_paths, train_set_labels, batch_size),
    epochs=epochs,
    callbacks=callbacks,
)
#model.fit(x_train,y_train,epochs=10, batch_size=32, callbacks=callbacks)

Epoch 1/1


<keras.callbacks.History at 0x7f4811357668>

# Build test set

In [None]:
test_set_paths = test_set.values
# Select the label rows through test_set's own index so that row i of the
# label matrix belongs to test_set_paths[i].  A boolean isin() mask would
# return the rows in annotation-file order instead of the shuffled split order.
test_set_labels = (
    annotations.loc[test_set.index]
    .drop(columns=['mp3_path', 'Unnamed: 0'])
    .values
)
test_set_size = len(test_set_paths)
print("Test set size: {} ".format(test_set_size))

In [None]:
# Predict on the training clips (the accuracy check below compares against
# train_set_labels).  The second argument is the label array: the original
# call passed `train_set`, the Series of file paths, in the labels slot.
predictions = model.predict_generator(
    MagnaTagATuneSequence(train_set_paths, train_set_labels, batch_size),
    verbose=1,
)

In [None]:
# Inspect the label matrix dimensions.  The original `train_set_labels.s`
# raises AttributeError on a NumPy array; `.shape` is the evident intent.
train_set_labels.shape

In [None]:
import keras.backend as K
# Element-wise accuracy: share of label entries that equal the rounded prediction.
acc_keras = (train_set_labels == np.round(predictions)).mean()
print("Accuracy computed as Keras: {}".format(acc_keras))

In [None]:
# Two small 3x3 binary matrices used to sanity-check the metric code by hand.
m1 = np.array([[1, 0, 1], [0, 1, 1], [1, 0, 0]])
m2 = np.array([[1, 0, 0], [1, 1, 0], [1, 0, 1]])

In [None]:
# NOTE(review): at this point `hamming_loss` is the Keras-backend function
# defined earlier in the notebook (it rebinds the sklearn import), and m1/m2
# are NumPy arrays -- this presumably yields a symbolic tensor rather than a
# number; confirm which implementation was intended here.
hamming_loss(m1,m2)

In [None]:
# Entries where both matrices hold a 1, as a fraction of the ones in m1.
np.logical_and(np.equal(m2, 1), np.equal(m1, m2)).sum() / m1.sum()

In [None]:
# Number of entries where both matrices hold a 1.
np.logical_and(np.equal(m2, 1), np.equal(m1, m2)).sum()

In [None]:
# Rebind m1/m2 as float32 TensorFlow variables so they can be fed to the
# Keras-backend metric functions; m2 differs from m1 only in its last entry.
m1 = tf.Variable([[1, 0, 1], [0, 1, 1], [1, 0, 0]], dtype='float32')

m2 = tf.Variable([[1, 0, 1], [0, 1, 1], [1, 0, 1]], dtype='float32')
def sign_accuracy(y_true, y_pred):
    """Return, along the last axis, the mean of ``y_true * y_pred > 0``.

    An entry counts as a hit when the element-wise product of label and
    prediction is strictly positive.
    """
    same_sign = K.greater(y_true * y_pred, 0.)
    return K.mean(same_sign, axis=-1)


In [None]:
# Evaluate the Keras-backend metric on the two toy tensors to get a concrete
# value.  The result is the fraction of matching entries, hence the name `acc`.
acc = K.eval(hamming_loss(m1, m2))

In [None]:
# Display the value computed in the previous cell.
acc