In [1]:
import keras
import numpy as np
import pandas as pd
from tqdm import tqdm
import os

import matplotlib.pyplot as plt
import librosa
import librosa.display



from bokeh.plotting import figure, show
from bokeh.io import output_notebook

from keras.layers import Conv2D, MaxPool2D, Activation, Dense, Input, Flatten, BatchNormalization, Dropout
from keras.losses import binary_crossentropy
from keras.optimizers import SGD
from keras.utils import Sequence
import keras.backend as K

from scipy.io import wavfile
from scipy.fftpack import fft

from sklearn.model_selection import train_test_split

import time

from pylab import rcParams

%matplotlib inline

Using TensorFlow backend.


In [2]:
# Directory whose entries are listed and parsed as model checkpoint file names.
checkpoint_dir = './checkpoints_RAW/'

def find_best_checkpoint(prev_chkpts):
    """Return the checkpoint whose file name encodes the lowest value.

    Checkpoint names are expected to contain ``<epoch>-<value>``, e.g.
    ``weights-031-0.13050.hdf5`` (epoch 31, value 0.13050).  Names that do
    not contain such a pair (``.ipynb_checkpoints``, ``.DS_Store``, ...) are
    skipped instead of raising ``ValueError``.

    Parameters
    ----------
    prev_chkpts : iterable of str
        Candidate file names, typically the output of ``os.listdir``.

    Returns
    -------
    (str, int)
        Name and epoch of the best checkpoint; on ties the first one wins.
        ``('', 0)`` is returned when no name could be parsed.
    """
    import re  # local import so the function does not depend on another cell

    # <digits>-<decimal number>: the epoch followed by the monitored value.
    name_pattern = re.compile(r'(\d+)-(\d+\.\d+)')

    best_ratio = float('inf')
    best_chkpt = ''
    best_epoch = 0
    for chkpt in prev_chkpts:
        match = name_pattern.search(chkpt)
        if match is None:
            # Not a checkpoint file; ignore it.
            continue
        epoch = int(match.group(1))
        ratio = float(match.group(2))

        if ratio < best_ratio:
            best_ratio = ratio
            best_chkpt = chkpt
            best_epoch = epoch
    print('\n starting from model {} \n'.format(best_chkpt))
    return best_chkpt, best_epoch

In [3]:
# Choose the best checkpoint present in `checkpoint_dir` and restore the
# complete saved model from it.
# (Weights-only alternative: model.load_weights(checkpoint_dir + best_checkpoint))
previous_checkpoints = os.listdir(checkpoint_dir)
best_checkpoint, best_epoch = find_best_checkpoint(previous_checkpoints)
best_checkpoint_path = checkpoint_dir + best_checkpoint
model = keras.models.load_model(best_checkpoint_path)


 starting from model weights-031-0.13050.hdf5 





In [4]:
# Print the layer-by-layer summary (output shapes, parameter counts) of the loaded model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 155328, 128)       512       
_________________________________________________________________
batch_normalization_1 (Batch (None, 155328, 128)       512       
_________________________________________________________________
activation_1 (Activation)    (None, 155328, 128)       0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 155328, 128)       49280     
_________________________________________________________________
batch_normalization_2 (Batch (None, 155328, 128)       512       
_________________________________________________________________
activation_2 (Activation)    (None, 155328, 128)       0         
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 51776, 128)        0         
__________

In [5]:
def normalize(x):
    """Scale tensor ``x`` by its root-mean-square value.

    The divisor is ``sqrt(mean(x ** 2))`` plus the backend epsilon, which
    keeps the division defined when ``x`` is all zeros.
    """
    rms = K.sqrt(K.mean(K.square(x)))
    return x / (rms + K.epsilon())

In [6]:
# --- Dataset locations -------------------------------------------------------
annotations_path = '../data/MagnaTagATune/annotation_reduced_50.csv'
dataset_dir = '../data/MagnaTagATune/rawwav_2/'

annotations = pd.read_csv(annotations_path, sep='\t')

# Columns of the annotation table that are dropped to obtain the label matrix.
non_label_columns = ['mp3_path', 'Unnamed: 0']

# --- First split: hold out the test set --------------------------------------
tot_t_size = 0.866203
tot_train_set, test_set = train_test_split(
    annotations,
    train_size=tot_t_size,
    test_size=(1 - tot_t_size),
    random_state=42,
)

print("Complete Train set size: {}".format(len(tot_train_set)))
print("Test set size: {} \n".format(len(test_set)))

# --- Second split: carve the validation set out of the training portion ------
t_size = 0.91429
train_set, val_set = train_test_split(
    tot_train_set,
    train_size=t_size,
    test_size=(1 - t_size),
    random_state=42,
)

print("Train set size: {}".format(len(train_set)))
print("Validation set size: {} \n".format(len(val_set)))

train_set_paths = train_set['mp3_path'].values
train_set_labels = train_set.drop(columns=non_label_columns).values

# Output size = number of label columns; input size = number of samples in
# the first clip of the annotation table.
y_dimension = train_set_labels.shape[1]
first_clip_path = dataset_dir + annotations['mp3_path'][0][:-3] + 'wav'
_, data = wavfile.read(first_clip_path)
x_dimension = len(data)

print("X dimension: {}\nY dimension: {} \n".format(x_dimension, y_dimension))

val_set_paths = val_set['mp3_path'].values
val_set_labels = val_set.drop(columns=non_label_columns).values
    

Complete Train set size: 22400
Test set size: 3460 

Train set size: 20480
Validation set size: 1920 

X dimension: 465984
Y dimension: 50 



In [7]:
class MagnaTagATuneSequence(Sequence):
    """Keras ``Sequence`` yielding batches of waveforms with their label rows.

    Audio is read from disk one batch at a time.  Each stored path has its
    last three characters replaced by ``wav`` and is resolved against the
    module-level ``dataset_dir``.
    """

    def __init__(self, train_set_paths, train_set_labels, batch_size):
        """Store the clip paths, the matching label rows and the batch size."""
        self.paths = train_set_paths
        self.y = train_set_labels
        self.batch_size = batch_size

    def __len__(self):
        """Number of batches; the last one may hold fewer than ``batch_size`` items."""
        n_batches = np.ceil(len(self.paths) / float(self.batch_size))
        return int(n_batches)

    def __getitem__(self, idx):
        """Return ``(batch_x, batch_y)`` for batch number ``idx``."""
        window = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        batch_x_paths = self.paths[window]
        batch_y = self.y[window]
        # wavfile.read returns (rate, samples); only the samples are kept.
        waveforms = [
            wavfile.read(dataset_dir + rel_path[:-3] + 'wav')[1]
            for rel_path in batch_x_paths
        ]
        # Stack the clips and append a trailing axis of size 1.
        batch_x = np.array(waveforms)[:, :, np.newaxis]
        return (batch_x, batch_y)

In [10]:
import pickle as pk

# Load the pickled prediction matrix.  The context manager closes the file
# handle, which the previous bare open(...) call never did.
# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
with open('y_train_pred_raw.p', 'rb') as pred_file:
    y_pred = pk.load(pred_file)
y_pred.shape

(20480, 50)

In [68]:
# Activation layers of the loaded model, keyed by layer name.
layer_dict = {layer.name: layer for layer in model.layers if 'activation' in layer.name}

# Names of the layers whose name contains 'conv'; their outputs are exported below.
conv_layers = [layer.name for layer in model.layers if 'conv' in layer.name]

# Tag names: every annotation column except the first and the last one.
# Defined before its first use so the cell runs on a fresh kernel.
labels = annotations.columns.values[1:-1]

# Root directory for the exported layer outputs.  The former
# .format(labels[label]) call had no placeholder to fill and referenced
# names that did not exist yet.
output_conv_path = './conv_outputs'
if not os.path.exists(output_conv_path):
    os.makedirs(output_conv_path)

In [70]:
# Outputs of every conv layer for the most recently processed song, keyed by
# layer name.  Initialised here so the cell does not rely on leftover kernel state.
outputs_layers = {}

# Loop-invariant pieces, built once instead of once per tag.
base_dir = '../data/MagnaTagATune/rawwav_2/'
intermediate_models = {
    layer: keras.Model(inputs=model.input, outputs=model.get_layer(layer).output)
    for layer in conv_layers
}

for label in range(len(labels)):
    # Row of y_pred with the highest predicted value for this tag.
    pred_label = y_pred[:, label]
    max_song_for_label = np.argmax(pred_label)
    print('Song with maximum pred for tag: {}, \n\tprob: {}, \n\tsong: {} '.format(
        labels[label],
        pred_label[max_song_for_label],
        train_set_paths[max_song_for_label]))

    song_path = base_dir + train_set_paths[max_song_for_label][:-3] + 'wav'
    sr, song = wavfile.read(song_path)

    print('Original labels: {}'.format(labels[np.where(train_set_labels[max_song_for_label] == 1)]))

    # Run the song through the model up to each conv layer.
    for layer in conv_layers:
        outputs_layers[layer] = intermediate_models[layer].predict(song[np.newaxis, :, np.newaxis])

    # One directory per tag: the input song plus one sub-directory per layer.
    output_label_path = output_conv_path + '/' + labels[label]
    if not os.path.exists(output_label_path):
        os.makedirs(output_label_path)

    wavfile.write('{}/song.wav'.format(output_label_path), sr, song)
    for layer in conv_layers:
        output_layer_path = output_label_path + '/' + layer
        if not os.path.exists(output_layer_path):
            os.makedirs(output_layer_path)
        # Each channel along the last axis of the layer output becomes its own wav file.
        for fil in range(outputs_layers[layer].shape[2]):
            wavfile.write('{}/{}.wav'.format(output_layer_path, fil), sr, outputs_layers[layer][0, :, fil])

Song with maximum pred for tag: spanish, 
	prob: 0.6763861775398254, 
	song: 6/curandero-curandero-05-corriendo_juntos-204-233.mp3 
Original labels: ['spanish' 'fast' 'guitar']
Song with maximum pred for tag: classical guitar, 
	prob: 0.7240307331085205, 
	song: 6/ed_martin-luis_milan__el_maestro-09-fantasia_9-146-175.mp3 
Original labels: ['classical guitar' 'string' 'slow' 'classical' 'guitar']
Song with maximum pred for tag: chorus, 
	prob: 0.9412891268730164, 
	song: f/kyiv_chamber_choir-masterpieces_of_the_ukrainian_choral_baroque-09-i_have_cried_to_the_lord_with_my_voice-407-436.mp3 
Original labels: ['choir' 'opera' 'classical']
Song with maximum pred for tag: folk, 
	prob: 0.6618261337280273, 
	song: 8/mercy_machine-mercy_machine-09-rachel-175-204.mp3 
Original labels: ['folk' 'female' 'male' 'vocal' 'guitar']
Song with maximum pred for tag: trance, 
	prob: 0.6184954643249512, 
	song: 4/dj_markitos-evolution_of_the_mind-09-starship_earth-204-233.mp3 
Original labels: ['trance' 

Song with maximum pred for tag: rock, 
	prob: 0.9944033622741699, 
	song: c/soulprint-in_spite_of_it_all-02-show_me_fear-175-204.mp3 
Original labels: ['rock']
Song with maximum pred for tag: electro, 
	prob: 0.9706289172172546, 
	song: b/wicked_allstars-dark_clouds-05-new_idm-88-117.mp3 
Original labels: ['synth' 'techno']
Song with maximum pred for tag: vocal, 
	prob: 0.9867423176765442, 
	song: d/kitka-wintersongs-12-bozha_zvezda_bulgarian_macedonia-30-59.mp3 
Original labels: ['foreign' 'india']
Song with maximum pred for tag: string, 
	prob: 0.9207773208618164, 
	song: 9/vito_paternoster-cd2bach_sonatas_and_partitas_for_solo_violin-11-partita_terza_in_la_maggiore__loure-146-175.mp3 
Original labels: ['baroque' 'orchestra' 'cello' 'solo' 'violin' 'string' 'slow' 'classical']
Song with maximum pred for tag: techno, 
	prob: 0.9985270500183105, 
	song: f/memories_of_tomorrow-waiting_for_dawn-09-not_like_this-262-291.mp3 
Original labels: ['fast' 'techno']
Song with maximum pred for ta

In [63]:
# Shape of the stored output for layer 'conv1d_1'.
outputs_layers['conv1d_1'].shape

(1, 155328, 128)

In [None]:
# Plot the first (and only indexed) channel of `input_song_data` against `time_range`.
# NOTE(review): in the visible part of this notebook `time_range` exists only as a
# local inside plot_song and `input_song_data` is created in a later cell, so this
# cell appears to depend on out-of-order execution — confirm the intended inputs.
plt.plot(time_range, input_song_data[0,:,0])
plt.show()

In [None]:
file_dir = './grad_ascent_songs/'

# os.listdir returns entries in arbitrary order; sort them so that S[i]
# refers to the same file on every run and every machine (later cells index
# S by position).
songs = []
for file_name in sorted(os.listdir(file_dir)):
    sr, data = wavfile.read(file_dir + file_name)
    songs.append(data)
S = np.array(songs)

In [None]:
def plot_song(song, sr):
    """Plot the samples of ``song`` against time.

    Sample ``i`` is placed at ``i * (1/sr)`` on the x axis, i.e. in seconds
    when ``sr`` is the sampling rate in Hz.
    """
    seconds_per_sample = 1/sr
    timestamps = np.arange(0, len(song)) * seconds_per_sample
    plt.plot(timestamps, song)
    
def plot_fft(song, sr):
    """Plot the one-sided magnitude spectrum of ``song`` in black.

    Only the first ``ceil((n + 1) / 2)`` FFT bins are kept.  Every kept bin
    except the first is doubled; for an even number of samples the last
    kept bin is left undoubled as well.  Bin ``k`` is drawn at ``k * sr / n``.
    """
    n_samples = len(song)
    n_bins = int(np.ceil((n_samples + 1) / 2.0))
    magnitude = np.abs(fft(song)[0:n_bins])

    # Odd length: double up to the end; even length: stop one bin earlier.
    stop = len(magnitude) if n_samples % 2 > 0 else len(magnitude) - 1
    magnitude[1:stop] = magnitude[1:stop] * 2

    freqs = np.arange(0, n_bins, 1.0) * (sr / n_samples)
    plt.plot(freqs, magnitude, color='k')

In [None]:
# Waveform of S[0].  NOTE(review): the rate 16000 is hard-coded here although
# wavfile.read returned `sr` when S was loaded — confirm they agree.
plot_song(S[0,:], 16000)

In [None]:
# Magnitude spectrum of S[49], with a hard-coded rate of 16000.
plot_fft(S[49,:], 16000)

In [None]:
# Waveform of S[2], with a hard-coded rate of 16000.
plot_song(S[2,:], 16000)

In [None]:
# Waveform of S[40], with a hard-coded rate of 16000.
plot_song(S[40,:], 16000)

In [None]:
# Shape check for a random integer array drawn from [-30000, 30000).
# NOTE(review): `song_lenght` (sic) is not defined in the visible part of this
# notebook — confirm where it comes from.
np.random.randint(-30000,30000, size = (1, song_lenght, 1)).shape

In [None]:
# Random integer noise in [-30000, 30000) of shape (1, song_lenght, 1), stored as floats.
# The previous np.float(<array>) call raised TypeError for arrays with more
# than one element (and the np.float alias was removed in NumPy 1.24);
# assigning floats into an integer array would not have changed its dtype
# either, so the whole array is converted with astype instead.
# NOTE(review): `song_lenght` must be defined beforehand — not visible here.
input_song_data = np.random.randint(-30000, 30000, size=(1, song_lenght, 1)).astype(float)