In [None]:
import keras
import numpy as np
import pandas as pd
from tqdm import tqdm
import os

import matplotlib.pyplot as plt
import librosa
import librosa.display

from bokeh.plotting import figure, show
from bokeh.io import output_notebook

from keras.layers import Conv2D, MaxPool2D, Activation, Dense, Input, Flatten, BatchNormalization, Dropout
from keras.losses import binary_crossentropy
from keras.optimizers import SGD
from keras.utils import Sequence
import keras.backend as K

from scipy.io import wavfile
from scipy.fftpack import fft

import time

from pylab import rcParams

#%matplotlib inline

In [None]:
checkpoint_dir = './checkpoints_RAW/'

def find_best_checkpoint(prev_chkpts):
    """Pick the checkpoint file name with the lowest score.

    The epoch and the score are read from fixed positions in each file
    name: characters 8-10 hold the epoch (integer) and characters 12-18
    hold the score (float). A name such as ``weights.005-0.12345.hdf5``
    fits this layout. The score is presumably a validation loss -- confirm
    against the pattern used by the checkpoint callback that wrote the files.

    Names that do not fit this layout (hidden files, editor/OS artefacts,
    sub-directories returned by ``os.listdir``) are skipped instead of
    aborting the whole search with a ``ValueError``.

    Parameters
    ----------
    prev_chkpts : iterable of str
        Candidate checkpoint file names (no directory part).

    Returns
    -------
    (str, int)
        Name and epoch of the checkpoint with the smallest score. On ties
        the first one encountered wins. ``('', 0)`` is returned when no
        name could be parsed.
    """
    best_ratio = float('inf')
    best_chkpt = ''
    best_epoch = 0
    for chkpt in prev_chkpts:
        try:
            epoch = int(chkpt[8:11])
            ratio = float(chkpt[12:19])
        except ValueError:
            # Not a checkpoint file name; ignore it.
            continue

        if ratio < best_ratio:
            best_ratio = ratio
            best_chkpt = chkpt
            best_epoch = epoch
    print('\n starting from model {} \n'.format(best_chkpt))
    return best_chkpt, best_epoch

In [None]:
# Choose the best-scoring checkpoint among the files in checkpoint_dir and
# load the model stored in it. (The alternative, model.load_weights(path),
# would need an already constructed `model` object.)
previous_checkpoints = os.listdir(checkpoint_dir)
best_checkpoint, best_epoch = find_best_checkpoint(previous_checkpoints)
best_checkpoint_path = os.path.join(checkpoint_dir, best_checkpoint)
model = keras.models.load_model(best_checkpoint_path)

In [None]:
# Number of samples in the waveform fed to the model; the optimised input
# created further down has shape (1, song_lenght, 1).
song_lenght = 465984

# Symbolic input tensor of the loaded model; gradients are taken with
# respect to it.
input_model = model.input

model.summary()

In [None]:
def normalize(x):
    """Scale tensor ``x`` by its root-mean-square value.

    The divisor is ``sqrt(mean(x ** 2))`` plus ``K.epsilon()``; the epsilon
    keeps the division defined when ``x`` is all zeros.
    """
    rms = K.sqrt(K.mean(K.square(x)))
    return x / (rms + K.epsilon())

# Gradient descent on the input (the loss defined below is minimised, not maximised)

In [None]:
# Build the optimisation problem: find an input waveform for which the
# model's output is as close as possible to the target vector `des`.

# Annotation table of the dataset; it is loaded here but not used by the
# code in this cell.
annotations = pd.read_csv('../data/MagnaTagATune/annotation_reduced_50.csv', sep='\t')

start_time = time.time()

# Symbolic output of the loaded model.
output = model.output

# Target output vector (50 values) the optimised input should reproduce.
des = [
    1.3541062e-03, 7.7507906e-07, 1.6718840e-03, 1.0865202e-04, 1.1998077e-02,
    2.5674235e-03, 2.2245251e-07, 1.4077784e-05, 4.3773609e-03, 7.8408426e-05,
    1.7920894e-03, 1.3590881e-03, 1.3103581e-04, 1.8276019e-03, 5.2062300e-04,
    3.5621008e-05, 9.0753223e-05, 2.6076185e-04, 5.5475216e-06, 4.7324235e-03,
    1.2859477e-01, 3.7038975e-04, 1.7310167e-02, 5.1931955e-05, 1.3747557e-04,
    1.3612196e-05, 1.1201203e-02, 1.0006372e-04, 8.4008680e-06, 2.4455197e-01,
    2.0188638e-05, 1.1828544e-03, 2.0245390e-02, 4.9718535e-03, 4.1305888e-02,
    1.2869519e-02, 1.3738136e-03, 5.1308460e-05, 6.5179415e-02, 1.9230554e-01,
    5.4180467e-01, 1.6391259e-02, 2.4036810e-01, 6.3938588e-02, 2.3827245e-03,
    8.5998207e-01, 1.3587170e-04, 4.1804422e-02, 1.2138104e-03, 2.0679317e-03,
]
print('Finding input minimizing label: {}\n'.format(des))
desired_output = K.variable(np.array(des))

# Loss: mean squared difference between the model output and the target.
loss = K.mean(K.pow(output - desired_output, 2))

# Gradient of the loss with respect to the model input, rescaled by
# normalize() so that the update size is governed by `step` alone.
grads = normalize(K.gradients(loss, input_model)[0])

# Callable that maps an input batch to [loss value, normalised gradient].
iterate = K.function([input_model], [loss, grads])

# Step size applied to the normalised gradient at every update.
step = 1000.

# Starting point: uniform random integers in [-30000, 30000) stored as
# float32, one clip of song_lenght samples with a single channel.
input_song_data = np.random.randint(-30000, 30000, size=(1, song_lenght, 1)).astype(np.float32)

In [None]:
# Run 20 gradient-descent updates on the input waveform: evaluate the loss
# and its normalised gradient, then move the input against the gradient.
for i in range(20):
    print(i)
    loss_value, grads_value = iterate([input_song_data])
    input_song_data -= grads_value * step
    print('Current loss value:', loss_value)
end_time = time.time()

# The waveform is float32 but lives in the 16-bit integer amplitude range
# (it was initialised in [-30000, 30000)). Writing float32 data would produce
# a floating-point WAV, whose nominal sample range is [-1.0, 1.0], so the
# samples are rounded, clipped to the int16 range and stored as 16-bit PCM.
int16_limits = np.iinfo(np.int16)
waveform = input_song_data[0, :, 0]
pcm_samples = np.clip(np.rint(waveform), int16_limits.min, int16_limits.max).astype(np.int16)
wavfile.write('sound.wav', 16000, pcm_samples)

# NOTE: start_time is set in the cell that builds the loss, so this duration
# also covers graph construction and any pause between running the two cells.
print('processed in {}s'.format(end_time - start_time))

In [None]:
# Display the optimised input array as the cell output.
input_song_data