In [None]:
import os

import h5py
import matplotlib.pyplot as plt
import numpy as np
from keras.models import model_from_json
from tqdm import tqdm

from data_prep import one_hot_decode, get_bars_dataset

# Shape constants shared by the cells below.
# Steps per bar; bars are reshaped to (1, timesteps, cardinality) before encoding.
timesteps = 16
# Size of the last axis of each step -- presumably the one-hot vocabulary size; TODO confirm.
cardinality = 131
# Length of the z vector taken from the encoder output for each bar.
latent_dim = 64

### Some details

The MDN will predict sequences of bars, and all sequences must have the same length, so some preparation is needed. Some songs in the dataset are long and some are short, which calls for a few decisions.  
  
Here's the solution:  
The [average song length](https://www.statcrunch.com/5.0/viewreport.php?groupid=948&reportid=28647) is 226 seconds.  
  
The [average bpm](https://learningmusic.ableton.com/make-beats/tempo-and-genre.html) for a song is 120. 

The [semiquaver length](http://bradthemad.org/guitar/tempo_explanation.php) is 15 / bpm seconds.  
  
15 / 120 = 0.125 seconds per semiquaver  
0.125 * 16 notes per vector = 2 seconds per bar   
226 / 2 = 113 bars per song.  
  
### So!

something

In [None]:
# Walk the song directory and delete every saved array that holds no bars.
for path, dirs, files in os.walk("songs_encoder_inputs"):
    for file in files:
        song_file = os.path.join(path, file)
        song_len = len(np.load(song_file))

        # Anything with at least one bar is kept untouched.
        if song_len != 0:
            continue

        # Removing songs shorter than one bar
        os.remove(song_file)
        print("Removing empty list.")

### Making sure no huge songs are occupying the data

In [None]:
# Summarise song lengths (in bars) and plot their distribution, ignoring
# songs at or above the length threshold.
max_song_bars = 1000  # songs with this many bars or more are left out of the summary

song_lengths = []
for path, dirs, files in os.walk("songs_encoder_inputs"):
    for file in files:
        song_lengths.append(len(np.load(os.path.join(path, file))))

sort_songlist = sorted(song_lengths)
print("Lenght of list before checking song lengths: ", len(sort_songlist))

# Only this in-memory list is filtered; no file on disk is deleted here,
# despite the wording of the message below.
new_list = []
for n_bars in sort_songlist:
    if n_bars < max_song_bars:
        new_list.append(n_bars)
    else:
        print("Song too long. {} bars. Removing.".format(n_bars))

# Report the size of the filtered list (not the unfiltered one), so the
# before/after numbers actually differ when songs were dropped.
print("Lenght of list after checking song lengths: ", len(new_list))
print("Remaining number of bars in dataset: ", sum(new_list))

# min()/max() raise ValueError on an empty list, so only plot when there is data.
if new_list:
    plt.hist(new_list, bins=100)
    plt.title("Histogram of bar lengths.")
    plt.ylabel("No. of items")
    plt.xlabel("Bar length")
    plt.xlim(min(new_list), max(new_list))
    plt.show()
else:
    print("No songs left to plot.")

## Loading model and weights

In [None]:
# Rebuild the inference encoder and decoder from their saved JSON
# architectures, then load the trained weights into each.
# The `with` blocks close the JSON files even if reading raises.

# load json and create model
with open('encoder_512_64.json', 'r') as json_file:
    loaded_infenc = json_file.read()

infenc = model_from_json(loaded_infenc)

# load weights into new model
infenc.load_weights("encoder_512_64.h5")
print("Loaded infenc model and weights from disk")


with open('decoder_512_64.json', 'r') as json_file:
    loaded_infdec = json_file.read()

infdec = model_from_json(loaded_infdec)

# load weights into new model
infdec.load_weights("decoder_512_64.h5")
print("Loaded infdec model and weights from disk")

### Get no of files

In [None]:
# Count the files sitting directly in the song directory. Only the first
# entry produced by os.walk is consumed, so sub-directories are not visited.
song_dir_walk = os.walk("songs_encoder_inputs")
path, dirs, files = next(song_dir_walk)
file_count = len(files)

file_count

### Check the data

In [None]:
# Load a single song file and print a few facts about it as a sanity check.
f = np.load("songs_encoder_inputs/id-0.npy")

song_shape = f.shape
print("Shape of first song file: ", song_shape)
print("No. of slices: ", song_shape[0])
print("Timesteps: ", song_shape[1])

# First step of the first slice.
first_slice = f[0]
print("One encoded note: ", first_slice[0])

# Slice at index 31, decoded with the project's one_hot_decode helper.
sample_bar = f[31]
print("One decoded bar :", one_hot_decode(sample_bar))

### Define function to get z-list from song

In [None]:
# encode every bar of a song into its z vector
def create_z_list(infenc, song, latent_dim):
    """Run each bar of `song` through the encoder and collect the z vectors.

    Args:
        infenc: model object exposing ``predict``; element 2 of its output
            is used as z (presumably the latent vector -- confirm against
            the encoder definition).
        song: iterable of bars, each reshapeable to
            ``(1, timesteps, cardinality)`` using the module-level constants.
        latent_dim: number of values in one z vector.

    Returns:
        A list with one array of shape ``(1, 1, latent_dim)`` per bar, in
        the same order as the bars in `song`.
    """
    latent_vectors = []

    for single_bar in song:
        # The encoder is fed one bar at a time as a batch of size 1.
        batch = single_bar.reshape(1, timesteps, cardinality)
        outputs = infenc.predict(batch)

        latent_vectors.append(outputs[2].reshape(1, 1, latent_dim))

    return latent_vectors

### Do the work

In [None]:
# Encode every song bar-by-bar and store the resulting z vectors in a single
# HDF5 file, one dataset per song. The context manager flushes and closes the
# file even if loading or encoding a song fails part-way through.
# NOTE(review): assumes the song files are named id-0.npy .. id-(file_count-1).npy
# with no gaps -- confirm, since empty songs may have been deleted earlier.
with h5py.File('z_dataset.h5', 'w') as h5f:

    for i in tqdm(range(file_count)):

        song_from_file = np.load("songs_encoder_inputs/id-" + str(i) + ".npy")
        song_len = song_from_file.shape[0]

        # reshaping to work as input to lstm
        song = np.array(song_from_file).reshape(song_len, timesteps, cardinality)

        # predicting list of z's
        z_list = create_z_list(infenc, song, latent_dim)

        # appending list of z's to dataset, keyed by the song's file index
        # (stacked shape: song_len x 1 x 1 x latent_dim)
        h5f.create_dataset('z_list_' + str(i), data=np.array(z_list))