In [1]:
import os
import json

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from app.features.DataMIDI import File

In [2]:
import pandas

dtype = {
    'canonical_composer': 'category',
    'canonical_title': 'object',
    'split': 'category',
    'year': 'int64',
    'midi_filename': 'object',
    'audio_filename': 'object',
    'duration': 'float64'
}
dataset = pandas.read_json('../../data/maestro-v2.0.0/maestro-v2.0.0.json')
dataset = dataset.astype(dtype)

In [19]:
def filesize(filename):
    return os.path.getsize('../../data/maestro-v2.0.0/' + filename)

def read_file(filename):
    with open('../../data/maestro-v2.0.0/' + filename, 'rb') as f:
        return f.read()
    
def bytes_as_array(limit):
    def with_limit(file):
        return np.frombuffer(file[0:limit], dtype='int8') / 255.0
    return with_limit

def code_to_label(code):
    return dataset.canonical_composer.cat.categories[code][0]

testing = pandas.DataFrame(dataset).sample(frac=1).reset_index(drop=True)
testing['midi_filesize'] = testing['midi_filename'].apply(filesize)
testing['midi_file'] = testing['midi_filename'].apply(read_file)
testing['bytes_features'] = testing['midi_file'].apply(bytes_as_array(testing.midi_filesize.min()))
testing.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1282 entries, 0 to 1281
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   canonical_composer  1282 non-null   category
 1   canonical_title     1282 non-null   object  
 2   split               1282 non-null   category
 3   year                1282 non-null   int64   
 4   midi_filename       1282 non-null   object  
 5   audio_filename      1282 non-null   object  
 6   duration            1282 non-null   float64 
 7   midi_filesize       1282 non-null   int64   
 8   midi_file           1282 non-null   object  
 9   bytes_features      1282 non-null   object  
dtypes: category(2), float64(1), int64(2), object(5)
memory usage: 85.8+ KB


In [20]:
input_dim = 1
# input_dim = 28

units = 128
output_size = len(testing.canonical_composer.cat.categories)
print(f'output_size: {output_size}')
# output_size = 10

# Build the RNN model
def build_model():
    lstm_layer = keras.layers.LSTM(units, input_shape=(None, input_dim))
    return keras.models.Sequential(
        [
            lstm_layer,
            keras.layers.BatchNormalization(),
            keras.layers.Dense(output_size),
        ]
    )

output_size: 61


In [54]:
train = testing[testing.split == 'train']
test = testing[testing.split == 'test']
x_train = np.vstack(train['bytes_features']).reshape(len(train.index), -1, 1)
y_train = np.asarray(train['canonical_composer'].cat.codes)
x_test = np.vstack(test['bytes_features']).reshape(len(test.index), -1, 1)
y_test = np.asarray(test['canonical_composer'].cat.codes)

print('dataset created:')
print(f'train: {x_train.shape}, {y_train.shape}')
print(f'test:  {x_test.shape}, {y_test.shape}')

model = build_model()
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer="adam",
    metrics=["accuracy"],
)

model.summary()


checkpoint = keras.callbacks.ModelCheckpoint(
    os.path.join(os.getcwd(), 'saved_models', 'best.hdf5'),
    monitor='val_accuracy', verbose=1,
    save_best_only=True, mode='max'
)


model.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=16, epochs=20, callbacks=[checkpoint])

dataset created:
train: (967, 3096, 1), (967,)
test:  (178, 3096, 1), (178,)
Model: "sequential_34"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_34 (LSTM)               (None, 128)               66560     
_________________________________________________________________
batch_normalization_34 (Batc (None, 128)               512       
_________________________________________________________________
dense_34 (Dense)             (None, 61)                7869      
Total params: 74,941
Trainable params: 74,685
Non-trainable params: 256
_________________________________________________________________
Epoch 1/20
Epoch 00001: val_accuracy improved from -inf to 0.15169, saving model to C:\Users\Armand\Code\music-neural-network\app\lstm\saved_models\best.hdf5
Epoch 2/20
Epoch 00002: val_accuracy did not improve from 0.15169
Epoch 3/20
Epoch 00003: val_accuracy did not improve from 0.15169
Epoch 4/20

<tensorflow.python.keras.callbacks.History at 0x204c1f0ca08>

In [71]:
idx = np.random.choice(len(x_train))
sample, sample_label = x_train[idx], y_train[idx]

test_model = build_model()
test_model.load_weights(os.path.join(os.getcwd(), 'saved_models', 'best.hdf5'))
result = tf.argmax(test_model.predict_on_batch(tf.expand_dims(sample, 0)), axis=1)
print(f'Predicted result is: {code_to_label(result.numpy())}, target result is: {code_to_label([sample_label])}')

Predicted result is: Johann Sebastian Bach, target result is: Franz Liszt


In [41]:
from sklearn.model_selection import KFold

save_dir = './k_fold_models/'

def get_model_name(k):
    return 'model_' + str(k) + '.h5'

x_data = np.vstack(testing['bytes_features']).reshape(len(testing.index), -1, 1)
y_data = np.asarray(testing['canonical_composer'].cat.codes)

kf = KFold(5)
fold_no = 1

k_fold_model = build_model()
k_fold_model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer="adam",
    metrics=["accuracy"]
)
    
for train_index, test_index in kf.split(x_data):
    x_train, x_test = x_data[train_index], x_data[test_index]
    y_train, y_test = y_data[train_index], y_data[test_index]
    
    checkpoint = keras.callbacks.ModelCheckpoint(save_dir + get_model_name(fold_no),
        monitor='val_accuracy', verbose=1,
        save_best_only=True, mode='max')

    k_fold_model.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=4, epochs=10, callbacks=[checkpoint])
    
    fold_no += 1

Epoch 1/10
Epoch 00001: val_accuracy improved from -inf to 0.17510, saving model to ./k_fold_models/model_1.h5
Epoch 2/10
Epoch 00002: val_accuracy did not improve from 0.17510
Epoch 3/10
Epoch 00003: val_accuracy did not improve from 0.17510
Epoch 4/10
Epoch 00004: val_accuracy did not improve from 0.17510
Epoch 5/10
Epoch 00005: val_accuracy did not improve from 0.17510
Epoch 6/10
Epoch 00006: val_accuracy did not improve from 0.17510
Epoch 7/10
Epoch 00007: val_accuracy did not improve from 0.17510
Epoch 8/10
Epoch 00008: val_accuracy did not improve from 0.17510
Epoch 9/10
Epoch 00009: val_accuracy did not improve from 0.17510
Epoch 10/10
Epoch 00010: val_accuracy did not improve from 0.17510
Epoch 1/10
Epoch 00001: val_accuracy improved from -inf to 0.05058, saving model to ./k_fold_models/model_2.h5
Epoch 2/10
Epoch 00002: val_accuracy improved from 0.05058 to 0.12451, saving model to ./k_fold_models/model_2.h5
Epoch 3/10
Epoch 00003: val_accuracy did not improve from 0.12451
Epo

Epoch 9/10
Epoch 00009: val_accuracy did not improve from 0.15234
Epoch 10/10
Epoch 00010: val_accuracy did not improve from 0.15234
Epoch 1/10
Epoch 00001: val_accuracy improved from -inf to 0.00781, saving model to ./k_fold_models/model_4.h5
Epoch 2/10
Epoch 00002: val_accuracy improved from 0.00781 to 0.09375, saving model to ./k_fold_models/model_4.h5
Epoch 3/10
Epoch 00003: val_accuracy did not improve from 0.09375
Epoch 4/10
Epoch 00004: val_accuracy did not improve from 0.09375
Epoch 5/10
Epoch 00005: val_accuracy did not improve from 0.09375
Epoch 6/10
Epoch 00006: val_accuracy did not improve from 0.09375
Epoch 7/10
Epoch 00007: val_accuracy did not improve from 0.09375
Epoch 8/10
Epoch 00008: val_accuracy did not improve from 0.09375
Epoch 9/10
Epoch 00009: val_accuracy did not improve from 0.09375
Epoch 10/10
Epoch 00010: val_accuracy did not improve from 0.09375
Epoch 1/10
Epoch 00001: val_accuracy improved from -inf to 0.02734, saving model to ./k_fold_models/model_5.h5
Ep

In [53]:
idx = np.random.choice(len(x_train))
sample, sample_label = x_train[idx], y_train[idx]

test_model = build_model()
test_model.set_weights(k_fold_model.get_weights())
result = tf.argmax(test_model.predict_on_batch(tf.expand_dims(sample, 0)), axis=1)
print(f'Predicted result is: {code_to_label(result.numpy())}, target result is: {code_to_label([sample_label])}')

for model_file in os.listdir(os.path.join(os.getcwd(), 'k_fold_models')):
    test_model.load_weights(os.path.join(os.getcwd(), 'k_fold_models', model_file))
    result = tf.argmax(test_model.predict_on_batch(tf.expand_dims(sample, 0)), axis=1)
    print(f'{model_file} predicted result is: {code_to_label(result.numpy())}')

Predicted result is: Pyotr Ilyich Tchaikovsky, target result is: Johann Sebastian Bach
model_1.h5 predicted result is: Frédéric Chopin
model_2.h5 predicted result is: Franz Schubert
model_3.h5 predicted result is: Franz Schubert
model_4.h5 predicted result is: Johann Sebastian Bach
model_5.h5 predicted result is: Johann Sebastian Bach
