# LSTM Genre Identification

Take in audio files and feed them through a Long-Short Term Memory Neural Network (CNN)

In [1]:
# Reload modules I'm working on
%load_ext autoreload
%autoreload 2

# IPython specific
%matplotlib inline
import IPython.display
from ipywidgets import interact, interactive, fixed

# required packages
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
from tqdm.autonotebook import tqdm
import os
from utils import profile

import copy
from scipy.io import wavfile
import scipy.ndimage

# Deep Learning packages
import tensorflow as tf



ModuleNotFoundError: No module named 'tensorflow'

## Parameters

In [None]:
FFT_SIZE = 2048
MELS = 128
RATE = 22050
EPOCHS = 20
FILES_PATH = "audio_files/genres"
FILES_NUM = 1
MODEL_DIR = "models/saved_models"

## Parameters (cont'd)

In [3]:
GENRES = ['blues','classical','country','disco','hiphop','jazz','metal','pop','reggae','rock']
TRAIN_SPLIT = 0.9

TRAIN_BUFFER_SIZE = 1000

tf.random.set_seed(777)
np.random.seed(9)

## Load Training and Test Data

Uses the files in `./${FILE_PATH}` to create test data

In [4]:
print("Loading data...")

extension = '.wav'

transformed_data = []

for label, genre in enumerate(GENRES):
    new_dir = FILES_PATH + '/' + genre
    print("Genre: ", new_dir)

    for n in tqdm(range(FILES_NUM)):
        file_name = f"{genre}.{n:05}{extension}"
        try:
            samples, _ = librosa.load(f"{FILES_PATH}/{genre}/{file_name}", sr=RATE, mono=True)
            # print("Length:", len(samples) / RATE, "seconds, samples:", len(samples))
        except:
            continue
        audio_data = librosa.feature.melspectrogram(np.asfortranarray(samples), sr=RATE, n_fft=FFT_SIZE, n_mels=MELS)
        # audio_data = librosa.power_to_db(audio_data, ref=np.max)
        # print(audio_data.shape)
        transformed_data.append((np.swapaxes(audio_data, 0, 1), label))

Loading data...
Genre:  audio_files/genres/blues


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


Genre:  audio_files/genres/classical


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


Genre:  audio_files/genres/country


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


Genre:  audio_files/genres/disco


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


Genre:  audio_files/genres/hiphop


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


Genre:  audio_files/genres/jazz


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


Genre:  audio_files/genres/metal


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


Genre:  audio_files/genres/pop


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


Genre:  audio_files/genres/reggae


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


Genre:  audio_files/genres/rock


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))




## Look at data

In [5]:
print("Number Wav Files Loaded:", len(transformed_data))

# for x in transformed_data:
#     print("label:", GENRES[x[1]], "| data.shape =", x[0].shape)

Number Wav Files Loaded: 10


## Draw a spectrogram

In [6]:
librosa.display.specshow(librosa.power_to_db(transformed_data[0][0], ref=np.max).T, x_axis="time", y_axis="mel", sr=RATE, fmax=8000)

IndexError: list index out of range

### Set up training and test batch

In [44]:
# (x_train, y_train), (x_test, y_test) = mnist.load_data()
# x_train, x_test = x_train / 255.0, x_test / 255.0

from parse_data import create_batch_from_file

# minimum_length = min([x["data"].shape[0] for x in transformed_data])
def min_sample_len(data_list):
    return min([x.shape[0] for x in data_list])

def truncate_samples(data_list):
    min_len = min_sample_len(data_list)
    return list(map(lambda data: data[0:min_len], data_list))

# for data in transformed_data:
#     # data["data"] = data["data"][0:minimum_length]
#     data[0] = data[0][0:minimum_length]

val_indices = np.random.choice(len(transformed_data), int(round(len(transformed_data) * (1.0 - TRAIN_SPLIT))), replace=False)
train_indices = list(filter(lambda ind: ind not in val_indices, range(len(transformed_data))))

# print(test_indices)
# print([transformed_data[x][1] for x in test_indices])

def separate_into_lists(data_list, indices):
    datas = []
    labels = []
    for ind in indices:
        x, y = data_list[ind]
        datas.append(x)
        labels.append(y)
    return datas, labels

def prepare_data(transformed_data, indices):
    data, labels = separate_into_lists(transformed_data, indices)
    data = truncate_samples(data)

    data = np.stack(data, axis=0)
    labels = np.stack(labels, axis=0)
    
    return data.astype('float64'), labels.astype('float64')

x_train, y_train = prepare_data(transformed_data, train_indices)
x_val, y_val = prepare_data(transformed_data, val_indices)

print(x_train.shape)
print(y_train.shape)
print(x_val.shape)
print(y_val.shape)

# batch = np.stack(tuple(x[0] for x in transformed_data), axis=0)
# batches = np.split(batch, 10, axis=0)
# len(batches[0])

(900, 1290, 128)
(900,)
(100, 1292, 128)
(100,)


### Build the LSTM

using code in `model/lstm.py`

In [45]:
from models.lstm import MusicGenreModel_v0, MusicGenreModel_v1, MusicGenreModel_v2

model = MusicGenreModel_v2()

loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

train_loss = tf.keras.metrics.Mean(name="train_loss")
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name="train_accuracy")

val_loss = tf.keras.metrics.Mean(name="val_loss")
val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name="val_accuracy")

test_loss = tf.keras.metrics.Mean(name="test_loss")
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name="test_accuracy")

Create instance of model

In [26]:
print("Input Shape:", x_train[0:64].shape)
print("actuals", y_train[0:64])

@profile
def predict():
    return model(x_train[0:64].astype('float64'))

prediction = predict()

tf.argmax(prediction, axis=1)
# loss_value = loss_object(labels, predictions)

Input Shape: (64, 1290, 128)
actuals [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
1.4261858463287354 seconds


<tf.Tensor: id=86863, shape=(64,), dtype=int64, numpy=
array([0, 0, 0, 0, 0, 0, 0, 9, 0, 5, 0, 0, 0, 5, 5, 7, 0, 0, 0, 0, 0, 0,
       0, 0, 5, 0, 0, 0, 9, 0, 0, 2, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 2, 0,
       0, 0, 4, 0, 5, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 9, 0],
      dtype=int64)>

### Load Trained Weights

In [47]:
model = tf.saved_model.load(f'{MODEL_DIR}/{model.__class__.__name__}')

## Train

In [48]:
from machine_learning.ml_operations import ml_closure

run_epoch, train_step, val_step = ml_closure(model, loss_object, optimizer, train_loss, train_accuracy, val_loss, val_accuracy)
run_epoch = profile(run_epoch)

train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(TRAIN_BUFFER_SIZE).batch(64)
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(64)

In [49]:
for epoch in tqdm(range(EPOCHS)):
    run_epoch(train_dataset, val_dataset)

HBox(children=(IntProgress(value=0, max=20), HTML(value='')))

StagingError: in converted code:
    relative to C:\Users\Daniel:

    Documents\CS 6220\Project\src\machine_learning\ml_operations.py:34 train_step  *
        grads = tape.gradient(loss_value, model.trainable_variables)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\backprop.py:1014 gradient
        unconnected_gradients=unconnected_gradients)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\imperative_grad.py:76 imperative_grad
        compat.as_str(unconnected_gradients.value))
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:738 _backward_function
        return self._rewrite_forward_and_call_backward(call_op, *args)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:661 _rewrite_forward_and_call_backward
        forward_function, backwards_function = self.forward_backward(len(doutputs))
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:582 forward_backward
        forward, backward = self._construct_forward_backward(num_doutputs)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:629 _construct_forward_backward
        func_graph=backwards_graph)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\framework\func_graph.py:915 func_graph_from_py_func
        func_outputs = python_func(*func_args, **func_kwargs)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:619 _backprop_function
        src_graph=self._func_graph)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\ops\gradients_util.py:679 _GradientsHelper
        lambda: grad_fn(op, *out_grads))
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\ops\gradients_util.py:350 _MaybeCompile
        return grad_fn()  # Exit early
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\ops\gradients_util.py:679 <lambda>
        lambda: grad_fn(op, *out_grads))
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:715 _registered_grad_fn
        return self._rewrite_forward_and_call_backward(op, *doutputs)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:661 _rewrite_forward_and_call_backward
        forward_function, backwards_function = self.forward_backward(len(doutputs))
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:582 forward_backward
        forward, backward = self._construct_forward_backward(num_doutputs)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:629 _construct_forward_backward
        func_graph=backwards_graph)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\framework\func_graph.py:915 func_graph_from_py_func
        func_outputs = python_func(*func_args, **func_kwargs)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:619 _backprop_function
        src_graph=self._func_graph)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\ops\gradients_util.py:679 _GradientsHelper
        lambda: grad_fn(op, *out_grads))
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\ops\gradients_util.py:350 _MaybeCompile
        return grad_fn()  # Exit early
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\ops\gradients_util.py:679 <lambda>
        lambda: grad_fn(op, *out_grads))
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:715 _registered_grad_fn
        return self._rewrite_forward_and_call_backward(op, *doutputs)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:661 _rewrite_forward_and_call_backward
        forward_function, backwards_function = self.forward_backward(len(doutputs))
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:582 forward_backward
        forward, backward = self._construct_forward_backward(num_doutputs)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:629 _construct_forward_backward
        func_graph=backwards_graph)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\framework\func_graph.py:915 func_graph_from_py_func
        func_outputs = python_func(*func_args, **func_kwargs)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:619 _backprop_function
        src_graph=self._func_graph)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\ops\gradients_util.py:679 _GradientsHelper
        lambda: grad_fn(op, *out_grads))
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\ops\gradients_util.py:350 _MaybeCompile
        return grad_fn()  # Exit early
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\ops\gradients_util.py:679 <lambda>
        lambda: grad_fn(op, *out_grads))
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:715 _registered_grad_fn
        return self._rewrite_forward_and_call_backward(op, *doutputs)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:661 _rewrite_forward_and_call_backward
        forward_function, backwards_function = self.forward_backward(len(doutputs))
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:582 forward_backward
        forward, backward = self._construct_forward_backward(num_doutputs)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:629 _construct_forward_backward
        func_graph=backwards_graph)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\framework\func_graph.py:915 func_graph_from_py_func
        func_outputs = python_func(*func_args, **func_kwargs)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:619 _backprop_function
        src_graph=self._func_graph)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\ops\gradients_util.py:679 _GradientsHelper
        lambda: grad_fn(op, *out_grads))
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\ops\gradients_util.py:350 _MaybeCompile
        return grad_fn()  # Exit early
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\ops\gradients_util.py:679 <lambda>
        lambda: grad_fn(op, *out_grads))
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:715 _registered_grad_fn
        return self._rewrite_forward_and_call_backward(op, *doutputs)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:661 _rewrite_forward_and_call_backward
        forward_function, backwards_function = self.forward_backward(len(doutputs))
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:582 forward_backward
        forward, backward = self._construct_forward_backward(num_doutputs)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:629 _construct_forward_backward
        func_graph=backwards_graph)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\framework\func_graph.py:915 func_graph_from_py_func
        func_outputs = python_func(*func_args, **func_kwargs)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\eager\function.py:619 _backprop_function
        src_graph=self._func_graph)
    Anaconda3\envs\cs6220\lib\site-packages\tensorflow_core\python\ops\gradients_util.py:637 _GradientsHelper
        (op.name, op.type))

    LookupError: No gradient defined for operation 'while' (op type: While)


### Save model

In [16]:
model.summary()

try:
    os.mkdir(f'{MODEL_DIR}/{model.__class__.__name__}')
except FileExistsError:
    pass
tf.saved_model.save(model, f'{MODEL_DIR}/{model.__class__.__name__}')

Model: "music_genre_model_v2_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                multiple                  49408     
_________________________________________________________________
dense_4 (Dense)              multiple                  2080      
_________________________________________________________________
dense_5 (Dense)              multiple                  1056      
_________________________________________________________________
dense_6 (Dense)              multiple                  330       
Total params: 52,874
Trainable params: 52,874
Non-trainable params: 0
_________________________________________________________________
INFO:tensorflow:Assets written to: models/saved_models/MusicGenreModel_v2\assets


### Test Model

In [41]:
test_data = []

for label, genre in enumerate(GENRES):
    new_dir = FILES_PATH + '/' + genre
    print("Genre: ", new_dir)

    for n in tqdm(range(20)):
        file_name = f"{genre}.{n:05}{extension}"
        try:
            samples, _ = librosa.load(f"{FILES_PATH}/{genre}/{file_name}", sr=RATE, mono=True)
            # print("Length:", len(samples) / RATE, "seconds, samples:", len(samples))
        except:
            continue
        audio_data = librosa.feature.melspectrogram(np.asfortranarray(samples), sr=RATE, n_fft=FFT_SIZE, n_mels=MELS)
        test_data.append((np.swapaxes(audio_data, 0, 1), label))

Genre:  audio_files/genres/blues


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Genre:  audio_files/genres/classical


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Genre:  audio_files/genres/country


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Genre:  audio_files/genres/disco


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Genre:  audio_files/genres/hiphop


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Genre:  audio_files/genres/jazz


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Genre:  audio_files/genres/metal


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Genre:  audio_files/genres/pop


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Genre:  audio_files/genres/reggae


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Genre:  audio_files/genres/rock


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




In [42]:
test_indices = list(range(len(test_data)))

x_test, y_test = prepare_data(test_data, test_indices)

print(x_test.shape)
print(y_test.shape)

x_test = tf.slice(x_test, [0, 0, 0], [x_test.shape[0], 1290, x_test.shape[2]])

print(x_test.shape)
print(y_test.shape)

(200, 1292, 128)
(200,)
(200, 1290, 128)
(200,)


In [43]:
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).shuffle(200).batch(64)

for x, labels in test_dataset:    
    predictions = model(x)
    val_loss_value = loss_object(labels, predictions)

    val_loss(val_loss_value)
    val_accuracy(labels, predictions)

template = "Loss: {}, Accuracy: {}, Validation Loss: {}, Validation Accuracy: {}"
print(template.format(train_loss.result(),
                      train_accuracy.result() * 100,
                      val_loss.result(),
                      val_accuracy.result() * 100))

Loss: 0.0, Accuracy: 0.0, Validation Loss: 1.071116778999567, Validation Accuracy: 68.0
