**Piano Recognition in Classical Music Using MLP**  
  
Alina Dima (s3919951)  
Lisa Koopmans (s3933083)  
Júlia Vághy (s3994759)  
Maria Kapusheva (s3946231)  
Group number 13

In [None]:
from google.colab import drive
import numpy as np
from sklearn.datasets import make_classification
from imblearn.under_sampling import RandomUnderSampler
from intervaltree import Interval, IntervalTree
!pip install python_speech_features
from python_speech_features import mfcc
from python_speech_features import logfbank
import scipy.io.wavfile as wav
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import regularizers
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
from collections import Counter
import pandas as pd
import seaborn as sn



Collecting python_speech_features
  Downloading https://files.pythonhosted.org/packages/ff/d1/94c59e20a2631985fbd2124c45177abaa9e0a4eee8ba8a305aa26fc02a8e/python_speech_features-0.6.tar.gz
Building wheels for collected packages: python-speech-features
  Building wheel for python-speech-features (setup.py) ... [?25l[?25hdone
  Created wheel for python-speech-features: filename=python_speech_features-0.6-cp37-none-any.whl size=5888 sha256=c07a89bb95c41d86bfcc2b1b6852eae4089bfe81574b55a70f68ad1074df3e72
  Stored in directory: /root/.cache/pip/wheels/3c/42/7c/f60e9d1b40015cd69b213ad90f7c18a9264cd745b9888134be
Successfully built python-speech-features
Installing collected packages: python-speech-features
Successfully installed python-speech-features-0.6


In [None]:
# Mount Google Drive at /content/drive; the data paths used in later cells live under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


**Data Preprocessing**  
The following cell extracts the inputs (u) and the outputs (y).  
u - a 13-dimensional vector of mel-frequency cepstral coefficients (MFCCs), computed over windows of 2048 samples  
y - the label of the evaluated interval (1 - piano plays in the interval; 0 - piano does not play in the interval)

In [None]:
# Build the dataset from the MusicNet archive: one MFCC feature vector (u) and one
# binary piano label (y) per analysis window, then cache both arrays on Drive.
musicnet_path = '/content/drive/Shareddrives/Neural Networks/musicnet.npz' # Change the path accordingly

u = []
y = []
fs = 44100          # sample rate passed to the MFCC extractor
window_size = 2048  # window length in samples
stride = 1024 # represents the distance between the beginning index of consecutive intervals
PIANO_INSTRUMENT = 1  # instrument code that is labelled as "piano plays"

# NOTE: allow_pickle=True unpickles arbitrary objects; only load archives from a trusted source.
# Passing the path (instead of an open file object) together with `with` makes sure
# the archive handle is closed once all recordings have been processed.
with np.load(musicnet_path, encoding='latin1', allow_pickle=True) as train_data:
  for key in train_data.keys():
    signal, labels = train_data[key]
    mfcc_feat = mfcc(signal, fs, winlen=window_size/fs, winstep=stride/fs, nfft=window_size)
    for i in range(mfcc_feat.shape[0]):
      # Frame i of the MFCC matrix starts at sample i * stride, so the label window
      # has to start there as well (the first frame starts at sample 0).
      start_idx = i * stride
      window_labels = labels[start_idx:start_idx+window_size]
      # Each entry is (start, end, (instrument, note, measure, beat, note_value)).
      piano_plays = any(instrument == PIANO_INSTRUMENT
                        for _, _, (instrument, *_) in window_labels)
      y.append(1 if piano_plays else 0)
    u.extend(mfcc_feat.tolist())

np.save('/content/drive/Shareddrives/Neural Networks/u', u)     # Change paths accordingly
np.save('/content/drive/Shareddrives/Neural Networks/y', y)

KeyboardInterrupt: ignored

In [None]:
# Reload the cached feature matrix (u) and label vector (y) written by the preprocessing cell.
# np.load is given the path directly so that it opens and closes the file itself;
# the previous open(...) handles were never closed.
# NOTE: allow_pickle=True unpickles arbitrary objects; only load files from a trusted source.
u = np.load('/content/drive/Shareddrives/Neural Networks/u.npy', encoding = 'latin1', allow_pickle = True)  # Change paths accordingly
y = np.load('/content/drive/Shareddrives/Neural Networks/y.npy', encoding = 'latin1', allow_pickle = True)

**Training the MLP**

In [None]:
# Stratified 10-fold cross-validation of a single-hidden-layer MLP.
# Per fold, the cell stores the test accuracy/loss, the training curves, the
# confusion matrix, and the true/predicted test labels for the evaluation cells below.
accuracy_per_fold = []
loss_per_fold = []
accuracy_history = []
loss_history = []
AUC_scores = []
y_true = []
y_predictions = []
confusion_matrices = []

# Setting up a k-fold cross validation scheme
kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)

# MLP training
for fold_number, (train, test) in enumerate(kf.split(u, y), start=1):
  print('Fold ', fold_number)
  # Model definition: 13 MFCC inputs -> 9 sigmoid hidden units -> 1 sigmoid output
  model = Sequential()
  model.add(Dense(9, activation='sigmoid',
                  kernel_initializer='glorot_normal',
                  input_shape=(13,),
                  bias_initializer='ones',
                  kernel_regularizer=regularizers.l2(0.2)))
  model.add(Dense(1, activation='sigmoid'))

  # Model compilation and fit
  optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.95)
  # Pass the configured optimizer object. The string 'SGD' would create a fresh
  # optimizer with default settings and silently drop the momentum chosen above.
  model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

  # Stops once the training loss has not improved for 5 consecutive epochs.
  early_stopping = EarlyStopping(monitor='loss', mode='min', patience=5)
  history = model.fit(u[train], y[train],
              batch_size=32,
              epochs=30,
              verbose=0,
              callbacks=[early_stopping],
              shuffle=True)

  # Evaluate model on validation data; scores is [loss, accuracy]
  scores = model.evaluate(u[test], y[test], verbose=0)
  accuracy_per_fold.append(scores[1])
  loss_per_fold.append(scores[0])
  accuracy_history.append(history.history['accuracy'])
  loss_history.append(history.history['loss'])

  # Threshold the sigmoid output at 0.5 to obtain hard 0/1 predictions
  y_pred = (model.predict(u[test]) > 0.5).astype("int32")
  confusion_matrices.append(confusion_matrix(y[test], y_pred))

  y_true.append(y[test])
  y_predictions.append(y_pred)

  # Plotting accuracy
  epochs_run = range(1, len(history.history['accuracy'])+1)
  plt.plot(epochs_run, history.history['accuracy'])
  plt.title('Accuracy History')
  plt.xlabel('Epoch')
  plt.ylabel('Value')
  plt.show()

  # Plotting loss
  plt.plot(range(1, len(history.history['loss'])+1), history.history['loss'])
  plt.title('Loss History')
  plt.xlabel('Epoch')
  plt.ylabel('Value')
  plt.show()

  print()

Fold  1


KeyboardInterrupt: ignored

In [None]:
# Summarise the cross-validation run: mean and standard deviation over the folds
# of the test accuracy and test loss collected during training.
accuracy_mean, accuracy_std = np.mean(accuracy_per_fold), np.std(accuracy_per_fold)
loss_mean, loss_std = np.mean(loss_per_fold), np.std(loss_per_fold)

print(f'Mean accuracy of all folds: {accuracy_mean} (+- {accuracy_std})')
print(f'Mean loss of all folds: {loss_mean} (+- {loss_std})')

------------------------------------------------------------------------
Score per fold
------------------------------------------------------------------------
> Fold 1 - Loss: 0.44489869475364685 - Accuracy: 81.4037024974823%
------------------------------------------------------------------------
> Fold 2 - Loss: 0.4966578185558319 - Accuracy: 77.3828387260437%
------------------------------------------------------------------------
> Fold 3 - Loss: 0.48799434304237366 - Accuracy: 78.54465842247009%
------------------------------------------------------------------------
Average scores for all folds:
> Accuracy: 79.1103998819987 (+- 1.6895529815879358)
> Loss: 0.4765169521172841
------------------------------------------------------------------------


Plotting the precision-recall (PR) curves and computing the F1 score

In [None]:
# Precision-recall curves and F1 scores, per fold and pooled over all folds.
# NOTE(review): y_predictions holds the thresholded 0/1 predictions from the
# training cell, not probabilities, so every curve has a single operating point.

# Seaborn styles are applied when the axes are created, so the style has to be
# selected before the first plot call for it to affect this figure.
sn.set_style('whitegrid')

f_scores = []
# Plotting PR-curves and computing the f1 score per fold.
# Iterate over the folds that were actually recorded instead of assuming 10,
# so the cell also works after an interrupted or shortened training run.
for fold, (fold_true, fold_pred) in enumerate(zip(y_true, y_predictions), start=1):
  precision, recall, _ = precision_recall_curve(fold_true, fold_pred)
  fold_ap = average_precision_score(fold_true, fold_pred)
  plt.plot(recall, precision, lw=1, alpha=0.6, label='PR fold %d (AUC = %0.2f)' % (fold, fold_ap))
  f_scores.append(f1_score(fold_true, fold_pred))

y_true_all_folds = np.concatenate(y_true)
y_predictions_all_folds = np.concatenate(y_predictions)

precision, recall, _ = precision_recall_curve(y_true_all_folds, y_predictions_all_folds)

# Getting the baseline PR-curve: the fraction of positive (piano) samples,
# i.e. the precision of a classifier that always predicts "piano".
baseline = np.mean(y_true_all_folds == 1)

# Plotting the PR-curve over all folds and the baseline
plt.axhline(y=baseline, linestyle='dashed', color='grey', label='PR baseline (AUC = %0.2f)'% (baseline))
pooled_ap = average_precision_score(y_true_all_folds, y_predictions_all_folds)
plt.plot(recall, precision, lw=2, alpha=1, label='PR all folds (AUC = %0.2f)' % (pooled_ap), color='b')
plt.title('Precision-Recall Curves')
plt.axis([None, None, 0, 1.1])
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.legend(loc='lower left', prop={"size":8.5})
plt.show()

print(f'Mean f1 score: {np.mean(f_scores)} (+- {np.std(f_scores)})')

Plotting the confusion matrix and computing the false-positive (FP) and false-negative (FN) rates

In [None]:
# source: https://stackoverflow.com/questions/35572000/how-can-i-plot-a-confusion-matrix

# Confusion matrix pooled over all folds: rows are the actual class, columns the
# predicted class, both ordered (0 = no piano, 1 = piano).
matrix = confusion_matrix(y_true_all_folds, y_predictions_all_folds)

# Row 0 holds the actual negatives, row 1 the actual positives.
fp_rate = matrix[0][1] / (matrix[0][0] + matrix[0][1])
fn_rate = matrix[1][0] / (matrix[1][0] + matrix[1][1])
print('FP rate: %.3f' % fp_rate)
print('FN rate:  %.3f' % fn_rate)

class_names = ['No Piano', 'Piano']
df = pd.DataFrame(matrix, index=range(2), columns=range(2))
sn.set(font_scale=1.0)
sn.heatmap(
  df,
  annot=True,
  annot_kws={"size":14},
  fmt='.8g',
  xticklabels=class_names,
  yticklabels=class_names,
)
plt.ylabel('Actual class', fontsize=14)
plt.xlabel('Predicted class', fontsize=14)
plt.title('Confusion matrix of stratified 10-fold cross-validation', fontsize=15)
plt.show()

Plotting accuracy and loss over all folds and per fold.

In [None]:
def _plot_history_per_fold(histories, title, ylabel, legend_loc, filename, line_width):
  """Plot one training curve per fold in a single figure, save it and show it.

  histories  -- list with one sequence of per-epoch values for each fold
  title      -- figure title
  ylabel     -- label of the y axis
  legend_loc -- matplotlib legend location string
  filename   -- name passed to plt.savefig
  line_width -- line width used for every curve
  """
  # Select the style before the axes are created so that it applies to this figure.
  sn.set_style('whitegrid')
  # Use however many folds were recorded rather than assuming exactly 10.
  for fold, values in enumerate(histories, start=1):
    plt.plot(values, lw=line_width, alpha=1, label='Fold %d' % fold)
  plt.legend(loc=legend_loc, prop={"size":11})
  plt.title(title)
  plt.xlabel('epoch')
  plt.ylabel(ylabel)
  plt.savefig(filename)
  plt.show()

# Training loss and training accuracy per epoch, one curve per fold.
_plot_history_per_fold(loss_history, 'Loss per epoch', 'Loss value', 'upper right', 'Loss', 1.5)
_plot_history_per_fold(accuracy_history, 'Accuracy per epoch', 'Accuracy', 'lower right', 'Accuracy', 1)