<a href="https://colab.research.google.com/github/tuomaseerola/audio/blob/master/audio_corpus_analysis_tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Music and Science – Audio Corpus Analysis Tutorial 

[Tuomas Eerola](https://www.durham.ac.uk/staff/tuomas-eerola/), Durham University, Music Department, 2022.

In [None]:
#PROMPT: Press the play button to set up the technical system (import libraries etc.)
import os
import numpy as np
import librosa
import librosa.display
import IPython.display as ipd
from matplotlib import pyplot as plt 
%matplotlib inline
print(librosa.__version__)

Install the `mirdata` package, which is used below to download and load the dataset.

In [None]:
pip install mirdata

# 1. Obtain a dataset

Let's look at the classic genre categorization dataset by Tzanetakis. 

In [None]:
import mirdata
# Print the identifiers of all datasets that mirdata knows about.
print(mirdata.list_datasets())
# Create a handle for the 'gtzan_genre' dataset; no version is requested here,
# so mirdata's default version is used.
gtzan_genre = mirdata.initialize('gtzan_genre')

In [None]:
# Show the track ID stored at position 999 of the dataset's ID list
# (presumably the last of 1000 tracks in the full dataset — TODO confirm).
gtzan_genre.track_ids[999]

In [None]:
# Switch to the 'mini' version of the dataset and download its files.
gtzan = mirdata.initialize('gtzan_genre', version='mini')
gtzan.download()
# Number of track IDs in the mini version.
len(gtzan.track_ids)


In [None]:
# Load all tracks of the mini dataset; the result is indexed by track ID.
tracks = gtzan.load_tracks()
print(tracks.keys())
# Keep the first track as a worked example and print its description.
ex = tracks[gtzan.track_ids[0]]
print(ex)


In [None]:
from librosa.core import audio
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt

# Show the example track and the location of its audio file. A bare
# `ex.audio_path` in the middle of a cell displays nothing, so print it.
print(ex)
print(ex.audio_path)

# Read the audio once; ex.audio is indexed as (samples, sample rate) here.
signal, sample_rate = ex.audio

# The sample rate must be passed by keyword: recent librosa releases made
# `sr` keyword-only, so a positional second argument raises a TypeError.
librosa.display.waveshow(signal, sr=sample_rate)

## Extract features

Let's extract some features and use them to predict genre.

In [None]:
import numpy as np
import librosa
import librosa.display

import pandas as pd
import os
import csv
import sys
import natsort
import warnings
warnings.filterwarnings('ignore')

# Column layout of the feature table: the genre label, five summary
# descriptors, and the means of MFCC rows 1..19 (row 0 is deliberately left out,
# matching the mfcc1..mfcc19 column names).
mfcc_rows = range(1, 20)
feature_columns = ['genre', 'rmse', 'spec_cent', 'spec_bw', 'rolloff', 'zcr']
feature_columns += ['mfcc%d' % i for i in mfcc_rows]

# Collect one record per track and build the DataFrame once at the end,
# instead of growing it row by row inside the loop.
records = []
for track in tracks:
  print(track)  # progress indicator: the ID of the track being processed
  ex = tracks[track]
  y, sr = librosa.load(ex.audio_path)
  # Frame-wise descriptors; each is reduced to its mean over the whole track.
  # (The chroma features computed previously were never used and are omitted.)
  rmse = librosa.feature.rms(y=y)
  spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
  spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
  rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
  zcr = librosa.feature.zero_crossing_rate(y)
  mfcc = librosa.feature.mfcc(y=y, sr=sr)
  record = [ex.genre, np.mean(rmse), np.mean(spec_cent), np.mean(spec_bw), np.mean(rolloff), np.mean(zcr)]
  record.extend(np.mean(mfcc[i]) for i in mfcc_rows)
  records.append(record)

df = pd.DataFrame(records, columns=feature_columns)


## Classify


In [None]:
# Sanity check on the MFCC matrix: recompute it for the track left in `track`
# by the feature-extraction loop and report its number of coefficient rows.
ex = tracks[track]
y, sr = librosa.load(ex.audio_path)
mfcc = librosa.feature.mfcc(sr=sr, y=y)
mfcc.shape[0]


In [None]:
# Preview the first three rows of the feature table.
df.head(n=3)


In [None]:
from sklearn import metrics
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Predictors are every column except the genre label, which is the target.
X = df.drop('genre', axis=1).astype(float)
y = df['genre']

test_size = 0.30  # 70:30 split into training and test sets
seed = 7  # fixed random seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=seed)

# Standardise each feature (zero mean, unit variance) using statistics from the
# training set only. preprocessing.normalize() is not a substitute: it rescales
# every *row* to unit norm, which leaves the Hz-scale spectral features
# dominating the distances that KNN relies on.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# K-nearest-neighbours classifier with scikit-learn's default settings.
NN = KNeighborsClassifier()
NN.fit(X_train, y_train)


In [None]:
from sklearn.metrics import accuracy_score

# Predict a genre for every item in the held-out test set.
y_pred = NN.predict(X_test)
# scikit-learn metrics take (y_true, y_pred) in that order; keep it consistent
# with the confusion-matrix call even though accuracy itself is symmetric.
accuracy_score(y_test, y_pred)


In [None]:
# Confusion matrix of true genres against predicted genres on the test set.
metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
# Report accuracy on both splits. The labels and values are passed as separate
# print() arguments; wrapping them in an extra pair of parentheses printed the
# repr of a tuple instead of a readable line.
print("Test accuracy: ", NN.score(X_test, y_test), sep='')
print("Train accuracy: ", NN.score(X_train, y_train), sep='')

In [None]:
from sklearn.model_selection import cross_val_score

# maxK is the integer square root of the number of training samples.
# It is computed once and printed once (it was previously evaluated twice).
n_train = X_train.shape[0]
maxK = int(np.sqrt(n_train))
print(n_train)
print(maxK)

In [None]:
# Candidate values of k for KNN: start from the integers 1..14 ...
myList = list(range(1, 15))
# ... and keep only the odd ones.
neighbors = [k for k in myList if k % 2 == 1]

In [None]:
# empty list that will hold cv scores
cv_scores = []
# perform 10-fold cross validation
for k in neighbors:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X_train, y_train, cv=8, scoring='accuracy')
    cv_scores.append(scores.mean())

In [None]:
# changing to misclassification error
misError = [1 - x for x in cv_scores]
print(misError)

In [None]:
# Pick the k whose misclassification error is lowest (first one wins on ties).
optimal_k = neighbors[min(range(len(misError)), key=misError.__getitem__)]
print("The optimal number of neighbors is %d" % optimal_k)

## Visualise

In [None]:
# Show the first three rows of the feature table before plotting it.
df.head(n=3)

In [None]:
# One box plot per feature column (the genre label is excluded), laid out on
# a 5x5 grid with independent axes so differently scaled features stay readable.
df.drop(columns='genre').plot(
    kind='box',
    subplots=True,
    layout=(5, 5),
    figsize=(15, 15),
    sharex=False,
    sharey=False,
)
plt.show()