In [2]:
import IPython.display
import librosa
import librosa.display
import pandas as pd
import os
import struct
import glob
import soundfile as sf
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint 
from datetime import datetime
from sklearn import metrics 
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten,Activation, Dense, Dropout, MaxPooling2D, GlobalAveragePooling2D, BatchNormalization

# Seed NumPy's and TensorFlow's global random generators with one shared value.
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

In [3]:
# Dataset root (holds the fold<N>/ audio directories) and the metadata CSV.
# The CSV path is derived from file_path so the root is defined in one place only.
file_path = '../input/urbansound8k'
urbansound8k = pd.read_csv(os.path.join(file_path, 'UrbanSound8K.csv'))

In [4]:
# Remove pandas' cap on the number of displayed rows, then preview the metadata table.
pd.options.display.max_rows = None
urbansound8k.head()

In [16]:
# Feature extraction using librosa
def features_extract(file):
    """Return a fixed-length MFCC summary vector for one audio file.

    Parameters
    ----------
    file : str or path-like
        Path of the audio clip to load.

    Returns
    -------
    numpy.ndarray
        1-D array holding, for each of the 40 MFCC coefficients, the mean over
        time of the standardised MFCC matrix.
    """
    # Load the clip named by the argument (previously this read the
    # notebook-level variable `file_name` and ignored `file`).
    audio, sample_rate = librosa.load(file, res_type='kaiser_fast')

    # 40 MFCCs per frame
    feature = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)

    # Standardise with the mean/std of the whole matrix. A constant matrix has
    # std 0; divide by 1 in that case so the result is zeros instead of NaN.
    spread = feature.std()
    if spread == 0:
        spread = 1.0
    feature_norm = (feature - feature.mean()) / spread

    # Collapse the time axis: one averaged value per coefficient
    scaled_feature = np.mean(feature_norm.T, axis=0)

    return scaled_feature

# list containing one [feature_vector, class_label] pair per audio clip
extracted = []

# for each row in the csv; `total` lets tqdm show percentage and ETA, because
# the iterrows() generator has no length of its own
for index_num, row in tqdm(urbansound8k.iterrows(), total=len(urbansound8k)):

    # build the clip path: <file_path>/fold<N>/<slice_file_name>
    file_name = os.path.join(
        os.path.abspath(file_path),
        'fold' + str(row["fold"]),
        str(row['slice_file_name']),
    )

    # get file label
    final_class_labels = row['class']

    # extract feature
    data = features_extract(file_name)

    # store it in a list
    extracted.append([data, final_class_labels])

In [17]:
# Turn the [feature, label] pairs into a DataFrame and attach the 'fold' column
# from the metadata table (pandas matches the rows by index label).
df_extracted = pd.DataFrame(extracted, columns=['feature', 'label']).assign(
    fold=urbansound8k['fold']
)
df_extracted.head()

In [18]:
# Constructing the ANN model: a dense network over the 40-value MFCC vectors.
le = LabelEncoder()
y = np.array(df_extracted.label.tolist())

filter_size = 3  # not used in this cell

# Integer-encode the class names, then one-hot encode them. Uses the
# tensorflow.keras `to_categorical` (imported at the top) so the label
# encoding comes from the same Keras implementation as the model below.
y = to_categorical(le.fit_transform(y))

# number of output units = number of distinct classes
num_labels = y.shape[1]

# build model
model = Sequential()
model.add(Dense(512, input_shape=(40,)))  # 40 inputs = n_mfcc used in features_extract
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dropout(0.5))
# NOTE(review): this layer has no activation, so it is purely linear - confirm that is intended.
model.add(Dense(128))
model.add(Dense(num_labels))
model.add(Activation('softmax'))
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)
model.summary()

In [86]:
# Fixed split by fold id: folds below 5 plus fold 6 for training,
# fold 5 plus folds 7 and above for validation.
fold_ids = df_extracted["fold"]
in_train = (fold_ids < 5) | (fold_ids == 6)
in_validation = (fold_ids == 5) | (fold_ids >= 7)

train_data = df_extracted[in_train]
validation_data = df_extracted[in_validation]

print("Train the model on folds: " + str(sorted(train_data["fold"].unique())))
print("Test the model on folds: " + str(sorted(validation_data["fold"].unique())))

In [87]:
# 10-fold cross-validation: each fold 1..10 is held out once for validation
# while the remaining folds are used for training.
predicted = []   # per fold: predicted class probabilities for the held-out rows
actual = []      # per fold: one-hot true labels for the held-out rows

# Fit the encoder ONCE on every label so the class -> index mapping is the same
# for the training and validation targets of every fold (refitting it on each
# subset separately can assign different indices to the same class).
le.fit(df_extracted['label'])
n_classes = len(le.classes_)

for i in range(1, 11):
    validation_data = df_extracted[df_extracted['fold'] == i]
    train_data = df_extracted[df_extracted['fold'] != i]

    x = np.array(train_data.feature.tolist())
    y = np.array(train_data.label.tolist())

    x_val = np.array(validation_data.feature.tolist())
    y_val = np.array(validation_data.label.tolist())

    # num_classes keeps the one-hot width fixed even if a subset lacks a class
    y = to_categorical(le.transform(y), num_classes=n_classes)
    y_val = to_categorical(le.transform(y_val), num_classes=n_classes)

    # Start every fold from freshly initialised weights and a fresh optimizer.
    # Re-using the same trained model across folds would validate on data the
    # network has already been trained on in earlier iterations.
    model = tf.keras.models.clone_model(model)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=['accuracy'],
    )

    # shuffle=False: rows are fed to the network in DataFrame order every epoch
    fitting = model.fit(x, y, batch_size=64, epochs=10, validation_data=(x_val, y_val), shuffle=False)
    pred = model.predict(x_val)

    predicted.append(pred)
    actual.append(y_val)

# After the loop, `model` and `fitting` belong to the last fold (fold 10 held out).

In [88]:
# Accuracy of each of the 10 folds: argmax turns the probability rows and the
# one-hot rows back into class indices before they are compared.
acc = [
    accuracy_score(
        np.argmax(actual[fold_idx], axis=1),
        np.argmax(predicted[fold_idx], axis=1),
    )
    for fold_idx in range(10)
]
print("Accuracy for 10 fold cross validation:",np.mean(acc))


In [89]:
# Plotting ANN loss. `fitting` is the History returned by the most recent
# model.fit call, so these curves describe that single training run only.
# The history dict is bound to `history` (not `metrics`) so the imported
# sklearn `metrics` module is not shadowed.
history = fitting.history

# One plot call per curve: passing both series to a single plt.plot(x, y1, y2)
# only works because matplotlib treats the trailing array as a separate line
# drawn against its own index.
plt.plot(fitting.epoch, history['loss'], label='train_loss')
plt.plot(fitting.epoch, history['val_loss'], label='test_loss')
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid(True)
plt.show()

# ANN accuracy
plt.plot(fitting.epoch, history['accuracy'], label='train_accuracy')
plt.plot(fitting.epoch, history['val_accuracy'], label='test_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)
plt.show()