In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

from tqdm.notebook import tqdm

In [3]:
#forming a panda dataframe from the metadata file
data=pd.read_csv("../UrbanSound8K/metadata/UrbanSound8K.csv")

In [4]:
#head of the dataframe
data.head()

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


In [4]:
#count of datapoints in each of the folders
data["fold"].value_counts()

4     990
5     936
3     925
2     888
1     873
7     838
10    837
6     823
9     816
8     806
Name: fold, dtype: int64

In [5]:
data["class"].value_counts()

jackhammer          1000
children_playing    1000
engine_idling       1000
drilling            1000
air_conditioner     1000
dog_bark            1000
street_music        1000
siren                929
car_horn             429
gun_shot             374
Name: class, dtype: int64

In [6]:
# a look at the class distribution of each fold to see how balanced the dataset is, it looks like the dataset is not perfectly balanced.
appended = []
for i in range(1,11):
    appended.append(data[data.fold == i]['class'].value_counts())
    
class_distribution = pd.DataFrame(appended)
class_distribution = class_distribution.reset_index()
class_distribution['index'] = ["fold"+str(x) for x in range(1,11)]
class_distribution

Unnamed: 0,index,jackhammer,air_conditioner,children_playing,street_music,dog_bark,drilling,engine_idling,siren,car_horn,gun_shot
0,fold1,120,100,100,100,100,100,96,86,36,35
1,fold2,120,100,100,100,100,100,100,91,42,35
2,fold3,120,100,100,100,100,100,107,119,43,36
3,fold4,120,100,100,100,100,100,107,166,59,38
4,fold5,120,100,100,100,100,100,107,71,98,40
5,fold6,68,100,100,100,100,100,107,74,28,46
6,fold7,76,100,100,100,100,100,106,77,28,51
7,fold8,78,100,100,100,100,100,88,80,30,30
8,fold9,82,100,100,100,100,100,89,82,32,31
9,fold10,96,100,100,100,100,100,93,83,33,32


In [5]:
from librosa import display
import librosa

In [9]:
#feature set
#This file is of a dog bark
y,sr=librosa.load("../UrbanSound8K/audio/fold5/100032-3-0-0.wav")
mfccs = librosa.feature.mfcc(y, sr, n_mfcc=40)
melspectrogram =librosa.feature.melspectrogram(y=y, sr=sr, n_mels=40,fmax=8000)
chroma_stft=librosa.feature.chroma_stft(y=y, sr=sr,n_chroma=40)
chroma_cq =librosa.feature.chroma_cqt(y=y, sr=sr,n_chroma=40)
chroma_cens =librosa.feature.chroma_cens(y=y, sr=sr,n_chroma=40)
melspectrogram.shape,chroma_stft.shape,chroma_cq.shape,chroma_cens.shape,mfccs.shape

((40, 14), (40, 14), (40, 14), (40, 14), (40, 14))

# !!!

In [6]:
#preprocessing using entire feature set
x_train=[]
x_test=[]
y_train=[]
y_test=[]
XX=[]
YY=[]
path="../UrbanSound8K/audio/fold"
for i in tqdm(range(len(data))):
    fold_no=str(data.iloc[i]["fold"])
    file=data.iloc[i]["slice_file_name"]
    label=data.iloc[i]["classID"]
    filename=path+fold_no+"/"+file
    y,sr=librosa.load(filename)
    
    chroma_stft=np.mean(librosa.feature.chroma_stft(y=y, sr=sr,n_chroma=40).T,axis=0)
    chroma_cq = np.mean(librosa.feature.chroma_cqt(y=y, sr=sr,n_chroma=40).T,axis=0)
    chroma_cens = np.mean(librosa.feature.chroma_cens(y=y, sr=sr,n_chroma=40).T,axis=0)
    melspectrogram = np.mean(librosa.feature.melspectrogram(y=y, sr=sr, n_mels=40,fmax=8000).T,axis=0)
    mfccs = np.mean(librosa.feature.mfcc(y, sr, n_mfcc=40).T,axis=0)
    
#     rmse = librosa.feature.rms(y=y); rmse40 = np.tile(rmse,40)[0][:40]
#     spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr) ; spec_cent40=np.tile(spec_cent[0],40)[:40]
#     spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr) ; spec_bw40=np.tile(spec_bw[0],40)[:40]
#     contrast = np.mean(librosa.feature.spectral_contrast(y, sr=sr).T,axis=0); contrast40=np.tile(contrast,40)[:40]
#     flatness = librosa.feature.spectral_flatness(y=y); flatness40=np.tile(flatness[0],40)[:40]
#     rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr); rolloff40=np.tile(rolloff[0],40)[:40]
#     poly = librosa.feature.poly_features(y=y,sr=sr); poly40=np.tile(np.append(poly[0][:20],poly[1][:20]),40)[:40]
#     tonnetz = np.mean(librosa.feature.tonnetz(y=y,sr=sr).T,axis=0); tonnetz40 = np.tile(tonnetz,40)[:40]
#     zcr = librosa.feature.zero_crossing_rate(y=y); zcr40=np.tile(zcr[0][:40],40)[:40]

    features=np.reshape(np.vstack((mfccs,melspectrogram,chroma_stft,chroma_cq,chroma_cens
#                                   ,rmse40,spec_cent40,spec_bw40,contrast40,flatness40,rolloff40,poly40,tonnetz40,zcr40)
                                  )
                                 ),(40,5))
#     if(fold_no=='10'):
#         x_test.append(features)
#         y_test.append(label)
#     else:
#         x_train.append(features)
#         y_train.append(label)
    XX.append(features)
    YY.append(label)

HBox(children=(IntProgress(value=0, max=8732), HTML(value='')))






In [7]:
#converting the lists into numpy arrays
XX=np.array(XX)

YY=np.array(YY)

XX.shape,YY.shape

((8732, 40, 5), (8732,))

In [8]:
#reshaping into 2d to save in csv format
XX_2d=np.reshape(XX,(XX.shape[0],XX.shape[1]*XX.shape[2]))
XX_2d.shape

(8732, 200)

In [83]:
np.savetxt("features_5_data.csv",XX_2d,delimiter=",")
np.savetxt("labels_5_data.csv",YY,delimiter=",")

In [None]:
# XXX = genfromtxt('features_5_data.csv', delimiter=',')
# YYY = genfromtxt('labels_5_data.csv', delimiter=',')

In [9]:
for x in range(1,11):
    globals()['XX%s' % x] = XX[data.index[data['fold'] == x].tolist(),:]

In [11]:
XX1.shape,XX8.shape,XX10.shape

((873, 40, 5), (806, 40, 5), (837, 40, 5))

In [10]:
for x in range(1,11):
    globals()['YY%s' % x] = YY[data.index[data['fold'] == x].tolist()]

In [12]:
YY1.shape,YY8.shape,YY10.shape

((873,), (806,), (837,))

In [13]:
from keras.utils.np_utils import to_categorical
yy1 = to_categorical(YY1, num_classes=10)
yy2 = to_categorical(YY2, num_classes=10)
yy3 = to_categorical(YY3, num_classes=10)
yy4 = to_categorical(YY4, num_classes=10)
yy5 = to_categorical(YY5, num_classes=10)
yy6 = to_categorical(YY6, num_classes=10)
yy7 = to_categorical(YY7, num_classes=10)
yy8 = to_categorical(YY8, num_classes=10)
yy9 = to_categorical(YY9, num_classes=10)
yy10 = to_categorical(YY10, num_classes=10)

Using TensorFlow backend.


In [91]:
#reshaping to shape required by CNN
x_train_n1=np.reshape(x_train_n1,(x_train_n1.shape[0], 40,5,1))
x_test_1=np.reshape(x_test_1,(x_test_1.shape[0], 40,5,1))

#shapes
x_train_n1.shape,x_test_1.shape

((7859, 40, 5, 1), (873, 40, 5, 1))

In [19]:
from keras import Sequential
from keras.layers import Dense,Conv2D,MaxPooling2D,Flatten,Dropout

In [15]:
x_train_n1=np.vstack((XX2,XX3,XX4,XX5,XX6,XX7,XX8,XX9,XX10))
x_test_1=XX1
y_train_n1=np.vstack((yy2,yy3,yy4,yy5,yy6,yy7,yy8,yy9,yy10))
y_test_1=yy1

x_train_n2=np.vstack((XX1,XX3,XX4,XX5,XX6,XX7,XX8,XX9,XX10))
x_test_2=XX2
y_train_n2=np.vstack((yy1,yy3,yy4,yy5,yy6,yy7,yy8,yy9,yy10))
y_test_2=yy2

x_train_n3=np.vstack((XX1,XX2,XX4,XX5,XX6,XX7,XX8,XX9,XX10))
x_test_3=XX3
y_train_n3=np.vstack((yy1,yy2,yy4,yy5,yy6,yy7,yy8,yy9,yy10))
y_test_3=yy3

x_train_n4=np.vstack((XX1,XX2,XX3,XX5,XX6,XX7,XX8,XX9,XX10))
x_test_4=XX4
y_train_n4=np.vstack((yy1,yy2,yy3,yy5,yy6,yy7,yy8,yy9,yy10))
y_test_4=yy4

x_train_n5=np.vstack((XX1,XX2,XX3,XX4,XX6,XX7,XX8,XX9,XX10))
x_test_5=XX5
y_train_n5=np.vstack((yy1,yy2,yy3,yy4,yy6,yy7,yy8,yy9,yy10))
y_test_5=yy5

x_train_n6=np.vstack((XX1,XX2,XX3,XX4,XX5,XX7,XX8,XX9,XX10))
x_test_6=XX6
y_train_n6=np.vstack((yy1,yy2,yy3,yy4,yy5,yy7,yy8,yy9,yy10))
y_test_6=yy6

x_train_n7=np.vstack((XX1,XX2,XX3,XX4,XX5,XX6,XX8,XX9,XX10))
x_test_7=XX7
y_train_n7=np.vstack((yy1,yy2,yy3,yy4,yy5,yy6,yy8,yy9,yy10))
y_test_7=yy7

x_train_n8=np.vstack((XX1,XX2,XX3,XX4,XX5,XX6,XX7,XX9,XX10))
x_test_8=XX8
y_train_n8=np.vstack((yy1,yy2,yy3,yy4,yy5,yy6,yy7,yy9,yy10))
y_test_8=yy8

x_train_n9=np.vstack((XX1,XX2,XX3,XX4,XX5,XX6,XX7,XX8,XX10))
x_test_9=XX9
y_train_n9=np.vstack((yy1,yy2,yy3,yy4,yy5,yy6,yy7,yy8,yy10))
y_test_9=yy9

x_train_n10=np.vstack((XX1,XX2,XX3,XX4,XX5,XX6,XX7,XX8,XX9))
x_test_10=XX10
y_train_n10=np.vstack((yy1,yy2,yy3,yy4,yy5,yy6,yy7,yy8,yy9))
y_test_10=yy10


In [16]:
#reshaping to shape required by CNN
x_train_n1=np.reshape(x_train_n1,(x_train_n1.shape[0], 40,5,1))
x_test_1=np.reshape(x_test_1,(x_test_1.shape[0], 40,5,1))

x_train_n2=np.reshape(x_train_n2,(x_train_n2.shape[0], 40,5,1))
x_test_2=np.reshape(x_test_2,(x_test_2.shape[0], 40,5,1))

x_train_n3=np.reshape(x_train_n3,(x_train_n3.shape[0], 40,5,1))
x_test_3=np.reshape(x_test_3,(x_test_3.shape[0], 40,5,1))

x_train_n4=np.reshape(x_train_n4,(x_train_n4.shape[0], 40,5,1))
x_test_4=np.reshape(x_test_4,(x_test_4.shape[0], 40,5,1))

x_train_n5=np.reshape(x_train_n5,(x_train_n5.shape[0], 40,5,1))
x_test_5=np.reshape(x_test_5,(x_test_5.shape[0], 40,5,1))

x_train_n6=np.reshape(x_train_n6,(x_train_n6.shape[0], 40,5,1))
x_test_6=np.reshape(x_test_6,(x_test_6.shape[0], 40,5,1))

x_train_n7=np.reshape(x_train_n7,(x_train_n7.shape[0], 40,5,1))
x_test_7=np.reshape(x_test_7,(x_test_7.shape[0], 40,5,1))

x_train_n8=np.reshape(x_train_n8,(x_train_n8.shape[0], 40,5,1))
x_test_8=np.reshape(x_test_8,(x_test_8.shape[0], 40,5,1))

x_train_n9=np.reshape(x_train_n9,(x_train_n9.shape[0], 40,5,1))
x_test_9=np.reshape(x_test_9,(x_test_9.shape[0], 40,5,1))

x_train_n10=np.reshape(x_train_n10,(x_train_n10.shape[0], 40,5,1))
x_test_10=np.reshape(x_test_10,(x_test_10.shape[0], 40,5,1))




In [17]:
xtrains=[x_train_n1,x_train_n2,x_train_n3,x_train_n4,x_train_n5,
         x_train_n6,x_train_n7,x_train_n8,x_train_n9,x_train_n10]

xtests=[x_test_1,x_test_2,x_test_3,x_test_4,x_test_5,
         x_test_6,x_test_7,x_test_8,x_test_9,x_test_10]

ytrains=[y_train_n1,y_train_n2,y_train_n3,y_train_n4,y_train_n5,
         y_train_n6,y_train_n7,y_train_n8,y_train_n9,y_train_n10]

ytests=[y_test_1,y_test_2,y_test_3,y_test_4,y_test_5,
         y_test_6,y_test_7,y_test_8,y_test_9,y_test_10]

In [41]:
cvscores=[]
for xn,yn,xt,yt in zip(xtrains,ytrains,xtests,ytests):
  # create model
    model = Sequential()
    model.add(Conv2D(64,kernel_size=5,strides=1,padding="Same",activation="relu",input_shape=(40,5,1)))
    model.add(MaxPooling2D(padding="same"))

    model.add(Conv2D(128,kernel_size=5,strides=1,padding="same",activation="relu"))
    model.add(MaxPooling2D(padding="same"))
    model.add(Dropout(0.4))

    model.add(Flatten())

    model.add(Dense(256,activation="relu"))
    model.add(Dropout(0.4))

    model.add(Dense(512,activation="relu"))
    model.add(Dropout(0.4))
    
#     model.add(Dense(512,activation="relu"))###
#     model.add(Dropout(0.4))###
        
    model.add(Dense(10,activation="softmax"))
    # Compile model
    model.compile(optimizer="adam",loss="categorical_crossentropy",metrics=["accuracy"])
    # Fit the model
    model.fit(xn,yn,batch_size=50,epochs=30,validation_data=(xt,yt),    verbose=1)    
    # evaluate the model
    train_loss_score=model.evaluate(xn,yn)
    test_loss_score=model.evaluate(xt,yt)
    print("%s: %.2f%%" % (model.metrics_names[1], test_loss_score[1]*100))
    cvscores.append(test_loss_score[1] * 100)
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

Train on 7859 samples, validate on 873 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
acc: 56.70%
Train on 7844 samples, validate on 888 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
acc: 60.14%
Train on 7807 samples, validate on 925 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/3

In [42]:
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))
# https://machinelearningmastery.com/evaluate-performance-deep-learning-models-keras/

63.90% (+/- 5.58%)


In [43]:
cvscores

[56.70103092783505,
 60.13513518883301,
 57.62162170539031,
 59.79797974981443,
 66.34615384615384,
 60.87484818182768,
 65.75178997613365,
 65.88089328545792,
 71.81372549019608,
 74.07407407407408]

#### 