In [10]:
import glob
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram



In [53]:
def extract_feature(file_name):
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
    mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)
    return mfccs,chroma,mel,contrast,tonnetz

In [54]:
def one_hot_encode(labels):
    n_labels = len(labels)
    n_unique_labels = len(np.unique(labels))
    one_hot_encode = np.zeros((n_labels,n_unique_labels))
    one_hot_encode[np.arange(n_labels), labels] = 1
    return one_hot_encode

In [55]:
meta_data = pd.read_csv('../mixed/mixed_metadata.csv')

#x = []
#sr = []
audio_class = []
folder_name = '../mixed/'
features, labels = np.empty((0,193)), np.empty(0)

for filename in os.listdir(folder_name):
    if filename != "mixed_metadata.csv":
        file = os.path.join(folder_name, filename)

        
        mfccs, chroma, mel, contrast,tonnetz = extract_feature(file)
        ext_features = np.hstack([mfccs,chroma,mel,contrast,tonnetz])
        features = np.vstack([features,ext_features])

        temp_index = meta_data[meta_data['slice_file_name'] == filename].index
        audio_class.append(meta_data['classID'][temp_index[0]])


  if not j.flags.writeable or j.dtype not in (np.int32, np.int64):


In [197]:
np.unique(meta_data['class'])

array(['air_conditioner', 'car_horn', 'children_playing', 'dog_bark',
       'drilling', 'engine_idling', 'gun_shot', 'jackhammer', 'siren',
       'street_music'], dtype=object)

In [None]:
pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                   columns=['a', 'b', 'c'])

In [224]:
class_name = ([0 , 'air_conditioner'], [1, 'car_horn'], [2, 'children_playing'], [3, 'dog_bark'], 
                          [4, 'drilling'], [5, 'engine_idling'], [6, 'gun_shot'], [7, 'jackhammer'], 
                          [8, 'siren'], [9, 'street_music'])

In [225]:
class_name

([0, 'air_conditioner'],
 [1, 'car_horn'],
 [2, 'children_playing'],
 [3, 'dog_bark'],
 [4, 'drilling'],
 [5, 'engine_idling'],
 [6, 'gun_shot'],
 [7, 'jackhammer'],
 [8, 'siren'],
 [9, 'street_music'])

In [68]:
from sklearn.externals import joblib
filename = 'features_arrary.sav'
joblib.dump(features, filename)  

['features_arrary.sav']

In [63]:
from sklearn.externals import joblib
filename = 'audio_class.sav'
joblib.dump(audio_class, filename)  

['audio_class.sav']

In [67]:
features = np.array(features)

In [69]:
features.shape

(26196, 193)

In [73]:
labels = one_hot_encode(audio_class)

In [77]:
labels.shape

(26196, 10)

In [99]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing  import StandardScaler

train_x_temp, test_x, train_y_temp, test_y = train_test_split(features, labels, test_size=0.2, random_state=0)
train_x, valid_x, train_y, valid_y = train_test_split(train_x_temp, train_y_temp, test_size=0.2, random_state=0)

sc = StandardScaler()
sc.fit(train_x)


train_x = sc.transform(train_x)
valid_x = sc.transform(valid_x)
test_x = sc.transform(test_x)

In [96]:
train_x.shape, valid_x.shape, test_x.shape

((16764, 193), (4192, 193), (5240, 193))

In [101]:
import matplotlib.pyplot as plt
from scipy import stats
import tensorflow as tf
import seaborn as sns
from pylab import rcParams
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error
import h5py
from keras.models import Model, load_model
from keras.layers import Input, Dense, BatchNormalization, Dropout
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import regularizers

In [91]:
model=Sequential()

In [92]:
model.add(Dense(units=400,activation='relu',input_dim=193))
model.add(Dropout(0.4))
model.add(Dense(units=500,activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(units=400,activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(units=10,activation='softmax'))

In [93]:
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

In [94]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 400)               77600     
_________________________________________________________________
dropout_1 (Dropout)          (None, 400)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 500)               200500    
_________________________________________________________________
dropout_2 (Dropout)          (None, 500)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 400)               200400    
_________________________________________________________________
dropout_3 (Dropout)          (None, 400)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 10)               

In [None]:
model.fit(x_train,y_train,epochs=30,validation_data=(x_test,y_test),batch_size=50)

In [102]:
nb_epoch = 150
batch_size = 32

checkpointer = ModelCheckpoint(filepath="4_8_NN.h5",
                               verbose=0,
                               save_best_only=True)

earlystopping = EarlyStopping(monitor='val_loss', patience=2, verbose=0) # 'patience' number of not improving epochs

history = model.fit(train_x, train_y,
                    epochs=nb_epoch,
                    batch_size=batch_size,
                    shuffle=True,
                    validation_data=(valid_x, valid_y),
                    verbose=1,
                    callbacks=[checkpointer, #tensorboard, 
                               earlystopping]).history

Train on 16764 samples, validate on 4192 samples
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150


In [103]:
predit_y = model.predict(test_x)

In [104]:
predit_y

array([[3.6654751e-14, 7.8250151e-10, 3.6940939e-08, ..., 8.3407605e-15,
        3.4723479e-07, 9.9999964e-01],
       [4.6948293e-25, 5.3924829e-21, 1.4348701e-19, ..., 1.5812973e-35,
        1.0000000e+00, 1.1813175e-14],
       [8.7416673e-04, 7.6493260e-04, 1.6314220e-02, ..., 2.5681002e-04,
        1.5521420e-02, 8.6121596e-03],
       ...,
       [1.5003501e-15, 1.3204537e-10, 9.9981850e-01, ..., 1.1620793e-19,
        3.0540549e-11, 2.8693546e-06],
       [9.9996924e-01, 2.8379576e-08, 3.1184801e-08, ..., 1.0289728e-10,
        6.4888432e-08, 2.5297359e-07],
       [5.5890232e-17, 2.4550109e-16, 9.9989259e-01, ..., 2.1594463e-21,
        2.6886187e-09, 2.0912441e-09]], dtype=float32)

In [191]:
test_class = []
for i in range(len(test_y)):
    temp = np.where(test_y[i] == 1)
    test_class.append(int(temp[0]))
    
pred_class = []
for i in range(len(test_y)):
    temp_max = max(predit_y[i])
    temp = np.where(predit_y[i] == temp_max)
    pred_class.append(int(temp[0]))

In [227]:
from sklearn.metrics import classification_report

print(classification_report(test_class, pred_class))

              precision    recall  f1-score   support

           0       0.96      0.90      0.93       605
           1       0.95      0.92      0.93       261
           2       0.68      0.87      0.76       617
           3       0.93      0.71      0.80       631
           4       0.95      0.93      0.94       616
           5       0.96      0.94      0.95       589
           6       0.89      0.84      0.86       217
           7       0.97      0.96      0.96       571
           8       0.79      0.92      0.85       546
           9       0.90      0.85      0.87       587

    accuracy                           0.88      5240
   macro avg       0.90      0.88      0.89      5240
weighted avg       0.89      0.88      0.89      5240



In [228]:
class_name

([0, 'air_conditioner'],
 [1, 'car_horn'],
 [2, 'children_playing'],
 [3, 'dog_bark'],
 [4, 'drilling'],
 [5, 'engine_idling'],
 [6, 'gun_shot'],
 [7, 'jackhammer'],
 [8, 'siren'],
 [9, 'street_music'])