In [None]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Quick look at the input tree: record and print the first file that
# os.walk reports in every directory under /kaggle/input.
paths, labels = [], []
for dirname, _, filenames in os.walk('/kaggle/input'):
    if filenames:
        filename = filenames[0]
        paths.append(os.path.join(dirname, filename))
        print(filename)

In [None]:
# Walk the input tree and collect every file path together with its label.
# The label is the third-from-last '_'-separated field of the file name,
# cut at the first '.', and lower-cased.
paths, labels = [], []
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        paths.append(os.path.join(dirname, filename))
        label = filename.split('_')[-3].split('.')[0]
        labels.append(label.lower())
    # NOTE(review): the cap is only tested once per directory and only
    # matches an exact count of 3000 -- confirm that is the intent.
    if len(paths) == 3000:
        break
print('Dataset is Loaded')

In [None]:
# Number of file paths collected so far.
len(paths)

In [None]:
# First five collected file paths.
paths[:5]

In [None]:
# First ten labels, in the same order as `paths`.
labels[:10]

In [None]:
## Pair every audio path ('alarm') with its label in a single dataframe
df = pd.DataFrame({'alarm': paths, 'label': labels})
df.head(10)

In [None]:
# Number of files per label.
df['label'].value_counts()

In [None]:
# Bar chart of files per label. The column is named explicitly because
# newer seaborn releases take `data` as the first positional argument, so a
# bare Series passed positionally is not read as the x variable.
sns.countplot(data=df, x='label')

In [None]:
def waveplot(data, sr, alarm):
    """Draw the waveform of one audio clip and show the figure.

    data  -- audio samples, passed to librosa.display.waveshow
    sr    -- sampling rate of `data`
    alarm -- text used as the figure title
    """
    _, ax = plt.subplots(figsize=(10, 4))
    ax.set_title(alarm, size=20)
    librosa.display.waveshow(data, sr=sr, ax=ax)
    plt.show()
    
def spectogram(data, sr, alarm):
    """Draw a dB-scaled STFT spectrogram of one audio clip.

    data  -- audio samples, passed to librosa.stft
    sr    -- sampling rate of `data`
    alarm -- text used as the figure title
    """
    # Magnitude of the short-time Fourier transform, converted to decibels.
    magnitude = np.abs(librosa.stft(data))
    xdb = librosa.amplitude_to_db(magnitude)

    plt.figure(figsize=(11, 4))
    plt.title(alarm, size=20)
    librosa.display.specshow(
        xdb,
        sr=sr,
        x_axis='time',
        y_axis='hz',
        cmap="coolwarm",
    )
    plt.colorbar()

In [None]:
# Preview the first 'danger' clip: waveform, spectrogram and audio player.
alarm = 'danger'
path = df.loc[df['label'] == alarm, 'alarm'].iloc[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, alarm)
spectogram(data, sampling_rate, alarm)
Audio(path)

In [None]:
# Preview the first 'fire' clip: waveform, spectrogram and audio player.
alarm = 'fire'
path = df.loc[df['label'] == alarm, 'alarm'].iloc[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, alarm)
spectogram(data, sampling_rate, alarm)
Audio(path)

In [None]:
# Preview the first 'gas' clip: waveform, spectrogram and audio player.
alarm = 'gas'
path = df.loc[df['label'] == alarm, 'alarm'].iloc[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, alarm)
spectogram(data, sampling_rate, alarm)
Audio(path)

In [None]:
# Preview the first 'non' clip: waveform, spectrogram and audio player.
alarm = 'non'
path = df.loc[df['label'] == alarm, 'alarm'].iloc[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, alarm)
spectogram(data, sampling_rate, alarm)
Audio(path)

In [None]:
# Preview the first 'tsunami' clip: waveform, spectrogram and audio player.
alarm = 'tsunami'
path = df.loc[df['label'] == alarm, 'alarm'].iloc[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, alarm)
spectogram(data, sampling_rate, alarm)
Audio(path)

<h1><b>Feature Extraction</b></h1>

In [None]:
def extract_mfcc(filename, duration=1, offset=0.5, n_mfcc=40):
    """Return one time-averaged MFCC vector for an audio file.

    Parameters
    ----------
    filename : path of the audio file, passed to ``librosa.load``.
    duration : seconds of audio to read (default 1).
    offset   : seconds to skip from the start before reading (default 0.5).
    n_mfcc   : number of MFCC coefficients to compute (default 40).

    Returns
    -------
    numpy.ndarray with one mean value per coefficient.

    The defaults reproduce the previously hard-coded settings, so calling
    ``extract_mfcc(filename)`` behaves as before.
    """
    y, sr = librosa.load(filename, duration=duration, offset=offset)
    coefficients = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    # Average over the leading axis of the transposed matrix, which leaves
    # a single value per coefficient.
    mfcc = np.mean(coefficients.T, axis=0)
    return mfcc

In [None]:
# Sanity check: MFCC vector for the row labelled 0.
extract_mfcc(df.loc[0, 'alarm'])

In [None]:
# Compute one MFCC vector per audio file (loads every file, so this is slow).
X_mfcc = df['alarm'].apply(extract_mfcc)

In [None]:
# Inspect the result: a Series holding one MFCC vector per file.
X_mfcc

In [None]:
# Stack the per-file vectors into a single (n_files, n_coefficients) array.
X = np.array(X_mfcc.tolist())
X.shape

In [None]:
## Add a trailing axis so every sample has shape (n_coefficients, 1),
## the layout the LSTM input layer is declared with.
# The ndim guard keeps the cell idempotent: re-running it no longer stacks
# a second trailing axis onto an already-expanded array.
if X.ndim == 2:
    X = np.expand_dims(X, -1)
X.shape

In [None]:
from sklearn.preprocessing import OneHotEncoder
# One-hot encode the label column; the fitted encoder is kept in `enc`.
label_column = df[['label']]
enc = OneHotEncoder().fit(label_column)
y = enc.transform(label_column)

In [None]:
# Convert the encoder output to a dense array. The guard makes the cell safe
# to re-run: once `y` is a plain ndarray it has no `toarray` method.
if hasattr(y, 'toarray'):
    y = y.toarray()

In [None]:
# Shape of the one-hot target matrix (rows = files, columns = label categories).
y.shape

In [None]:
from sklearn.model_selection import train_test_split
import tensorflow as tf

In [None]:
# Hold out 10% of the data for testing, then 20% of the remainder for
# validation. A fixed seed keeps the three partitions identical across
# kernel restarts, so the reported metrics are reproducible.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=SPLIT_SEED
)
X_train, X_validation, y_train, y_validation = train_test_split(
    X_train, y_train, test_size=0.2, random_state=SPLIT_SEED
)

In [None]:
# Shapes of each partition, printed as features then targets for
# train, validation and test.
print(*(part.shape for part in (X_train, y_train, X_validation, y_validation, X_test, y_test)))


<h1><b>Model Creation</b></h1>

In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

# Take the layer sizes from the prepared arrays instead of hard-coding them,
# so the network still matches the data if the number of MFCC coefficients
# or the number of label categories changes.
input_shape = tuple(X_train.shape[1:])   # per-sample shape
n_classes = y_train.shape[1]             # width of the one-hot targets

model = Sequential([
    LSTM(256, return_sequences=False, input_shape=input_shape),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    # One softmax output per label category.
    Dense(n_classes, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

In [None]:
# Fit on the training partition for 30 epochs with batches of 32, evaluating
# on the validation partition after every epoch.
history = model.fit(
    X_train,
    y_train,
    epochs=30,
    batch_size=32,
    validation_data=(X_validation, y_validation),
)

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
# One x position per recorded epoch. Deriving it from the history keeps the
# plot valid if the epoch count passed to fit() changes.
epochs = list(range(len(acc)))

plt.plot(epochs, acc, label='train accuracy')
plt.plot(epochs, val_acc, label='val accuracy')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()

In [None]:
# Training and validation loss per epoch (x positions come from `epochs`).
loss = history.history['loss']
val_loss = history.history['val_loss']

for curve, curve_label in ((loss, 'train loss'), (val_loss, 'val loss')):
    plt.plot(epochs, curve, label=curve_label)
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix
# Score the validation partition once and reuse the probabilities; the
# second, identical predict() call only repeated the inference.
predict_x = model.predict(X_validation)
y_pred = predict_x  # kept so the name stays defined for any later use
classes_x = np.argmax(predict_x, axis=1)

# NOTE: despite its name, y_test_class holds the *validation* class indices.
y_test_class = np.argmax(y_validation, axis=1)
cf_matrix = confusion_matrix(y_test_class, classes_x)
print(cf_matrix)

In [None]:
# Confusion matrix of the validation predictions.
# fmt='d' prints the integer counts as-is; the default annotation format
# switches to scientific notation for counts of 100 or more.
ax = sns.heatmap(cf_matrix, annot=True, fmt='d', cmap='Blues')

ax.set_title('Confusion Matrix Validation Data\n\n')
ax.set_xlabel('Predicted Condition')
ax.set_ylabel('Actual Condition')

## Tick labels: use the encoder's category names. Their order is the order
## of the one-hot columns, which is the class index produced by argmax.
class_names = list(enc.categories_[0])
ax.xaxis.set_ticklabels(class_names)
ax.yaxis.set_ticklabels(class_names)
## Display the visualization of the Confusion Matrix.
plt.show()

In [None]:
from sklearn.metrics import classification_report
# Per-class precision, recall and F1 for the validation predictions.
report = classification_report(y_test_class, classes_x)
print(report)