In [None]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Flatten
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from keras.regularizers import l2

# --- Configuration ----------------------------------------------------------
# Root folder of the dataset on the mounted drive.
sound_dir = "/content/drive/MyDrive/Window Breaking/Window Breaking"

# Spectrogram geometry: number of mel bins and number of time frames kept
# per clip.
n_mels, n_steps = 128, 128

# Training hyper-parameters: mini-batch size and number of passes over the data.
batch_size, epochs = 32, 10


# Define a function to convert an audio file to a mel-spectrogram
def file_to_melspec(filepath):
    """Load an audio file and return a fixed-size log-mel spectrogram.

    Parameters
    ----------
    filepath : str or path-like
        Audio file to read; passed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Array with ``n_mels`` rows and exactly ``n_steps`` columns, in dB
        relative to the spectrogram's own maximum.
    """
    # Every clip is resampled to 22.05 kHz so all spectrograms share a time scale.
    signal, sr = librosa.load(filepath, sr=22050)
    # `y` and `size` are passed by keyword: librosa 0.10 made them keyword-only,
    # so the positional form raises TypeError there. Keywords also work on
    # older releases.
    spec = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=n_mels)
    # Pad (with zeros) or truncate along the time axis to n_steps frames.
    spec = librosa.util.fix_length(spec, size=n_steps, axis=1)
    # Convert power to decibels.
    return librosa.power_to_db(spec, ref=np.max)

# Collect every audio path together with its class label.
# Each immediate sub-directory of `sound_dir` is one class and its name is the
# label attached to every file inside it.
sound_files = []
labels = []
# sorted(): os.listdir returns entries in arbitrary order; a fixed order keeps
# the file list (and anything derived from it) reproducible between runs.
for label in sorted(os.listdir(sound_dir)):
    label_dir = os.path.join(sound_dir, label)
    # Skip hidden entries (e.g. notebook checkpoint folders) and stray files
    # sitting next to the class folders; listing a plain file would raise.
    if label.startswith(".") or not os.path.isdir(label_dir):
        continue
    for filename in sorted(os.listdir(label_dir)):
        filepath = os.path.join(label_dir, filename)
        # Only regular, non-hidden files are treated as audio clips.
        if filename.startswith(".") or not os.path.isfile(filepath):
            continue
        sound_files.append(filepath)
        labels.append(label)

# Turn every collected file into a spectrogram and stack the results in one
# float32 array of shape (number of files, n_mels, n_steps).
specs = np.zeros((len(sound_files), n_mels, n_steps), dtype=np.float32)
cnt = 0
for path in sound_files:
    specs[cnt] = file_to_melspec(path)
    # Running counter printed on a single line as a progress indicator.
    print(cnt, end=" ")
    cnt += 1

# Map class names to integer indices in a fixed order.
# Iterating a bare set() of strings can yield a different order from one Python
# process to the next, which would silently change which output unit stands for
# which class. Sorting makes the assignment deterministic; descending order
# keeps the assignment shown in this notebook's recorded outputs
# (0 -> 'not glass sounds', 1 -> 'glass sounds').
label_map = {label: i for i, label in enumerate(sorted(set(labels), reverse=True))}
# Replace each class name by its index, then expand to one-hot rows by
# selecting rows of an identity matrix.
labels = [label_map[label] for label in labels]
labels = np.eye(len(label_map))[labels]

# Hold out 20% of the clips for validation. A fixed random_state makes the
# split (and therefore the reported validation metrics) repeatable.
train_specs, val_specs, train_labels, val_labels = train_test_split(
    specs, labels, test_size=0.2, random_state=42
)

# Define the RNN model architecture: one LSTM layer that returns its full
# output sequence, dropout, then a flattened dense classifier head with one
# softmax unit per class.
# NOTE(review): each sample is fed as (n_mels, n_steps), so the LSTM's sequence
# axis is the mel-bin axis, not time. Confirm this is intended; transposing
# would also require the same change wherever this model is used for prediction.
model = Sequential()
model.add(LSTM(128, input_shape=(n_mels, n_steps), return_sequences=True))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(len(label_map), activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" line to the output.
model.summary()

# Train the model on the training data and evaluate on the validation data
model.fit(train_specs, train_labels, batch_size=batch_size, epochs=epochs, validation_data=(val_specs, val_labels))


In [None]:
# Persist whatever model is currently bound to `model` under this relative
# file name (resolved against the kernel's working directory).
model.save('glass_sound_detection.h5')

In [None]:
# Define a function to load an audio file and convert it to a mel-spectrogram
def audio_file_to_melspec(audio_file):
    """Load one audio file and return a fixed-size log-mel spectrogram.

    Parameters
    ----------
    audio_file : str or path-like
        Audio file to read; passed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Array with ``n_mels`` rows and exactly ``n_steps`` columns, in dB
        relative to the spectrogram's own maximum.
    """
    # Resample to 22.05 kHz on load.
    signal, sr = librosa.load(audio_file, sr=22050)
    # `y` and `size` must be keywords: librosa 0.10 made them keyword-only, so
    # the positional form raises TypeError. Keywords also work on older releases.
    spec = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=n_mels)
    # Pad (with zeros) or truncate along the time axis to n_steps frames.
    spec = librosa.util.fix_length(spec, size=n_steps, axis=1)
    # Convert power to decibels.
    return librosa.power_to_db(spec, ref=np.max)

# Load an audio file and convert it to a mel-spectrogram
audio_file = '/content/sample_data/0_170.wav'
spec = audio_file_to_melspec(audio_file)

# Add the leading batch axis so the array matches the model input:
# (1, n_mels, n_steps)
spec = spec.reshape(1, n_mels, n_steps)

# Make a prediction using the trained model
prediction = model.predict(spec)

print(prediction.tolist())

# Translate the winning output index back to its class name.
# The inverse mapping gets its own name instead of overwriting `label_map`:
# inverting `label_map` in place flipped it back on every re-run of this cell,
# after which the integer lookup below could no longer succeed.
index_to_label = {i: label for label, i in label_map.items()}
predicted_label = np.argmax(prediction)
if predicted_label in index_to_label:
    print('Predicted class label:', index_to_label[predicted_label])
else:
    print('Unknown label')



[[0.9998306035995483, 0.00016933951701503247]]
Predicted class label: not glass sounds


In [None]:
# Display the current contents of label_map
label_map

{0: 'not glass sounds', 1: 'glass sounds'}

In [None]:
# Index of the largest entry of `prediction` (argmax over the flattened array)
np.argmax(prediction)

0

In [None]:
import numpy as np
import librosa
import sounddevice as sd


# load_model is imported here because this cell calls it: without the import a
# fresh kernel running the notebook top to bottom fails with NameError (the
# only other import of it sits in a later cell).
from keras.models import load_model

# Set the path to the trained model
model_path = '/content/glass_sound_detection.h5'

# Set the number of mel frequency bins in the spectrogram
n_mels = 128

# Set the number of time steps in each segment of the spectrogram
n_steps = 128

# Load the trained model
model = load_model(model_path)

# Define a function to convert an audio array to a mel-spectrogram
def array_to_melspec(audio, sr=22050):
    """Convert an in-memory audio signal to a fixed-size log-mel spectrogram.

    Parameters
    ----------
    audio : numpy.ndarray
        Audio samples, handed to ``librosa.feature.melspectrogram`` as ``y``.
    sr : int, optional
        Sample rate of ``audio`` in Hz. Defaults to 22050, the value that was
        previously hard-coded, so existing one-argument calls are unaffected.

    Returns
    -------
    numpy.ndarray
        Spectrogram with ``n_mels`` mel bins whose time axis (axis 1) is padded
        or truncated to ``n_steps`` frames, in dB relative to its own maximum.
    """
    # `y` and `size` must be keywords: librosa 0.10 made them keyword-only, so
    # the positional form raises TypeError. Keywords also work on older releases.
    spec = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=n_mels)
    # Pad (with zeros) or truncate along the time axis to n_steps frames.
    spec = librosa.util.fix_length(spec, size=n_steps, axis=1)
    # Convert power to decibels.
    return librosa.power_to_db(spec, ref=np.max)

# Define a function to record audio from the microphone
def record(duration):
    """Record `duration` seconds of audio from the microphone.

    The capture uses one channel at 22050 Hz. The call blocks until the
    recording has finished. If the captured array has more than one dimension,
    it is averaged over axis 1 before being returned.
    """
    sample_rate = 22050
    n_channels = 1
    n_frames = int(duration * sample_rate)
    recording = sd.rec(n_frames, samplerate=sample_rate, channels=n_channels)
    # Wait for the recording started above to complete.
    sd.wait()
    if recording.ndim <= 1:
        return recording
    # Collapse the channel axis by averaging.
    return np.mean(recording, axis=1)

# Record 3 seconds of audio from the microphone
duration = 3
print('Recording...')
audio = record(duration)

# Convert the audio to a mel-spectrogram and add the leading batch axis
spec = array_to_melspec(audio)
spec = np.expand_dims(spec, axis=0)
prediction = model.predict(spec)

# Output index -> class name. These are the names and index order shown by this
# notebook's recorded `label_map` output; the previous hard-coded names
# ('glass_breaking' / 'gunshot') did not correspond to the classes the model
# was trained on, so index 0 was reported as a glass break.
# NOTE(review): this must match the mapping in effect when the loaded model was
# trained - confirm after any retraining.
label_map = {0: 'not glass sounds', 1: 'glass sounds'}
predicted_label = np.argmax(prediction)
print('Predicted class label:', label_map[predicted_label])


In [None]:
# Show the version string of the installed keras package
import keras
keras.__version__

'2.11.0'

In [None]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Flatten
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from keras.regularizers import l2

# --- Configuration ----------------------------------------------------------
# Root folder of the dataset on the mounted drive.
sound_dir = "/content/drive/MyDrive/Window Breaking/Window Breaking"

# Spectrogram geometry: number of mel bins and number of time frames kept
# per clip.
n_mels, n_steps = 128, 128

# Training hyper-parameters: mini-batch size and number of passes over the data.
batch_size, epochs = 32, 10


# Define a function to convert an audio file to a mel-spectrogram
def file_to_melspec(filepath):
    """Load an audio file and return a fixed-size log-mel spectrogram.

    Parameters
    ----------
    filepath : str or path-like
        Audio file to read; passed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Array with ``n_mels`` rows and exactly ``n_steps`` columns, in dB
        relative to the spectrogram's own maximum.
    """
    # Every clip is resampled to 22.05 kHz so all spectrograms share a time scale.
    signal, sr = librosa.load(filepath, sr=22050)
    # `y` and `size` are passed by keyword: librosa 0.10 made them keyword-only,
    # so the positional form raises TypeError there. Keywords also work on
    # older releases.
    spec = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=n_mels)
    # Pad (with zeros) or truncate along the time axis to n_steps frames.
    spec = librosa.util.fix_length(spec, size=n_steps, axis=1)
    # Convert power to decibels.
    return librosa.power_to_db(spec, ref=np.max)

# Collect every audio path together with its class label.
# Each immediate sub-directory of `sound_dir` is one class and its name is the
# label attached to every file inside it.
sound_files = []
labels = []
# sorted(): os.listdir returns entries in arbitrary order; a fixed order keeps
# the file list (and anything derived from it) reproducible between runs.
for label in sorted(os.listdir(sound_dir)):
    label_dir = os.path.join(sound_dir, label)
    # Skip hidden entries (e.g. notebook checkpoint folders) and stray files
    # sitting next to the class folders; listing a plain file would raise.
    if label.startswith(".") or not os.path.isdir(label_dir):
        continue
    for filename in sorted(os.listdir(label_dir)):
        filepath = os.path.join(label_dir, filename)
        # Only regular, non-hidden files are treated as audio clips.
        if filename.startswith(".") or not os.path.isfile(filepath):
            continue
        sound_files.append(filepath)
        labels.append(label)

# Turn every collected file into a spectrogram and stack the results in one
# float32 array of shape (number of files, n_mels, n_steps).
specs = np.zeros((len(sound_files), n_mels, n_steps), dtype=np.float32)
cnt = 0
for path in sound_files:
    specs[cnt] = file_to_melspec(path)
    # Running counter printed on a single line as a progress indicator.
    print(cnt, end=" ")
    cnt += 1

# Map class names to integer indices in a fixed order.
# Iterating a bare set() of strings can yield a different order from one Python
# process to the next, which would silently change which output unit stands for
# which class. Sorting makes the assignment deterministic; descending order
# keeps the assignment shown in this notebook's recorded outputs
# (0 -> 'not glass sounds', 1 -> 'glass sounds').
label_map = {label: i for i, label in enumerate(sorted(set(labels), reverse=True))}
# Replace each class name by its index, then expand to one-hot rows by
# selecting rows of an identity matrix.
labels = [label_map[label] for label in labels]
labels = np.eye(len(label_map))[labels]

# Hold out 20% of the clips for validation. A fixed random_state makes the
# split (and therefore the reported validation metrics) repeatable.
train_specs, val_specs, train_labels, val_labels = train_test_split(
    specs, labels, test_size=0.2, random_state=42
)

# CNN classifier: two convolution + max-pooling stages, then a dense head with
# dropout and one softmax unit per class.
# NOTE(review): input_shape declares a trailing channel axis of size 1, while
# `specs` is allocated as (files, n_mels, n_steps) with no channel axis.
# Consider adding the axis explicitly (e.g. specs[..., np.newaxis]) - confirm
# how the installed Keras handles the rank difference.
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(n_mels, n_steps, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(len(label_map), activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" line to the output.
model.summary()

# Train the model on the training data and evaluate on the validation data
model.fit(train_specs, train_labels, batch_size=batch_size, epochs=epochs, validation_data=(val_specs, val_labels))
# Store the trained CNN under its own file name.
model.save('gsd_cnn.h5')


In [None]:
from keras.callbacks import EarlyStopping

# CNN with L2 weight penalties on the second convolution and the hidden dense
# layer, plus a heavier dropout before the softmax output.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(n_mels, n_steps, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(0.01)),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(64, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.5),
    Dense(len(label_map), activation='softmax'),
])

# Categorical cross-entropy loss, Adam optimizer, accuracy as the metric.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Stop training once validation accuracy has failed to improve by at least
# 0.01 for 5 consecutive epochs.
early_stop = EarlyStopping(monitor='val_accuracy', min_delta=0.01, patience=5, mode='max')

# Fit on the training split, validating on the held-out split after each epoch.
model.fit(
    train_specs,
    train_labels,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(val_specs, val_labels),
    callbacks=[early_stop],
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10


<keras.callbacks.History at 0x7f462a440190>

In [None]:
from keras.models import load_model
# Define a function to load an audio file and convert it to a mel-spectrogram
def audio_file_to_melspec(audio_file):
    """Load one audio file and return a fixed-size log-mel spectrogram.

    Parameters
    ----------
    audio_file : str or path-like
        Audio file to read; passed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Array with ``n_mels`` rows and exactly ``n_steps`` columns, in dB
        relative to the spectrogram's own maximum.
    """
    # Resample to 22.05 kHz on load.
    signal, sr = librosa.load(audio_file, sr=22050)
    # `y` and `size` must be keywords: librosa 0.10 made them keyword-only, so
    # the positional form raises TypeError. Keywords also work on older releases.
    spec = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=n_mels)
    # Pad (with zeros) or truncate along the time axis to n_steps frames.
    spec = librosa.util.fix_length(spec, size=n_steps, axis=1)
    # Convert power to decibels.
    return librosa.power_to_db(spec, ref=np.max)

# Load an audio file and convert it to a mel-spectrogram
audio_file = '/content/sample_data/segment_625.mp3'
spec = audio_file_to_melspec(audio_file)

# Add the leading batch axis so the array matches the model input:
# (1, n_mels, n_steps)
spec = spec.reshape(1, n_mels, n_steps)

# Make a prediction using the model stored on disk. Note that this replaces
# whatever model was bound to `model` before.
model_path = "/content/glass_sound_detection.h5"
model = load_model(model_path)
prediction = model.predict(spec)

print(prediction.tolist())

# Translate the winning output index back to its class name.
# The inverse mapping gets its own name instead of overwriting `label_map`:
# inverting `label_map` in place flipped it back on every re-run of this cell,
# after which the integer lookup below could no longer succeed.
index_to_label = {i: label for label, i in label_map.items()}
predicted_label = np.argmax(prediction)
if predicted_label in index_to_label:
    print('Predicted class label:', index_to_label[predicted_label])
else:
    print('Unknown label')





[[1.6284376513908683e-08, 1.0]]
Predicted class label: glass sounds
