# Creating the dataset for the CNN

In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import os

def create_spectrogram(audio_path, output_path, sr=22050, n_mels=128, hop_length=512):
    """Render a log-scaled mel spectrogram of an audio file and save it as an image.

    Args:
        audio_path: Path of the audio file, passed to ``librosa.load``.
        output_path: Destination path, passed to ``plt.savefig``.
        sr: Sampling rate requested from ``librosa.load``.
        n_mels: Number of mel bands in the spectrogram.
        hop_length: Hop length between successive spectrogram frames.

    Returns:
        None. The image is written to ``output_path``.

    Note:
        No ``dpi`` is passed to ``savefig`` and the bounding box is trimmed
        (``bbox_inches='tight'``), so the pixel size of the saved image is
        not pinned by this function.
    """
    # Load the audio file
    y, sr = librosa.load(audio_path, sr=sr)

    # Generate the mel spectrogram. The signal is passed by keyword because
    # librosa >= 0.10 no longer accepts it as a positional argument.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, hop_length=hop_length)

    # Convert to log scale (dB), relative to the loudest bin
    S_dB = librosa.power_to_db(S, ref=np.max)

    # Plot and save the spectrogram; the figure is always closed, even when
    # plotting or saving fails, so batch runs do not accumulate open figures.
    fig = plt.figure(figsize=(3, 3))
    try:
        librosa.display.specshow(S_dB, sr=sr, hop_length=hop_length, x_axis='time', y_axis='mel')
        plt.axis('off')  # Turn off the axis so only the spectrogram is saved
        plt.tight_layout()
        plt.savefig(output_path, bbox_inches='tight', pad_inches=0)
    finally:
        plt.close(fig)

# Source folder with the recordings and destination folder for the images
audio_dir = 'path/to/audio/files'
output_dir = 'data/spectrograms'

# Create the destination folder; an already existing folder is not an error
os.makedirs(output_dir, exist_ok=True)

# Write one PNG spectrogram per .wav entry found directly in audio_dir
for audio_file in os.listdir(audio_dir):
    if not audio_file.endswith('.wav'):
        continue  # only .wav entries are converted
    audio_path = os.path.join(audio_dir, audio_file)
    # Same base name as the recording, with the extension swapped for .png
    output_path = os.path.join(output_dir, os.path.splitext(audio_file)[0] + ".png")
    create_spectrogram(audio_path, output_path)

# Spectrogram-based CNN Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Build the network layer by layer: three convolution + max-pooling stages,
# then a flattened dense head with dropout.
model = Sequential()

# Convolutional feature extractor; the first layer declares 128x128x3 input
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))

# Classifier head
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
# 2 classes: proper and improper, so 2 softmax output neurons
model.add(Dense(2, activation='softmax'))

# Load images and prepare the training and testing data
# NOTE(review): X_train, X_test, y_train and y_test are not defined anywhere
# in the visible file; they must be created before this point.

# Force the (num_samples, 128, 128, 3) layout declared by the model's first
# layer and divide by 255 (assumes 8-bit pixel values -- TODO confirm)
input_dims = (-1, 128, 128, 3)
X_train = X_train.reshape(*input_dims) / 255.0
X_test = X_test.reshape(*input_dims) / 255.0

# One-hot encode the labels for the two output classes
y_train, y_test = (
    to_categorical(labels, num_classes=2) for labels in (y_train, y_test)
)

# Configure training: Adam optimizer, categorical cross-entropy, accuracy metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit for 10 epochs, holding out 20% of the training data for validation
model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=10,
)

# Score the trained model on the test set and report the accuracy
test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics
print(f"Test Accuracy: {accuracy * 100:.2f}%")
