In [6]:
import csv
import os
import librosa
import numpy as np
from keras.models import load_model

# Define the mapping from model output indices to class labels
class_mapping = {0: 'fireworks', 1: 'rain', 2: 'wind'}

# Define the paths to the saved models (relative to the working directory)
mfcc_model_path = 'my_model-1-ogg-mfcc.h5'
mel_model_path = '1-ogg-mel.h5'

# Load the models
mfcc_model = load_model(mfcc_model_path)
mel_model = load_model(mel_model_path)

# Define the path to the directory containing the clips.
# The default is a machine-specific absolute path, so it can be overridden
# through the CLIPS_DIR environment variable without editing the notebook.
clips_dir = os.environ.get('CLIPS_DIR', '/home/tnguyen/Downloads/s1-1-sec-ogg/')




In [7]:
# Write one CSV row per .ogg clip holding the label predicted by each model.
# newline='' is required by the csv module for the file object it writes to;
# without it, platforms that translate '\n' get a blank line after every row.
with open('predictions_1sec_ogg.csv', 'w', newline='') as f:
    writer = csv.writer(f)

    # Write the header row
    writer.writerow(['clip', 'mfcc_prediction', 'mel_prediction'])

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the CSV stable from one run to the next.
    for filename in sorted(os.listdir(clips_dir)):
        # Skip anything that is not a .ogg file
        if not filename.endswith('.ogg'):
            continue

        # Load the audio file
        audio, sr = librosa.load(os.path.join(clips_dir, filename), res_type='kaiser_fast')

        # Extract the MFCCs and collapse them to a single vector by averaging
        # over the first axis of the transposed feature matrix
        mfccs = np.mean(librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40).T, axis=0)

        # Extract the Mel-spectrogram, collapsed to a single vector the same way
        # NOTE(review): n_fft / hop_length presumably mirror the values used
        # when the mel model was trained -- confirm before changing them.
        mel = np.mean(librosa.feature.melspectrogram(y=audio, sr=sr, n_fft=2048, hop_length=1024).T, axis=0)

        # Reshape the features into a batch of one sample for prediction
        mfccs = mfccs.reshape(1, -1)
        mel = mel.reshape(1, -1)

        # Make predictions; verbose=0 stops Keras from printing a progress
        # bar for every single clip, which would flood the cell output
        mfcc_prediction = mfcc_model.predict(mfccs, verbose=0).argmax(axis=-1)
        mel_prediction = mel_model.predict(mel, verbose=0).argmax(axis=-1)

        # Convert numerical predictions to class labels (int() turns the
        # numpy index into a plain Python int before the dict lookup)
        mfcc_prediction_label = class_mapping[int(mfcc_prediction[0])]
        mel_prediction_label = class_mapping[int(mel_prediction[0])]

        # Write the predictions to the CSV file
        writer.writerow([filename, mfcc_prediction_label, mel_prediction_label])













































