In [28]:
import os
import numpy as np
from scipy.io import wavfile
from scipy.signal import lfilter

In [33]:
# Function to apply a comb filter to audio data
def apply_comb_filter(data, fs, w0, gain):
    delay = int(fs / w0)  # Calculate the delay in samples
    feedback_gain = gain  # Feedback gain for the comb filter
    comb_filter = np.zeros(delay + 1)
    comb_filter[0] = 1

    # Apply the comb filter using a difference equation
    filtered_data = lfilter([1], comb_filter, data)
    filtered_data[delay:] -= feedback_gain * filtered_data[:-delay]

    return filtered_data

In [31]:
# Function to process audio files and generate dataset
# def create_dataset(audio_folder, target_folder, param_folder, w_values=[100, 200, 300, 400, 500], g_values=[0.2, 0.4, 0.6, 0.8, 1.0]):
def create_dataset(audio_folder, target_folder, param_folder, n_examples):
    # Create target folder if it doesn't exist
    if not os.path.exists(target_folder):
        os.makedirs(target_folder)
    if not os.path.exists(param_folder):
        os.makedirs(param_folder)

    # List all audio files in the input folder
    audio_files = os.listdir(audio_folder)

    for audio_file in audio_files:
        # Load the audio file
        audio_path = os.path.join(audio_folder, audio_file)
        fs, audio_data = wavfile.read(audio_path)

        for i, (w, gain) in enumerate(zip(w_values, g_values)):
            # Apply the comb filter with gain to the audio data
            filtered_data = apply_comb_filter(audio_data, fs, w, gain)

            # Save the filtered data to a new audio file
            # target_path = os.path.join(target_folder, f'{audio_file}_{i}.wav')
            target_path = os.path.join(target_folder, f'{audio_file}')
            wavfile.write(target_path, fs, filtered_data.astype(np.int16))

            # Save the w0 and gain values as targets for the neural network
            param_values = np.array([w, gain])
            param_file = f'{audio_file}_{i}.npy'
            param_file = f'{audio_file}.npy'
            param_path = os.path.join(param_folder, param_file)
            np.save(param_path, param_values)

        print(f'Processed: {audio_file}')

In [32]:
# Set the paths for the audio folder and target folder
audio_folder = 'data/toy_dataset/input/dry'
target_folder = 'data/toy_dataset/input/wet'
param_folder = 'data/toy_dataset/target'

# Create the dataset
create_dataset(audio_folder, target_folder, param_folder)

Processed: balloon_burst_16.wav
Processed: balloon_burst_17.wav
Processed: balloon_burst_29.wav
Processed: balloon_burst_15.wav
Processed: balloon_burst_14.wav
Processed: balloon_burst_28.wav
Processed: balloon_burst_10.wav
Processed: balloon_burst_38.wav
Processed: balloon_burst_39.wav
Processed: balloon_burst_11.wav
Processed: balloon_burst_13.wav
Processed: balloon_burst_12.wav
Processed: balloon_burst_49.wav
Processed: balloon_burst_3.wav
Processed: balloon_burst_2.wav
Processed: balloon_burst_48.wav
Processed: balloon_burst_1.wav
Processed: balloon_burst_5.wav
Processed: balloon_burst_4.wav
Processed: balloon_burst_6.wav
Processed: balloon_burst_7.wav
Processed: balloon_burst_40.wav
Processed: balloon_burst_41.wav
Processed: balloon_burst_43.wav
Processed: balloon_burst_9.wav
Processed: balloon_burst_8.wav
Processed: balloon_burst_42.wav
Processed: balloon_burst_46.wav
Processed: balloon_burst_47.wav
Processed: balloon_burst_45.wav
Processed: balloon_burst_50.wav
Processed: balloo