In [34]:
import os
import numpy as np
from scipy.io import wavfile
from scipy.signal import lfilter
import random

In [35]:
# Function to apply a comb filter to audio data
def apply_comb_filter(data, fs, w0, gain):
    delay = int(fs / w0)  # Calculate the delay in samples
    feedback_gain = gain  # Feedback gain for the comb filter
    comb_filter = np.zeros(delay + 1)
    comb_filter[0] = 1

    # Apply the comb filter using a difference equation
    filtered_data = lfilter([1], comb_filter, data)
    filtered_data[delay:] -= feedback_gain * filtered_data[:-delay]

    return filtered_data

In [43]:
# Function to process audio files and generate dataset
# def create_dataset(audio_folder, target_folder, param_folder, w_values=[100, 200, 300, 400, 500], g_values=[0.2, 0.4, 0.6, 0.8, 1.0]):
def create_dataset(audio_folder, target_folder, param_folder, n_examples=1):
    # Create target folder if it doesn't exist
    if not os.path.exists(target_folder):
        os.makedirs(target_folder)
    if not os.path.exists(param_folder):
        os.makedirs(param_folder)

    # List all audio files in the input folder
    audio_files = os.listdir(audio_folder)

    for audio_file in audio_files:
        # Load the audio file
        audio_path = os.path.join(audio_folder, audio_file)
        fs, audio_data = wavfile.read(audio_path)

        # Apply the comb filter with gain to the audio data
        for i in range(n_examples):
            w = random.randint(100, 1000)
            gain = random.random()
            print(w, gain)
            filtered_data = apply_comb_filter(audio_data, fs, w, gain)

            # Save the filtered data to a new audio file
            target_path = os.path.join(target_folder, f'{audio_file}_{i}.wav')
            # target_path = os.path.join(target_folder, f'{audio_file}')
            wavfile.write(target_path, fs, filtered_data.astype(np.int16))

            # Save the w0 and gain values as targets for the neural network
            param_values = np.array([w, gain])
            param_file = f'{audio_file}_{i}.npy'
            # param_file = f'{audio_file}.npy'
            param_path = os.path.join(param_folder, param_file)
            np.save(param_path, param_values)
        print(f'Processed: {audio_file}')

In [44]:
# Set the paths for the audio folder and target folder
audio_folder = 'data/toy_dataset/input/dry'
target_folder = 'data/toy_dataset/input/wet'
param_folder = 'data/toy_dataset/target'

# Create the dataset
create_dataset(audio_folder, target_folder, param_folder)

202 0.18301063816355378
Processed: balloon_burst_16.wav
419 0.766050154525126
Processed: balloon_burst_17.wav
189 0.045626069526120805
Processed: balloon_burst_29.wav
647 0.9632507996334518
Processed: balloon_burst_15.wav
805 0.8537849320037901
Processed: balloon_burst_14.wav
442 0.8685363927260512
Processed: balloon_burst_28.wav
344 0.40615052377019056
Processed: balloon_burst_10.wav
904 0.05918072695294707
Processed: balloon_burst_38.wav
358 0.059157729222459876
Processed: balloon_burst_39.wav
271 0.875859963432508
Processed: balloon_burst_11.wav
111 0.6788189299524302
Processed: balloon_burst_13.wav
347 0.3542022934491442
Processed: balloon_burst_12.wav
667 0.8258872522321132
Processed: balloon_burst_49.wav
346 0.4279511678354244
Processed: balloon_burst_3.wav
379 0.09239437579771603
Processed: balloon_burst_2.wav
200 0.4777412104790596
Processed: balloon_burst_48.wav
934 0.737407325463368
Processed: balloon_burst_1.wav
941 0.5646852871423073
Processed: balloon_burst_5.wav
151 0.418