In [269]:
import tensorflow as tf
import numpy as np
import os
import librosa

In [235]:
dataset_dir = 'data/toy_dataset'
# dry_audio_dir = os.path.join(dataset_dir, 'input/dry')
wet_audio_dir = os.path.join(dataset_dir, 'input/wet')
target_dir = os.path.join(dataset_dir, 'target')

audio_extension = '.wav'
target_extension = '.npy'

In [244]:
# dry_audio_files = [os.path.join(dry_audio_dir, filename) for filename in os.listdir(dry_audio_dir) if filename.endswith(audio_extension)]
wet_audio_files = [os.path.join(wet_audio_dir, filename) for filename in os.listdir(wet_audio_dir) if filename.endswith(audio_extension)]
target_files = [os.path.join(target_dir, filename) for filename in os.listdir(target_dir) if filename.endswith(target_extension)]

In [245]:
# dry_audio_files.sort()
wet_audio_files.sort()
target_files.sort()

In [246]:
def load_audio(file_path):
    audio = tf.io.read_file(file_path)
    audio = tf.audio.decode_wav(audio, desired_channels=1).audio
    return audio

def load_target(file_path):
    return tf.convert_to_tensor(np.load(file_path), dtype=tf.float32)

In [247]:
# dry_tensors = [load_audio(file) for file in dry_audio_files]
wet_tensors = [load_audio(file) for file in wet_audio_files]
target_tensors = [load_target(file) for file in target_files]

In [248]:
# dataset = tf.data.Dataset.from_tensor_slices((dry_tensors, wet_tensors, target_tensors))
dataset = tf.data.Dataset.from_tensor_slices(( wet_tensors, target_tensors))

In [249]:
dataset

<_TensorSliceDataset element_spec=(TensorSpec(shape=(44100, 1), dtype=tf.float32, name=None), TensorSpec(shape=(2,), dtype=tf.float32, name=None))>

In [250]:
# Sanity check
num_elements = 3
for data in dataset.take(num_elements):
    # dry_audio, wet_audio, target = data
    wet_audio, target = data
    # print("Dry audio shape:", dry_audio.shape)
    print("Wet audio shape:", wet_audio.shape)
    print("Target:", target)
    print()


Wet audio shape: (44100, 1)
Target: tf.Tensor([9.340000e+02 7.374073e-01], shape=(2,), dtype=float32)

Wet audio shape: (44100, 1)
Target: tf.Tensor([344.           0.40615052], shape=(2,), dtype=float32)

Wet audio shape: (44100, 1)
Target: tf.Tensor([271.        0.87586], shape=(2,), dtype=float32)



In [265]:
# Split the dataset into train and test sets
x, y = [], []
for wet, target in dataset:
    x.append(wet)
    y.append(target)

In [276]:
def mel_transform(audio):
  # Compute mel spectrogram using librosa
  mel_ = librosa.feature.melspectrogram(audio, sr=44100)
  # Convert mel spectrograms to logarithmic scale
  mel_ = librosa.power_to_db(mel_, ref=np.max)
  return mel_
@tf.function(input_signature=[tf.TensorSpec(None, tf.float32)])

def tf_mel(input):
  mel = tf.numpy_function(mel_transform, [input], tf.float32)
  return y

In [278]:
x = [tf_mel(tensor) for tensor in x]

 [3.4400000e+02 4.0615052e-01]
 [2.7100000e+02 8.7585998e-01]
 [3.4700000e+02 3.5420230e-01]
 [1.1100000e+02 6.7881894e-01]
 [8.0500000e+02 8.5378492e-01]
 [6.4700000e+02 9.6325082e-01]
 [2.0200000e+02 1.8301064e-01]
 [4.1900000e+02 7.6605016e-01]
 [4.3600000e+02 4.6521688e-01]
 [9.0900000e+02 8.1163192e-01]
 [3.7900000e+02 9.2394374e-02]
 [9.4700000e+02 5.8313626e-01]
 [8.0700000e+02 1.3723014e-02]
 [6.1300000e+02 9.6180387e-02]
 [7.0600000e+02 4.5950210e-01]
 [2.2800000e+02 6.5408722e-02]
 [9.0600000e+02 2.9620076e-02]
 [2.7700000e+02 6.9116455e-01]
 [8.3600000e+02 8.9477527e-01]
 [4.4200000e+02 8.6853641e-01]
 [1.8900000e+02 4.5626070e-02]
 [3.4600000e+02 4.2795116e-01]
 [6.7200000e+02 6.2127841e-01]
 [2.0600000e+02 2.6816574e-01]
 [1.3800000e+02 9.8963487e-01]
 [3.0600000e+02 8.4251177e-01]
 [9.5800000e+02 6.3419014e-01]
 [4.8000000e+02 4.4584569e-01]
 [5.8100000e+02 7.3946506e-01]
 [9.0300000e+02 2.8993592e-01]
 [9.0400000e+02 5.9180725e-02]
 [3.5800000e+02 5.9157729e-02]
 [1.5100

In [279]:
train_x, train_y = x, y
test_x = train_x[int(0.8 * len(train_x)):]
test_y = train_y[int(0.8 * len(train_y)):]
train_x = tf.stack(train_x)
train_y = tf.stack(train_y)

In [280]:
# Verify the shapes of train_x and train_y
print("train_x shape:", train_x.shape)
print("train_y shape:", train_y.shape)

# Reshape train_x if needed to match the model's input shape
train_x = np.reshape(train_x, (train_x.shape[0], -1))
print("Reshaped train_x:", train_x.shape)

# Verify the shapes again
print("Updated train_x shape:", train_x.shape)
print("Updated train_y shape:", train_y.shape)

# Define the model architecture
dim = train_x.shape[1:]
model = tf.keras.Sequential([
    tf.keras.layers.Dense(32, activation='relu', input_shape=dim),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(2)  # Output layer for w parameter
])

# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

train_x shape: (50, 50, 2)
train_y shape: (50, 2)
Reshaped train_x: (50, 100)
Updated train_x shape: (50, 100)
Updated train_y shape: (50, 2)


In [281]:
# Train the model
epochs = 1000
batch_size = 8
model.fit(x=train_x, y=train_y, validation_split=0.2, epochs=epochs, batch_size=batch_size)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E