In [1]:
import tensorflow as tf
import numpy as np
import os
import librosa

In [2]:
dataset_dir = 'data/comb_dataset'
# dry_audio_dir = os.path.join(dataset_dir, 'input/dry')
wet_audio_dir = os.path.join(dataset_dir, 'input/wet')
target_dir = os.path.join(dataset_dir, 'target')

audio_extension = '.wav'
target_extension = '.npy'

In [3]:
# dry_audio_files = [os.path.join(dry_audio_dir, filename) for filename in os.listdir(dry_audio_dir) if filename.endswith(audio_extension)]
wet_audio_files = [os.path.join(wet_audio_dir, filename) for filename in os.listdir(wet_audio_dir) if filename.endswith(audio_extension)]
target_files = [os.path.join(target_dir, filename) for filename in os.listdir(target_dir) if filename.endswith(target_extension)]

In [4]:
# dry_audio_files.sort()
wet_audio_files.sort()
target_files.sort()

In [5]:
def load_audio(file_path):
    audio = tf.io.read_file(file_path)
    audio = tf.audio.decode_wav(audio, desired_channels=1).audio
    return audio

def load_target(file_path):
    return tf.convert_to_tensor(np.load(file_path), dtype=tf.float32)

In [6]:
# dry_tensors = [load_audio(file) for file in dry_audio_files]
wet_tensors = [load_audio(file) for file in wet_audio_files]
target_tensors = [load_target(file) for file in target_files]

Metal device set to: Apple M1

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



In [None]:
tf.data.Dataset.from_tensors()

In [24]:
# dataset = tf.data.Dataset.from_tensor_slices((dry_tensors, wet_tensors, target_tensors))
wet_dataset = tf.data.Dataset.from_tensors(( wet_tensors))
target_dataset = tf.data.Dataset.from_tensors((target_tensors))

In [25]:
wet_dataset

<_TensorDataset element_spec=TensorSpec(shape=(50, 44100, 1), dtype=tf.float32, name=None)>

In [26]:
target_dataset

<_TensorDataset element_spec=TensorSpec(shape=(50, 2), dtype=tf.float32, name=None)>

In [27]:
# Sanity check
num_elements = 3
for data in wet_dataset.take(num_elements):
    # dry_audio, wet_audio, target = data
    wet_audio = data
    # print("Dry audio shape:", dry_audio.shape)
    print("Wet audio shape:", wet_audio.shape)
    # print("Target:", target)
    print()


Wet audio shape: (50, 44100, 1)



In [10]:
# # TODO: maybe different transform, potentially?
# def mel_transform(audio):
#   # Compute mel spectrogram using librosa
#   mel_ = librosa.feature.melspectrogram(audio, sr=44100)
#   # Convert mel spectrograms to logarithmic scale
#   mel_ = librosa.power_to_db(mel_, ref=np.max)
#   return mel_
# # @tf.function(input_signature=[tf.TensorSpec(None, tf.float32)])

# def tf_mel(input):
#   mel = tf.numpy_function(mel_transform, [input], tf.float32)
#   return y

In [11]:
# TODO: maybe different transform, potentially?
def mfcc_transform(audio):
  # Compute mel spectrogram using librosa
  mfccs = librosa.feature.mfcc(y=audio, sr=22050, hop_length=512)
  # Convert mel spectrograms to logarithmic scale
#   mel_ = librosa.power_to_db(mel_, ref=np.max)
  return mfccs
# @tf.function(input_signature=[tf.TensorSpec(None, tf.float32)])

def tf_mfcc(wet, target):
  wet = tf.numpy_function(mfcc_transform, [wet], tf.float32)
  return wet, target

In [42]:
def get_spectrogram(waveform):
  # Convert the waveform to a spectrogram via a STFT.
  spectrogram = tf.signal.stft(
      waveform, frame_length=256, frame_step=128)
  # Obtain the magnitude of the STFT.
  spectrogram = tf.abs(spectrogram)
  # Add a `channels` dimension, so that the spectrogram can be used
  # as image-like input data with convolution layers (which expect
  # shape (`batch_size`, `height`, `width`, `channels`).
  # spectrogram = spectrogram[..., tf.newaxis]
  return spectrogram

In [43]:
def make_spec_ds(ds):
  return ds.map(
      map_func=lambda audio: (get_spectrogram(audio)),
      num_parallel_calls=tf.data.AUTOTUNE)

In [44]:
spec_dataset = make_spec_ds(wet_dataset)
spec_dataset

<_ParallelMapDataset element_spec=TensorSpec(shape=(50, 44100, 0, 129), dtype=tf.float32, name=None)>

In [47]:
# Sanity check
num_elements = 3
for data in spec_dataset.take(num_elements):
    # dry_audio, wet_audio, target = data
    wet_audio = data
    # print("Dry audio shape:", dry_audio.shape)
    print("Wet audio shape:", wet_audio.shape)
    # print("Target:", target)
    print()

Wet audio shape: (50, 44100, 0, 129)



In [64]:
x = [spec for spec in spec_dataset]
y = [target for target in target_dataset]

In [None]:
# x = np.reshape(x, (50, 44100*20, -1))

In [65]:
train_x, train_y = x, y
test_x = train_x[int(0.8 * len(train_x)):]
test_y = train_y[int(0.8 * len(train_y)):]
train_x = tf.stack(train_x)
train_y = tf.stack(train_y)

In [54]:
for example_spec in spec_dataset.take(1):
    break

In [58]:
dim = example_spec.shape

In [66]:
# Verify the shapes of train_x and train_y
print("train_x shape:", train_x.shape)
print("train_y shape:", train_y.shape)

# Reshape train_x if needed to match the model's input shape
train_x = np.reshape(train_x, (train_x.shape[0], -1))
print("Reshaped train_x:", train_x.shape)

# Verify the shapes again
print("Updated train_x shape:", train_x.shape)
print("Updated train_y shape:", train_y.shape)

# Define the model architecture
# dim = train_x.shape[1:]
model = tf.keras.Sequential([
    tf.keras.layers.Dense(32, activation='relu', input_shape=dim),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(2)  # Output layer for w parameter
])

# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

train_x shape: (1, 50, 44100, 0, 129)
train_y shape: (1, 50, 2)
Reshaped train_x: (1, 0)
Updated train_x shape: (1, 0)
Updated train_y shape: (1, 50, 2)


In [67]:
# Train the model
epochs = 1000
batch_size = 8
model.fit(x=train_x, y=train_y, epochs=epochs, batch_size=batch_size)

Epoch 1/1000


ValueError: in user code:

    File "/Users/wwerkowicz/miniforge3/envs/metal/lib/python3.10/site-packages/keras/engine/training.py", line 1284, in train_function  *
        return step_function(self, iterator)
    File "/Users/wwerkowicz/miniforge3/envs/metal/lib/python3.10/site-packages/keras/engine/training.py", line 1268, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/wwerkowicz/miniforge3/envs/metal/lib/python3.10/site-packages/keras/engine/training.py", line 1249, in run_step  **
        outputs = model.train_step(data)
    File "/Users/wwerkowicz/miniforge3/envs/metal/lib/python3.10/site-packages/keras/engine/training.py", line 1050, in train_step
        y_pred = self(x, training=True)
    File "/Users/wwerkowicz/miniforge3/envs/metal/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Users/wwerkowicz/miniforge3/envs/metal/lib/python3.10/site-packages/keras/engine/input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_1" is incompatible with the layer: expected shape=(None, 50, 44100, 0, 129), found shape=(None, 0)
