In [18]:
import numpy as np
import scipy as sp
import tensorflow as tf
from tensorflow import keras
import os
from datetime import date, datetime

## Choose computation device (CPU)

In [19]:
physical_devices = tf.config.list_physical_devices()
print(f"These are the physical devices available:\n{physical_devices}")

try:
    # Disable all GPUS
    tf.config.set_visible_devices([], 'GPU')
    visible_devices = tf.config.get_visible_devices()
    print(f"These are the visible devices:\n{visible_devices}")
except:
    pass

These are the physical devices available:
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]
These are the visible devices:
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


## User inputs

In [30]:
# EDIT THIS SECTION FOR USER INPUTS

name = 'model_0'
in_file = 'TrainingData/flanger-input.wav'
out_file = 'TrainingData/flanger-target.wav'
epochs = 1

input_size = 1 
batch_size = 4096
test_size = 0.2
learning_rate = 0.0005 

if not os.path.exists('models/'+name):
    os.makedirs('models/'+name)

## Define some helper functions

In [31]:
def save_wav(name, data):
    sp.io.wavfile.write(name, 44100, data.flatten().astype(np.float32))

def normalize(data):
    data_max = max(data)
    data_min = min(data)
    data_norm = max(data_max,abs(data_min))
    return data / data_norm

## Pre-processing the data

In [32]:
# Load and Preprocess Data ###########################################
in_rate, in_data = sp.io.wavfile.read(in_file)
out_rate, out_data = sp.io.wavfile.read(out_file)

X_all = in_data.astype(np.float32).flatten()  
X_all = normalize(X_all).reshape(len(X_all),1)   
y_all = out_data.astype(np.float32).flatten() 
y_all = normalize(y_all).reshape(len(y_all),1)

# Get the last 20% of the wav data for testing and thee rest for training
X_training, X_testing = np.split(X_all, [int(len(X_all)*(1-test_size))])
y_training, y_testing = np.split(y_all, [int(len(y_all)*(1-test_size))])
print(f"X_training shape (pre-processing): {X_training.shape}")
print(f"y_training shape (pre-processing): {y_training.shape}")
print(f"X_testing shape (pre-processing): {X_testing.shape}")
print(f"y_testing shape (pre-processing): {y_testing.shape}")

# Create a new array where each element is an array of input_size samples in time order
# Each element of the new array is shifted by one sample from the previous element
indices = np.arange(input_size) + np.arange(len(X_training)-input_size+1)[:,np.newaxis]
X_ordered_training = tf.gather(X_training,indices) 

X_ordered_training_len = int(X_ordered_training.shape[0] / batch_size) * batch_size
X_ordered_training = X_ordered_training[:X_ordered_training_len,:,:]

print(f"X_ordered_training shape: {X_ordered_training.shape}")
indices = np.arange(input_size) + np.arange(len(X_testing)-input_size+1)[:,np.newaxis]
X_ordered_testing = tf.gather(X_testing,indices) 


X_ordered_testing_len = int(X_ordered_testing.shape[0] / batch_size) * batch_size
X_ordered_testing = X_ordered_testing[:X_ordered_testing_len,:,:]

print(f"X_ordered_testing shape: {X_ordered_testing.shape}")

# The input size defines the number of samples used for each prediction
# Therefore the first output value that we get is at index input_size-1
y_ordered_training = y_training[input_size-1:]
print(f"y_ordered_training shape: {y_ordered_training.shape}")
y_ordered_testing = y_testing[input_size-1:]
print(f"y_ordered_testing shape: {y_ordered_testing.shape}")


shuffled_indices = np.random.permutation(len(X_ordered_training)) 
X_random_training = tf.gather(X_ordered_training, shuffled_indices)
y_random_training = tf.gather(y_ordered_training, shuffled_indices)

X_random_training_len = int(X_random_training.shape[0] / batch_size) * batch_size
y_random_training_len = int(y_random_training.shape[0] / batch_size) * batch_size

X_random_training = X_random_training[:X_random_training_len,:,:]
y_random_training = y_random_training[:y_random_training_len,:]

print(f"X_random_training shape (post-processing): {X_random_training.shape}")
print(f"y_random_training shape (post-processing): {y_random_training.shape}")

print(f"The X_random_training data is an array, where each element is an array of input_size samples in time order. Therefore the lenght is smaller than the original X_training array (the first {input_size} samples are grouped).")
print(f"The y_random_training data is an array, where each element is a single sample. This single sample is the target output for the corresponding X_random_training element, which consists of input samples.")

X_training shape (pre-processing): (352800, 1)
y_training shape (pre-processing): (352800, 1)
X_testing shape (pre-processing): (88200, 1)
y_testing shape (pre-processing): (88200, 1)
X_ordered_training shape: (352256, 1, 1)
X_ordered_testing shape: (86016, 1, 1)
y_ordered_training shape: (352800, 1)
y_ordered_testing shape: (88200, 1)
X_random_training shape (post-processing): (352256, 1, 1)
y_random_training shape (post-processing): (352256, 1)
The X_random_training data is an array, where each element is an array of input_size samples in time order. Therefore the lenght is smaller than the original X_training array (the first 1 samples are grouped).
The y_random_training data is an array, where each element is a single sample. This single sample is the target output for the corresponding X_random_training element, which consists of input samples.


## Define the model

In [33]:
class StatefulLSTM(tf.keras.Model):
    def __init__(self, input_size=1, output_size=1, hidden_size=32, skip=1, batch_size=4096):
        super(StatefulLSTM, self).__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.skip = skip
        self.batch_size = batch_size

        self.lstm = keras.layers.LSTM(units=hidden_size, return_sequences=False, stateful=True, return_state=False, batch_size=batch_size, use_bias=True)    
        self.dense = keras.layers.Dense(units=output_size, activation=None, batch_size=batch_size)
        
        # Build LSTM before training, because stateful lstm requires information batch size to build static graph
        self.lstm.build((batch_size, input_size, 1))
        
    def call(self, input_tensor):
        x = self.lstm(input_tensor)
        x = self.dense(x)    
        return x


In [34]:
class ESRLoss(tf.keras.losses.Loss):
    def __init__(self):
        super(ESRLoss, self).__init__()
        self.epsilon = 1e-5

    def call(self, y_true, y_pred):
        loss = tf.reduce_mean(tf.square(y_true - y_pred))
        energy = tf.reduce_mean(tf.square(y_true)) + self.epsilon
        return loss / energy

class DCLoss(tf.keras.losses.Loss):
    def __init__(self):
        super(DCLoss, self).__init__()
        self.epsilon = 1e-5

    def call(self, y_true, y_pred):
        loss = tf.reduce_mean(tf.square(tf.reduce_mean(y_true, axis=0) - tf.reduce_mean(y_pred, axis=0)))
        energy = tf.reduce_mean(tf.square(y_true)) + self.epsilon
        return loss / energy

class LossWrapper(tf.keras.losses.Loss):
    def __init__(self, loss_weights):
        super(LossWrapper, self).__init__()
        # Map the loss names to their corresponding classes
        loss_dict = {'ESR': ESRLoss, 'DC': DCLoss}
        # Create instances of the loss functions
        self.loss_functions = [loss_dict[key]() for key in ["ESR", "DC"]]
        # Assign the weights
        self.loss_factors = [loss_weights[key] for key in ["ESR", "DC"]]

    def call(self, y_true, y_pred):
        total_loss = 0
        for i, loss_function in enumerate(self.loss_functions):
            total_loss += loss_function(y_true, y_pred) * self.loss_factors[i]
        return total_loss


In [35]:
keras.backend.clear_session()

loss_wrapper = LossWrapper({"ESR": 0.75, "DC": 0.25})

model = StatefulLSTM(input_size=1, 
                     output_size=1,
                     hidden_size=32,
                     skip=0,
                     batch_size=batch_size)

# Compile the model with the custom loss
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
              loss=loss_wrapper)

model.build((batch_size,1,1))
model.summary()

Model: "stateful_lstm"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 multiple                  4352      
                                                                 
 dense (Dense)               multiple                  33        
                                                                 
Total params: 4385 (17.13 KB)
Trainable params: 4385 (17.13 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


## Train the model

In [36]:
history = model.fit(x=X_random_training, y=y_random_training, epochs=epochs, batch_size=batch_size) 



In [42]:
model.save_weights('models/' + name + '/weights')
model.save_weights('models/' + name + '/lstm_model.h5')

## Run predictions
### 0. Load the model

In [54]:
inference_batch_size = 2

inference_model = StatefulLSTM(input_size=1, 
                               output_size=1,
                               hidden_size=32,
                               skip=0,
                               batch_size=inference_batch_size)

input_shape = (inference_batch_size,1,1)

inference_model.build(input_shape)

inference_model.load_weights('models/' + name + '/lstm_model.h5')

### 1. On the test audio data

In [92]:
# Run Prediction #################################################
# Test the model on the testing data #############################
print("Running prediction..")

input_data = X_ordered_testing

# Ensure the input data size for inference is a multiple of the batch size used during training.
# The batch size is crucial as the stateful LSTM maintains internal states for a specific number of samples across batches.
# Use model.predict(input_data, batch_size) to process the entire dataset in the specified batch size.
# Alternatively, use predict_on_batch for single batch processing, but this requires that the input_data len equals the specified batch size  
# Note: Mismatch in batch size or non-multiples of batch size in input data may lead to errors or unexpected model behavior.
#model.reset_states(2048)
prediction = inference_model.predict(input_data, inference_batch_size)

save_wav('models/'+name+'/y_pred.wav', prediction)
save_wav('models/'+name+'/x_test.wav', X_testing)
save_wav('models/'+name+'/y_test.wav', y_testing)

print("X_testing shape: ", X_testing.shape)
print("X_ordered_testing shape: ", X_ordered_testing.shape)
print("y_testing shape: ", y_testing.shape)
print("prediction shape: ", prediction.shape)

print("Note that the prediction shape is smaller than the y_testing shape. This is because the first predicted sample needs input_size samples for prediction.\n")

Running prediction..
X_testing shape:  (88200, 1)
X_ordered_testing shape:  (86016, 1, 1)
y_testing shape:  (88200, 1)
prediction shape:  (86016, 1)
Note that the prediction shape is smaller than the y_testing shape. This is because the first predicted sample needs input_size samples for prediction.


### 2. On a number sequence (to control inference)

In [55]:
# Test the model simple number sequence to compare with inference 
input_shape = (2,1,1)
test_sequence = tf.zeros(input_shape)


print("Running prediction..")
prediction = inference_model.predict(test_sequence)
print(f"prediction {prediction}")

print("Running prediction..")
prediction = inference_model.predict(test_sequence)
print(f"prediction2 {prediction}")

print("X_testing_2 shape: ", test_sequence.shape)
print("prediction_2 shape: ", prediction.shape)


Running prediction..
prediction [[-0.00425536]
 [-0.00425536]]
Running prediction..
prediction2 [[-0.00707254]
 [-0.00707254]]
X_testing_2 shape:  (2, 1, 1)
prediction_2 shape:  (2, 1)


## Export as tflite model
### 1. for minimal examples (with batch size = 2)

In [16]:
# Build the model with the desired input shape
save_model.build((1,1,1))  # Ensure this shape is correct for your model

# Load the pretrained weights
save_model.load_weights('lstm_model.h5')

test_data = tf.zeros((1,1,1))
prediction = save_model.predict(test_data, 1)

# Convert to TensorFlow Lite format
converter = tf.lite.TFLiteConverter.from_keras_model(save_model)
tflite_model = converter.convert()

# Save the TensorFlow Lite model
with open("models/"+name+"/"+name+"-minimal.tflite", 'wb') as f:
    f.write(tflite_model)

# Uncomment to analyze the model
# tf.lite.experimental.Analyzer.analyze(model_content=tflite_model)


Model: "stateful_lstm_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_3 (LSTM)               multiple                  4352      
                                                                 
 dense_3 (Dense)             multiple                  33        
                                                                 
Total params: 4385 (17.13 KB)
Trainable params: 4385 (17.13 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
INFO:tensorflow:Assets written to: C:\Users\VALENT~1\AppData\Local\Temp\tmp4cb_oo6w\assets


INFO:tensorflow:Assets written to: C:\Users\VALENT~1\AppData\Local\Temp\tmp4cb_oo6w\assets


ConverterError: C:\Users\Valentin Ackva\Documents\Code\stateful-lstm\venv\lib\site-packages\keras\src\utils\traceback_utils.py:65:0: error: 'tf.TensorListReserve' op requires element_shape to be static during TF Lite transformation pass
<unknown>:0: note: loc(fused["StatefulPartitionedCall:", "StatefulPartitionedCall"]): called from
C:\Users\Valentin Ackva\Documents\Code\stateful-lstm\venv\lib\site-packages\keras\src\utils\traceback_utils.py:65:0: error: failed to legalize operation 'tf.TensorListReserve' that was explicitly marked illegal
<unknown>:0: note: loc(fused["StatefulPartitionedCall:", "StatefulPartitionedCall"]): called from
<unknown>:0: error: Lowering tensor list ops is failed. Please consider using Select TF ops and disabling `_experimental_lower_tensor_list_ops` flag in the TFLite converter object. For example, converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n converter._experimental_lower_tensor_list_ops = False


In [44]:
input_shape = [1, 1, 1]

func = tf.function(inference_model).get_concrete_function(
    tf.TensorSpec(input_shape, dtype=tf.float32))
converter = tf.lite.TFLiteConverter.from_concrete_functions([func], inference_model)
tflite_model = converter.convert()

# Save the model.
with open("models/"+name+"/"+"steerable-nafx.tflite", 'wb') as f:
  f.write(tflite_model)

INFO:tensorflow:Assets written to: C:\Users\VALENT~1\AppData\Local\Temp\tmptyp_zqu0\assets


INFO:tensorflow:Assets written to: C:\Users\VALENT~1\AppData\Local\Temp\tmptyp_zqu0\assets


In [38]:
# Convert to TensorFlow Lite format
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the TensorFlow Lite model
with open("models/"+name+"/"+name+"-minimal.tflite", 'wb') as f:
    f.write(tflite_model)
    
#tf.lite.experimental.Analyzer.analyze(model_content=tflite_model)

INFO:tensorflow:Assets written to: C:\Users\VALENT~1\AppData\Local\Temp\tmpt5hgngpr\assets


INFO:tensorflow:Assets written to: C:\Users\VALENT~1\AppData\Local\Temp\tmpt5hgngpr\assets


=== TFLite ModelAnalyzer ===

Your TFLite model has '4' subgraph(s). In the subgraph description below,
T# represents the Tensor numbers. For example, in Subgraph#0, the CALL_ONCE op takes
 as input and produces  as output.

Subgraph#0 main(T#0) -> [T#23]
  Op#0 CALL_ONCE(Subgraph#1) -> []
  Op#1 VAR_HANDLE() -> [T#11]
  Op#2 VAR_HANDLE() -> [T#12]
  Op#3 READ_VARIABLE(T#11) -> [T#13]
  Op#4 READ_VARIABLE(T#12) -> [T#14]
  Op#5 TRANSPOSE(T#0, T#8[1, 0, 2]) -> [T#15]
  Op#6 WHILE(T#9[0], T#9[0], T#4, T#14, T#13, T#15, Cond: Subgraph#2, Body: Subgraph#3) -> [T#16, T#17, T#18, T#19, T#20, T#21]
  Op#7 STRIDED_SLICE(T#18, T#1[-1, 0, 0], T#2[0, 4096, 32], T#3[1, 1, 1]) -> [T#22]
  Op#8 FULLY_CONNECTED(T#22, T#10, T#5) -> [T#23]
  Op#9 ASSIGN_VARIABLE(T#12, T#19) -> []
  Op#10 ASSIGN_VARIABLE(T#11, T#20) -> []

Tensors of Subgraph#0
  T#0(serving_default_input_1:0) shape_signature:[-1, 1, 1], type:FLOAT32
  T#1(strided_slice_2) shape:[3], type:INT32 RO 12 bytes, buffer: 2, data:[-1, 0, 0]
  

In [12]:
import tf2onnx
import onnx

# Define the input shape
input_signature = [tf.TensorSpec([4096, input_size, 1], tf.float32, name='x')]

# Convert the model
onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature, opset=13)
onnx.save(proto=onnx_model, f="models/"+name+"/"+name+"-tflite"+"-minimal.onnx")



### 2. for real-time streaming (with batch size = 64)

In [9]:
batch_size_streaming = 1

In [10]:
input_shape = model.inputs[0].shape.as_list()
input_shape[0] = batch_size_streaming
func = tf.function(model).get_concrete_function(
    tf.TensorSpec(input_shape, model.inputs[0].dtype))
converter = tf.lite.TFLiteConverter.from_concrete_functions([func])
tflite_model = converter.convert()

# Save the model.
with open("models/"+name+"/"+name+"-streaming.tflite", 'wb') as f:
  f.write(tflite_model)

tf.lite.experimental.Analyzer.analyze(model_content=tflite_model)

2023-10-24 15:22:31.259267: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2023-10-24 15:22:31.259339: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2023-10-24 15:22:31.260768: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2023-10-24 15:22:31.260791: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2023-10-24 15:22:31.260795: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2023-10-24 15:22:31.260854: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-10-24 15:22:31.260891: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/

=== TFLite ModelAnalyzer ===

Your TFLite model has '3' subgraph(s). In the subgraph description below,
T# represents the Tensor numbers. For example, in Subgraph#0, the PAD op takes
tensor #0 and tensor #15 as input and produces tensor #21 as output.

Subgraph#0 main(T#0) -> [T#37]
  Op#0 PAD(T#0, T#15[0, 0, 12, 12, 0, ...]) -> [T#21]
  Op#1 RESHAPE(T#21, T#3[128, 1, 174, 1]) -> [T#22]
  Op#2 CONV_2D(T#22, T#7, T#1) -> [T#23]
  Op#3 RESHAPE(T#23, T#4[128, 14, 16]) -> [T#24]
  Op#4 PAD(T#24, T#15[0, 0, 12, 12, 0, ...]) -> [T#25]
  Op#5 RESHAPE(T#25, T#5[128, 1, 38, 16]) -> [T#26]
  Op#6 CONV_2D(T#26, T#8, T#2) -> [T#27]
  Op#7 RESHAPE(T#27, T#6[128, 3, 16]) -> [T#28]
  Op#8 TRANSPOSE(T#28, T#17[1, 0, 2]) -> [T#29]
  Op#9 WHILE(T#19[0], T#19[0], T#12, T#13, T#13, T#29, Cond: Subgraph#1, Body: Subgraph#2) -> [T#30, T#31, T#32, T#33, T#34, T#35]
  Op#10 STRIDED_SLICE(T#32, T#9[-1, 0, 0], T#10[0, 128, 36], T#11[1, 1, 1]) -> [T#36]
  Op#11 FULLY_CONNECTED(T#36, T#20, T#18) -> [T#37]

Tensor

2023-10-24 15:22:31.440740: I tensorflow/compiler/mlir/lite/flatbuffer_export.cc:2245] Estimated count of arithmetic ops: 6.538 M  ops, equivalently 3.269 M  MACs


In [11]:
import tf2onnx
import onnx

# Define the input shape
input_signature = [tf.TensorSpec([batch_size_streaming, input_size, 1], tf.float32, name='x')]

# Convert the model
onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature, opset=13)
onnx.save(proto=onnx_model, f="models/"+name+"/"+name+"-tflite"+"-streaming.onnx")

2023-10-24 15:22:35.191437: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2023-10-24 15:22:35.191501: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2023-10-24 15:22:35.191641: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-10-24 15:22:35.191653: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2023-10-24 15:22:35.402321: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compile

## Save the model as json

In [37]:
# Save the model as a JSON file (from RTNeural repo) ###################################
import model_utils_RTNeural

model_utils_RTNeural.save_model(model, filename="models/"+name+"/"+name+".json")

Skipping layer: <keras.src.engine.input_layer.InputLayer object at 0x10469b0d0>
