## PyTorch to Keras (TensorFlow)

In [None]:
%pip install tensorflow transformers nobuco

# PyTorch must be installed separately; see https://pytorch.org/get-started/locally/

In [None]:
import nobuco
from nobuco import ChannelOrder, ChannelOrderingStrategy
import torch
import torch.nn.functional as F
import tensorflow as tf
from model_pytorch import Mamba, ModelArgs
from transformers import AutoTokenizer

# Tokenizer loaded from the 'EleutherAI/gpt-neox-20b' checkpoint; the cells
# below use it to turn text prompts into input-id tensors.
tokenizer = AutoTokenizer.from_pretrained('EleutherAI/gpt-neox-20b')

In [47]:
@nobuco.converter(F.softplus, channel_ordering_strategy=ChannelOrderingStrategy.MINIMUM_TRANSPOSITIONS)
def softplus(input: torch.Tensor, beta: float = 1.0, threshold: float = 20.0):
    """Nobuco converter that maps ``F.softplus`` onto the Keras softplus activation.

    The signature mirrors ``torch.nn.functional.softplus`` so that traced calls
    passing ``beta`` and/or ``threshold`` no longer fail with a ``TypeError``.

    Args:
        input: Tensor the PyTorch op was called with.
        beta: Sharpness factor; PyTorch computes ``1/beta * log(1 + exp(beta * x))``.
        threshold: Accepted for signature compatibility only. PyTorch uses it
            to switch to the identity for large inputs; the TensorFlow op is
            called without it.

    Returns:
        A callable with the same parameters that evaluates softplus on the
        TensorFlow side.
    """
    def func(input, beta=1.0, threshold=20.0):
        if beta == 1:
            # Default case: identical to the plain Keras activation.
            return tf.keras.activations.softplus(input)
        # softplus_beta(x) = softplus(beta * x) / beta
        return tf.keras.activations.softplus(input * beta) / beta
    return func

In [48]:
@nobuco.converter(torch.einsum, channel_ordering_strategy=ChannelOrderingStrategy.FORCE_PYTORCH_ORDER)
def converter_einsum(*args):
    """Nobuco converter that maps ``torch.einsum`` onto ``tf.einsum``.

    Args:
        *args: The positional arguments of the traced ``torch.einsum`` call:
            the equation string followed by the operands.

    Returns:
        A callable taking the same positional arguments and returning the
        result of ``tf.einsum``.
    """
    def func(*args):
        equation = args[0]
        operands = args[1:]
        # torch.einsum also accepts the operands packed into a single
        # list/tuple (``torch.einsum(eq, [a, b])``); tf.einsum needs them
        # as separate positional arguments.
        if len(operands) == 1 and isinstance(operands[0], (list, tuple)):
            operands = tuple(operands[0])
        # Trace output emitted while the converted op is being executed.
        print("----------------converter_einsum------------------")
        print(operands)
        return tf.einsum(equation, *operands)
    return func

In [None]:
# Name of the export and the text used to produce the example input.
export_name = "mamba_minimal_1_layer"
dummy_input = "Test"

# Single-layer Mamba (d_model=5, n_layer=1) put into eval mode before conversion.
args = ModelArgs(d_model=5, n_layer=1, vocab_size=50277)
model = Mamba(args)
model.eval()

# Tokenize the example text into a PyTorch tensor of input ids.
input_ids = tokenizer(dummy_input, return_tensors='pt')["input_ids"]

# Convert the PyTorch model to Keras with nobuco.
keras_model = nobuco.pytorch_to_keras(
    model,
    args=[input_ids],
    kwargs=None,
    # Dynamic axes are annotated with None.
    input_shapes={input_ids: (1, None)},
    inputs_channel_order=ChannelOrder.TENSORFLOW,
    outputs_channel_order=ChannelOrder.TENSORFLOW,
    constants_to_variables=False,
    trace_shape=True,
    save_trace_html=True,
)

In [None]:
# Write the converted model as '<export_name>.keras' and '<export_name>.h5'
# through the Keras saver, then as a SavedModel at '<export_name>'.
for extension in ('keras', 'h5'):
    tf.keras.models.save_model(keras_model, f'{export_name}.{extension}')
tf.saved_model.save(keras_model, f'{export_name}')

In [None]:
# Quick inference check on the nobuco-converted model that is still in memory.
dummy_prompt_keras = "Harry Potter"
input_ids_keras = tokenizer(dummy_prompt_keras, return_tensors='tf')["input_ids"]

# NOTE: the ids go to predict() exactly as the tokenizer returns them; an
# explicit tf.int64 cast had been left disabled in this cell.
out = keras_model.predict(input_ids_keras)
print(out)

## SavedModel 

In [None]:
# Restore the SavedModel export (the path carries no '.h5' suffix).
export_path = "mamba_minimal_1_layer"
keras_model_restored = tf.saved_model.load(export_path)

# Tokenize the prompt and cast the ids to int64 before calling the model.
dummy_prompt_keras = "Harry"
input_ids_keras = tf.cast(
    tokenizer(dummy_prompt_keras, return_tensors='tf')["input_ids"],
    tf.int64,
)

# The restored object is called directly rather than through predict().
out = keras_model_restored(input_ids_keras)
print(out)

## .keras

In [None]:
# Reload the '.keras' export.
# NOTE(review): safe_mode=False is presumably required because the converted
# model contains objects that safe mode refuses to deserialize; confirm.
export_name = "mamba_minimal_1_layer"
keras_model = tf.keras.saving.load_model(f'{export_name}.keras', safe_mode=False)

# Tokenize the prompt and cast the ids to int64.
dummy_prompt_keras = "Harry Potter"
input_ids_keras = tf.cast(
    tokenizer(dummy_prompt_keras, return_tensors='tf')["input_ids"],
    tf.int64,
)

# Run inference and show the raw output.
out = keras_model.predict(input_ids_keras)
print(out)

## .h5

In [None]:
import keras
import tensorflow as tf
from transformers import AutoTokenizer
from nobuco.layers.weight import WeightLayer

# Reload the '.h5' export; nobuco's WeightLayer is handed to the loader as a
# custom object.
export_name = "mamba_minimal_1_layer"
custom_objects = {'WeightLayer': WeightLayer}
keras_model = keras.saving.load_model(f'{export_name}.h5', custom_objects=custom_objects)

# Tokenize the prompt and cast the ids to int64.
tokenizer = AutoTokenizer.from_pretrained('EleutherAI/gpt-neox-20b')
dummy_prompt_keras = "Harry Potter"
input_ids_keras = tf.cast(
    tokenizer(dummy_prompt_keras, return_tensors='tf')["input_ids"],
    tf.int64,
)

# Run inference and show the raw output.
out = keras_model.predict(input_ids_keras)
print(out)

## ONNX to Keras

In [None]:
import onnx
from onnx2keras import onnx_to_keras

# Read the ONNX graph from disk.
onnx_model = onnx.load('mamba_minimal_1_layer.onnx')

# Convert it to Keras. The list names the model's main input; it can differ
# for other models.
k_model = onnx_to_keras(onnx_model, input_names=['input_ids'])

In [None]:
from onnx_tf.backend import prepare
import onnx

# Read the ONNX graph from disk and hand it to the onnx-tf backend.
onnx_model = onnx.load('mamba_minimal_1_layer.onnx')
tf_rep = prepare(model=onnx_model)

In [None]:
# Create the converter for the saved model
converter = tf.lite.TFLiteConverter.from_saved_model("mamba_minimal_1_layer")

# Convert the model
tflite_model = converter.convert()

# Save the TFLite model
with open("tflite_mamba_minimal_1_layer.tflite", mode='wb') as tflite_file:
    tflite_file.write(tflite_model)


In [None]:
import numpy as np
import tensorflow as tf
from transformers import AutoTokenizer

# Load the TFLite model
interpreter = tf.lite.Interpreter(model_path="tflite_mamba_minimal_1_layer.tflite")

# prompt
tokenizer = AutoTokenizer.from_pretrained('EleutherAI/gpt-neox-20b')
dummy_prompt_keras = "Harry tt"
input_ids_keras = tokenizer(dummy_prompt_keras, return_tensors='tf').input_ids
input_ids_keras_int64 = tf.cast(input_ids_keras, tf.int64)
# The interpreter is fed a NumPy array rather than a TensorFlow tensor.
input_array = input_ids_keras_int64.numpy()

# The model was converted with a dynamic sequence axis, so the input tensor
# must be resized to the actual prompt length *before* tensors are allocated;
# otherwise set_tensor rejects prompts whose length differs from the default.
input_index = interpreter.get_input_details()[0]['index']
interpreter.resize_tensor_input(input_index, input_array.shape)
interpreter.allocate_tensors()

# Get input and output tensors (queried after allocation so shapes are final)
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

interpreter.set_tensor(input_details[0]['index'], input_array)

interpreter.invoke()

output_data = interpreter.get_tensor(output_details[0]['index'])
print(output_data)