### Export to ONNX and save models notebook
#### Requirements

- Install PyTorch first: https://pytorch.org/get-started/locally/

In [None]:
#Required once
%pip install --user -qqr requirements.txt

In [1]:
#Required

from mamba_ssm.models.config_mamba import MambaConfig
from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel
import torch
from transformers import AutoTokenizer

from mamba_ssm.onnx.model_wrapper import ModelWrapper, BlockModelWrapper, MambaModelWrapper

# Config
# Shared settings read by the cells below.
model_name = "state-spaces/mamba-130m"  # checkpoint name handed to ModelWrapper / MambaLMHeadModel.from_pretrained
device = "cpu"  # device used for the models and for the example input tensors
dtype = torch.float32  # dtype passed to the model constructors

  from .autonotebook import tqdm as notebook_tqdm


Run the cells below to export MambaLMHeadModel to ONNX:

In [9]:
# Build the wrapper from the pretrained checkpoint named in the config cell.
model = ModelWrapper(
    model_name=model_name,
    device=device,
    dtype=dtype,
    use_generation=False,
)

Number of layers: 24
Size of d: 768
Number of parameters: 129135360


In [None]:
# Alternative to the pretrained wrapper: build it from a small custom config
# (no checkpoint name is given, so model_name is None).
config = MambaConfig()
config.n_layer = 1
config.d_model = 200
model = ModelWrapper(
    model_name=None,
    config=config,
    use_generation=False,
    device=device,
    dtype=dtype,
)

In [10]:
# Tokenize a short prompt to get example input ids for the export.
dummy_prompt = "Hello, world!"
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
tokens = tokenizer(dummy_prompt, return_tensors="pt")
input_ids = tokens["input_ids"].to(device)

# Destination of the exported graph.
onnx_model_path = "model.onnx"

# Export to ONNX; only axis 0 (the batch dimension) is declared dynamic.
torch.onnx.export(
    model,
    (input_ids,),
    onnx_model_path,
    verbose=False,
    input_names=["input_ids"],
    output_names=["output"],
    dynamic_axes={
        "input_ids": {0: "batch_size"},
        "output": {0: "batch_size"},
    },
)

# Also serialize the wrapper object itself.
torch.save(model, "model_wrapper.pt")

print(f"Model exported in {onnx_model_path}")


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Model exported in model.onnx


In [4]:
# Serialize the current `model` object to model_wrapper.pt.
torch.save(obj=model, f="model_wrapper.pt")

Run to save the pretrained PyTorch model:

In [5]:
# Load the pretrained MambaLMHeadModel and serialize the whole model object.
model = MambaLMHeadModel.from_pretrained(
    model_name,
    dtype=dtype,
    device=device,
)
torch.save(obj=model, f="model_original_pretrained.pt")

Run to save a custom, non-pretrained model:

In [2]:
# Build a MambaLMHeadModel directly from a custom config (nothing is loaded
# from a checkpoint) and serialize it.
config = MambaConfig()
config.n_layer = 2
config.d_model = 200
model = MambaLMHeadModel(
    config=config,
    dtype=dtype,
    device=device,
)
torch.save(obj=model, f="model_custom.pt")

Run to export a single block layer to ONNX:

In [None]:
# Export a single block (wrapped by BlockModelWrapper) to ONNX, using a tiny
# custom config so the resulting graph stays small.
config = MambaConfig()
config.d_model = 5
config.n_layer = 1
block_model_wrapper = BlockModelWrapper(config=config, device=device, dtype=dtype)

# Example token ids obtained by tokenizing a short prompt.
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
dummy_prompt = "Hello, world!"
tokens = tokenizer(dummy_prompt, return_tensors="pt")

input_ids = tokens.input_ids.to(device=device)

# Dummy activations: batch size = 1, seq length = 10, last axis = config.d_model.
# They are created with the notebook-wide `device` and `dtype` settings so they
# follow the same configuration as the wrapper and input_ids.
# NOTE(review): the sequence length of input_ids comes from the tokenizer and may
# differ from the 10 used here — confirm BlockModelWrapper does not need them to match.
hidden_states = torch.randn(1, 10, config.d_model, device=device, dtype=dtype)

# zeros_like already inherits device and dtype from hidden_states.
residual = torch.zeros_like(hidden_states)

torch.onnx.export(
    block_model_wrapper,
    (input_ids, hidden_states, residual),
    'block_model.onnx',
    input_names=['input_ids', 'hidden_states', 'residual'],
    output_names=['output']
)

# Also serialize the wrapper object itself.
torch.save(block_model_wrapper, "block_model.pt")

Run to export the Mamba layer to ONNX:

In [15]:
# Export a MambaModelWrapper built from a tiny custom config to ONNX.
config = MambaConfig()
config.d_model = 5
config.n_layer = 1
block_model_wrapper = MambaModelWrapper(config=config, device=device, dtype=dtype)

# Example token ids obtained by tokenizing a short prompt.
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
dummy_prompt = "Hello, world!"
tokens = tokenizer(dummy_prompt, return_tensors="pt")

input_ids = tokens.input_ids.to(device=device)

torch.onnx.export(
    block_model_wrapper,
    input_ids,
    'mamba_model_d_5_h.onnx',
    input_names=['input_ids'],
    output_names=['output'],
    # Every key in dynamic_axes must be one of the names declared in
    # input_names / output_names; this export has a single input ('input_ids')
    # and a single output ('output'), so only those two are listed.
    dynamic_axes={
        'input_ids': {0: 'batch_size', 1: 'seq_length'},
        'output': {0: 'batch_size', 1: 'seq_length', 2: 'd_model'}
    }
)

# Also serialize the wrapper object itself.
torch.save(block_model_wrapper, "mamba_model.pt")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Size of d: 5
Number of parameters: 720
0 torch.Size([1, 4, 5])




In [5]:
# Show the dimensions of the tokenized prompt tensor.
input_ids.size()

torch.Size([1, 4])

In [10]:
# Sanity check: build a tiny MambaModelWrapper, switch it to eval mode and run
# one forward pass on a tokenized prompt.
config = MambaConfig()
config.n_layer = 1
config.d_model = 5
block_model_wrapper = MambaModelWrapper(
    config=config,
    dtype=dtype,
    device=device,
)
block_model_wrapper.eval()

dummy_prompt = "Hello, !world"
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
tokens = tokenizer(dummy_prompt, return_tensors="pt")

input_ids = tokens["input_ids"].to(device)

out = block_model_wrapper(input_ids)

# Display the resulting tensor as the cell output.
out

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Size of d: 5
Number of parameters: 720
0 torch.Size([1, 4, 5])


tensor([[[-0.0187,  0.0326,  0.0519,  0.0323,  0.0524],
         [-0.0337, -0.0044,  0.0254,  0.0173,  0.0112],
         [-0.0278, -0.0198,  0.0496,  0.0301,  0.0151],
         [ 0.0188,  0.0294,  0.0273,  0.0104, -0.0015]]],
       grad_fn=<UnsafeViewBackward0>)

In [14]:
# Show the dimensions of the forward-pass result.
out.size()


torch.Size([1, 4, 5])