In [1]:
from transformers import AutoTokenizer
from openvino_tokenizers import convert_tokenizer, connect_models
import openvino as ov

## Download the TinyLlama-1.1B-Chat tokenizer from the Hugging Face Hub

In [2]:
# Hub repository id, written as '<vendor>/<name>'.
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Split the id on '/' into its two components.
model_vendor, model_name = model_id.split("/")

# Load the tokenizer published under this repository id.
hf_tokenizer = AutoTokenizer.from_pretrained(model_id)

# Bare trailing expression: the notebook displays the tokenizer's repr.
hf_tokenizer

LlamaTokenizerFast(name_or_path='TinyLlama/TinyLlama-1.1B-Chat-v1.0', vocab_size=32000, model_max_length=2048, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '</s>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}

## Convert the downloaded tokenizer into OpenVINO IR models using 'OpenVINO Tokenizers'

In [3]:
# Convert the Hugging Face tokenizer; with_detokenizer=True is requested so
# that the result unpacks into a (tokenizer, detokenizer) pair of models.
converted_models = convert_tokenizer(hf_tokenizer, with_detokenizer=True)
ov_tokenizer, ov_detokenizer = converted_models

# Show the input/output signature of both converted models.
print(ov_tokenizer, ov_detokenizer)

<Model: 'tokenizer'
inputs[
<ConstOutput: names[string_input] shape[?] type: string>
]
outputs[
<ConstOutput: names[input_ids] shape[?,?] type: i64>,
<ConstOutput: names[attention_mask] shape[?,?] type: i64>
]> <Model: 'detokenizer'
inputs[
<ConstOutput: names[Parameter_22] shape[?,?] type: i64>
]
outputs[
<ConstOutput: names[string_output] shape[?] type: string>
]>


## How to use the converted tokenizer and detokenizer

In [4]:
# Compile both converted models for the same target device (CPU).
device = 'CPU'
compiled_tokenizer = ov.compile_model(ov_tokenizer, device)
compiled_detokenizer = ov.compile_model(ov_detokenizer, device)

# Tokenize: the tokenizer model is fed a batch (list) holding one string.
input_string = 'OpenVINO is an open-source toolkit for optimizing and deploying deep learning models from cloud to edge.'
token_ids = compiled_tokenizer.infer_new_request([input_string])
print(token_ids)

# Detokenize: only the 'input_ids' output is passed on to the detokenizer.
input_ids = token_ids['input_ids']
detokenize_result = compiled_detokenizer.infer_new_request(input_ids)
print(detokenize_result)

# The decoded text is exposed under the 'string_output' name.
decoded_strings = detokenize_result['string_output']
print(decoded_strings)

# Round-trip check: the first decoded string should equal the original input.
print(input_string == decoded_strings[0])


{<ConstOutput: names[input_ids] shape[?,?] type: i64>: array([[    1,  4673, 29963,  1177, 29949,   338,   385,  1722, 29899,
         4993,  5780,  7354,   363,  5994,  5281,   322,  7246,   292,
         6483,  6509,  4733,   515,  9570,   304,  7636, 29889]],
      dtype=int64), <ConstOutput: names[attention_mask] shape[?,?] type: i64>: array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1]], dtype=int64)}
{<ConstOutput: names[string_output] shape[?] type: string>: array(['OpenVINO is an open-source toolkit for optimizing and deploying deep learning models from cloud to edge.'],
      dtype='<U104')}
['OpenVINO is an open-source toolkit for optimizing and deploying deep learning models from cloud to edge.']
True


## Save the IR models for later use

In [6]:
# Write each converted model to its own IR file, tokenizer first.
for ir_model, ir_path in (
    (ov_tokenizer, 'ov_tokenizer.xml'),
    (ov_detokenizer, 'ov_detokenizer.xml'),
):
    ov.save_model(ir_model, ir_path)