Model used in this notebook: https://huggingface.co/MarieAngeA13/Sentiment-Analysis-BERT

In [1]:
from transformers import AutoTokenizer, AutoConfig
from optimum.intel.openvino import OVModelForSequenceClassification
import nncf
import openvino as ov
import openvino_tokenizers as ovtk

## Prepare models
Download a sentiment analysis model and its tokenizer from the Hugging Face Hub, then convert both of them into OpenVINO IR models.

In [2]:
model_id = 'MarieAngeA13/Sentiment-Analysis-BERT'

# Convert the Hugging Face tokenizer into an OpenVINO model and save it as IR.
hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
ov_tokenizer = ovtk.convert_tokenizer(hf_tokenizer)
ov.save_model(ov_tokenizer, 'tokenizer.xml')

# Export the sentiment analysis model to OpenVINO. It is neither compiled nor
# loaded in 8-bit here, because the weights are compressed explicitly below.
config = AutoConfig.from_pretrained(model_id, output_hidden_states=True)
ov_model = OVModelForSequenceClassification.from_pretrained(model_id, config=config, export=True, compile=False, load_in_8bit=False)

# Compress the model weights.
ov_model = ov_model.half()
# INT4 asymmetric weight compression with group size 128; `ratio=0.8` asks NNCF
# to put roughly 80% of the ratio-defining layers in INT4 and keep the rest in
# the backup precision (see the bit-width table printed by NNCF).
ov_compressed_model = nncf.compress_weights(ov_model.model, mode=nncf.CompressWeightsMode.INT4_ASYM, group_size=128, ratio=0.8)
# Save the model returned by NNCF, so the file on disk is guaranteed to be the
# compressed one rather than relying on `ov_model.model` being mutated in place.
ov.save_model(ov_compressed_model, 'sentiment-analysis.xml')


Framework not specified. Using pt to export the model.
Non-default generation parameters: {'max_length': 64}
Using framework PyTorch: 2.3.1+cpu
Overriding 1 configuration item(s)
	- use_cache -> False
Non-default generation parameters: {'max_length': 64}


Output()

INFO:nncf:Statistics of the bitwidth distribution:
+----------------+-----------------------------+----------------------------------------+
|   Num bits (N) | % all parameters (layers)   | % ratio-defining parameters (layers)   |
|              8 | 40% (34 / 77)               | 23% (30 / 73)                          |
+----------------+-----------------------------+----------------------------------------+
|              4 | 60% (43 / 77)               | 77% (43 / 73)                          |
+----------------+-----------------------------+----------------------------------------+


Output()

In [3]:
import openvino as ov
import openvino_tokenizers as ovtk

## Read the sentiment analysis model and tokenizer model

In [4]:
# A single Core instance reads both IR files produced in the previous section:
# first the tokenizer, then the sentiment classifier.
ov_core = ov.Core()
ov_tokenizer, ov_sentiment_model = (
    ov_core.read_model(ir_path)
    for ir_path in ('tokenizer.xml', 'sentiment-analysis.xml')
)

# Show the input/output signature of each model.
print(ov_tokenizer, ov_sentiment_model)

<Model: 'tokenizer'
inputs[
<ConstOutput: names[Parameter_1] shape[?] type: string>
]
outputs[
<ConstOutput: names[input_ids] shape[?,?] type: i64>,
<ConstOutput: names[token_type_ids] shape[?,?] type: i64>,
<ConstOutput: names[attention_mask] shape[?,?] type: i64>
]> <Model: 'Model3'
inputs[
<ConstOutput: names[input_ids] shape[?,?] type: i64>,
<ConstOutput: names[attention_mask] shape[?,?] type: i64>,
<ConstOutput: names[token_type_ids] shape[?,?] type: i64>
]
outputs[
<ConstOutput: names[logits] shape[?,3] type: f32>
]>


## Connect the tokenizer model and sentiment analysis model
Connect the tokenizer model and the sentiment analysis model into a single model that takes a raw string as input and returns the logits.

In [5]:
# Feed the tokenizer outputs into the classifier inputs so the combined graph
# accepts strings directly.
ov_connected_model = ovtk.connect_models(ov_tokenizer, ov_sentiment_model)

# Compile the fused graph for the CPU device and show its signature.
ov_compiled_connected_model = ov.compile_model(
    ov_connected_model,
    device_name='CPU',
)
print(ov_compiled_connected_model)

<CompiledModel:
inputs[
<ConstOutput: names[Parameter_1] shape[?] type: string>
]
outputs[
<ConstOutput: names[logits] shape[?,3] type: f32>
]>


## Run the integrated sentiment analysis model

In [6]:
import numpy as np

def softmax(x, axis=0):
    """Return the numerically stable softmax of ``x`` along ``axis``.

    Args:
        x: Array-like of scores (logits).
        axis: Axis along which the values are normalised. Defaults to 0,
            which is the only axis of a flattened logits vector.

    Returns:
        ``numpy.ndarray`` with the same shape as ``x`` whose entries along
        ``axis`` sum to 1.
    """
    x = np.asarray(x)
    # Subtracting the maximum does not change the result but keeps exp() from
    # overflowing; keepdims=True lets the reduction broadcast for any axis.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)  # computed once and reused for the normaliser
    return exps / np.sum(exps, axis=axis, keepdims=True)

input_sentence = "I lost my wallet. I'm so sad."

# The connected model takes a batch of strings; a one-element list is a batch of one.
raw_outputs = ov_compiled_connected_model.infer_new_request([input_sentence])
print(raw_outputs)

# Flatten the logits for the single sentence into a vector and turn them into
# probabilities.
sentence_logits = raw_outputs['logits'].flatten()
res = softmax(sentence_logits)
print('Negative, Neutral, Positive')
print(res)

{<ConstOutput: names[logits] shape[?,3] type: f32>: array([[ 2.8169205, -0.9881832, -2.1669624]], dtype=float32)}
Negative, Neutral, Positive
[0.97171885 0.02162744 0.00665377]
