In [2]:
from optimum.onnxruntime import ORTModelForFeatureExtraction
from transformers import AutoTokenizer
from transformers import Pipeline
import torch.nn.functional as F
import torch
import numpy as np
from sentence_transformers import SentenceTransformer
import json
import time
from tqdm import tqdm
from pathlib import Path
from typing import List

In [None]:
# Directory handed to from_pretrained() for both the ONNX model and its tokenizer.
onnx_path = Path("bge_chinese_large_v1d5_finetune_240129v2_onnx/")

In [None]:
# Tokenizer files are read from the same directory as the ONNX graph.
tokenizer = AutoTokenizer.from_pretrained(onnx_path)

# Load the optimized ONNX graph and run it through the CUDA execution provider.
model = ORTModelForFeatureExtraction.from_pretrained(
    onnx_path,
    provider="CUDAExecutionProvider",
    file_name="model_optimized.onnx",
)

In [None]:
# SentenceTransformer model used as the baseline for the timing and output
# comparisons against the ONNX model in the cells below.
model_id = "bge_chinese_large_v1d5_finetune_240129v2"
sbertmodel = SentenceTransformer(model_id)

In [None]:
# Encode a single sample sentence with normalize_embeddings=True and display
# the resulting vector as the cell output.
sbertoutput = sbertmodel.encode("你是谁", normalize_embeddings=True)
sbertoutput

In [None]:
def embd_func(
    model,
    tokenizer,
    inputs: List[str],
    normalize_embeddings: bool = True,
    max_length: int = 512,
) -> np.ndarray:
    """Embed a batch of texts using the first-token hidden state of ``model``.

    Args:
        model: Callable that accepts the tokenizer output as keyword arguments
            and returns an object exposing a ``last_hidden_state`` tensor that
            is indexed as ``[batch, token, hidden]``.
        tokenizer: Callable that turns ``inputs`` into a mapping of PyTorch
            tensors; it is invoked with padding, truncation, ``max_length``
            and ``return_tensors="pt"``.
        inputs: Texts to embed.
        normalize_embeddings: When True, every embedding is scaled to unit
            L2 norm. When False, the raw first-token vectors are returned.
        max_length: Truncation length forwarded to the tokenizer.

    Returns:
        A NumPy array holding one embedding row per input text.
    """
    encoded_inputs = tokenizer(
        inputs,
        padding=True,
        truncation=True,
        return_tensors="pt",
        max_length=max_length,
    )
    outputs = model(**encoded_inputs)

    # First-token pooling: keep position 0 of every sequence.
    sentence_embeddings = outputs.last_hidden_state[:, 0]
    if normalize_embeddings:
        sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)

    # The result is assigned before the optional normalization so that
    # normalize_embeddings=False no longer hits an unbound local variable.
    # detach() lets a grad-tracking torch model be passed in as well; it does
    # not change the values.
    return sentence_embeddings.detach().cpu().numpy()


# Run the ONNX model on two short strings; the result is compared with the
# SentenceTransformer output for the same strings in a later cell.
onnx_test = embd_func(model, tokenizer, ["你是谁", "你是谁1"])
onnx_test.shape

In [3]:
# Every non-blank line of the file is parsed as its own JSON document.
with open("gptdataset/0.json", encoding="utf-8", mode="r") as fin:
    # Iterate the handle directly rather than via readlines() so the raw lines
    # are not all held in memory first, and skip blank lines, which
    # json.loads() would otherwise reject with JSONDecodeError.
    alldata = [json.loads(line) for line in fin if line.strip()]

# Only the "query" field of each record is used by the benchmark cells.
allquery = [record["query"] for record in alldata]
len(allquery)

26988

In [None]:
# Per-query latency of the SentenceTransformer model on the first 500 queries.
alltime = []
for query in tqdm(allquery[:500]):
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() follows the wall clock and can jump when
    # the system time is adjusted.
    started = time.perf_counter()
    sbertmodel.encode(query, normalize_embeddings=True)
    alltime.append(time.perf_counter() - started)
# Mean seconds per query, displayed as the cell output.
np.mean(alltime)

In [None]:
# Per-query latency of the ONNX model on the first 500 queries, one text per call.
alltime = []
for query in tqdm(allquery[:500]):
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() follows the wall clock and can jump when
    # the system time is adjusted.
    started = time.perf_counter()
    embd_func(model, tokenizer, [query])
    alltime.append(time.perf_counter() - started)
# Mean seconds per query, displayed as the cell output.
np.mean(alltime)

In [None]:
# Encode the same two strings that were given to the ONNX model so both
# outputs can be compared element by element.
sbert_test = sbertmodel.encode(["你是谁", "你是谁1"], normalize_embeddings=True)
sbert_test.shape

In [None]:
# True when the ONNX and SentenceTransformer embeddings agree element-wise
# with an absolute tolerance of 1e-3 (rtol is left at NumPy's default).
np.allclose(onnx_test, sbert_test, atol=1e-3)

In [None]:
# Display the ONNX embeddings for visual comparison with sbert_test.
onnx_test

In [None]:
# Display the SentenceTransformer embeddings for visual comparison with onnx_test.
sbert_test