In [2]:
from optimum.onnxruntime import ORTModelForFeatureExtraction
from transformers import AutoTokenizer
from transformers import Pipeline
import torch.nn.functional as F
import torch
import numpy as np
from sentence_transformers import SentenceTransformer
import json
import time
from tqdm import tqdm
from pathlib import Path
from typing import List

In [None]:
# Directory passed to both the ONNX model loader and the tokenizer loader.
onnx_path = Path("bge_chinese_large_v1d5_finetune_240129v2_onnx")

In [None]:
# Load the tokenizer and the optimized ONNX model from the same directory.
tokenizer = AutoTokenizer.from_pretrained(onnx_path)
model = ORTModelForFeatureExtraction.from_pretrained(
    onnx_path,
    file_name="model_optimized.onnx",
    provider="CUDAExecutionProvider",
)

In [None]:
# Baseline SentenceTransformer model; its embeddings are compared against
# the ONNX model's output further down in the notebook.
model_id = "bge_chinese_large_v1d5_finetune_240129v2"
sbertmodel = SentenceTransformer(model_id)

In [None]:
# Encode one sample sentence with the baseline model (normalize_embeddings=True)
# and display the resulting vector.
sbertoutput = sbertmodel.encode("你是谁", normalize_embeddings=True)
sbertoutput

In [None]:
def embd_func(
    model,
    tokenizer,
    inputs: List[str],
    normalize_embeddings: bool = True,
    max_length: int = 512,
) -> np.ndarray:
    """Embed a batch of texts using the first-position hidden state of ``model``.

    Args:
        model: Callable invoked as ``model(**encoded_inputs)``; its result must
            expose a ``last_hidden_state`` tensor.
        tokenizer: Callable that converts ``inputs`` into the keyword arguments
            for ``model`` (called with padding, truncation and
            ``return_tensors="pt"``).
        inputs: Texts to embed.
        normalize_embeddings: When True, L2-normalize each embedding along
            ``dim=1``; when False, return the raw hidden states.
        max_length: Truncation length forwarded to the tokenizer.

    Returns:
        The (optionally normalized) embeddings as a NumPy array, taken from
        ``last_hidden_state[:, 0]``.
    """
    encoded_inputs = tokenizer(
        inputs,
        padding=True,
        truncation=True,
        return_tensors="pt",
        max_length=max_length,
    )
    outputs = model(**encoded_inputs)

    # Index 0 along the second axis: the hidden state at the first position.
    sentence_embeddings = outputs.last_hidden_state[:, 0]
    if normalize_embeddings:
        sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)

    # Always bound here, so normalize_embeddings=False no longer raises
    # UnboundLocalError.
    return sentence_embeddings.cpu().numpy()


# Smoke test: embed two sentences with the ONNX model and show the output shape.
onnx_test = embd_func(model, tokenizer, ["你是谁", "你是谁1"])
onnx_test.shape

In [3]:
# Each line of the file is parsed as one JSON record; collect the records
# and then their "query" fields.
with open("gptdataset/0.json", mode="r", encoding="utf-8") as fin:
    alldata = [json.loads(line) for line in fin]

allquery = [record["query"] for record in alldata]
len(allquery)

26988

In [None]:
# Per-query latency of the SentenceTransformer baseline on the first 500 queries.
alltime = []
for i in tqdm(allquery[:500]):
    # perf_counter() is monotonic and high-resolution; time.time() is a wall
    # clock that can jump when the system clock is adjusted.
    t0 = time.perf_counter()
    v = sbertmodel.encode(i, normalize_embeddings=True)
    s1 = time.perf_counter() - t0
    alltime.append(s1)
np.mean(alltime)  # mean seconds per query

In [None]:
# Per-query latency of the ONNX model (batch of one) on the first 500 queries.
alltime = []
for i in tqdm(allquery[:500]):
    # perf_counter() is monotonic and high-resolution; time.time() is a wall
    # clock that can jump when the system clock is adjusted.
    t0 = time.perf_counter()
    v = embd_func(model, tokenizer, [i])
    s1 = time.perf_counter() - t0
    alltime.append(s1)
np.mean(alltime)  # mean seconds per query

In [None]:
# Encode the same two sentences with the baseline model and show the output shape.
sbert_test = sbertmodel.encode(["你是谁", "你是谁1"], normalize_embeddings=True)
sbert_test.shape

In [None]:
# True when the ONNX and baseline embeddings agree element-wise within
# atol=1e-3 (plus NumPy's default relative tolerance).
np.allclose(onnx_test, sbert_test, atol=1e-3)

In [None]:
# Display the ONNX embeddings for visual comparison with the baseline.
onnx_test

In [None]:
# Display the baseline embeddings for visual comparison with the ONNX output.
sbert_test

In [6]:
import time
import requests
import numpy as np


def test_online(text: str):
    service_endpoint = "http://10.136.0.64:7777/personal-homepage"  # "http://10.136.0.64:7779/personal-homepage"  # 10.136.0.64
    body = {
        "task": "embeddingv2",
        "text": text,
    }

    response = requests.post(
        f"{service_endpoint}", headers={"Content-Type": "application/json"}, json=body
    )
    return np.array(json.loads(response.text))


# Query the online service once and show the first three components of the result.
online_test = test_online("你是谁")
online_test[:3]

array([ 0.02434009, -0.02068553, -0.01256034])

In [7]:
from tqdm import tqdm

# Per-request latency of the online "embeddingv2" task on the first 100 queries.
alltime = []
for i in tqdm(allquery[:100]):
    # perf_counter() is monotonic and high-resolution; time.time() is a wall
    # clock that can jump when the system clock is adjusted.
    t0 = time.perf_counter()
    v = test_online(i)
    s1 = time.perf_counter() - t0
    alltime.append(s1)
np.mean(alltime)  # mean seconds per request

100%|██████████| 100/100 [00:05<00:00, 17.49it/s]


0.05678013801574707

In [5]:
def test_online_raw(text: str):
    service_endpoint = "http://10.136.0.64:7777/personal-homepage"  # "http://yongfeng.wml.weibo.com/mm-wb-ml-nlp-v2/personal-homepage"  # "http://10.136.0.64:7779/personal-homepage"  # 10.136.0.64
    body = {
        "task": "embedding",
        "text": text,
    }

    response = requests.post(
        f"{service_endpoint}", headers={"Content-Type": "application/json"}, json=body
    )
    return np.array(json.loads(response.text))


from tqdm import tqdm

# Per-request latency of the online "embedding" task on the first 100 queries.
alltime = []
for i in tqdm(allquery[:100]):
    # perf_counter() is monotonic and high-resolution; time.time() is a wall
    # clock that can jump when the system clock is adjusted.
    t0 = time.perf_counter()
    v = test_online_raw(i)
    s1 = time.perf_counter() - t0
    alltime.append(s1)
np.mean(alltime)  # mean seconds per request

100%|██████████| 100/100 [00:07<00:00, 13.17it/s]


0.0755191707611084

In [None]:
# First three components of the first ONNX embedding, for comparison with
# the values returned by the online service.
onnx_test[0, :3]

In [None]:
from tqdm import tqdm


# Check that the online service reproduces the local baseline embeddings
# (within atol=1e-3) for the first 50 queries.
statusall = []
for i in tqdm(allquery[:50]):
    v = sbertmodel.encode(i, normalize_embeddings=True)
    o = test_online(i)
    status = np.allclose(v, o, atol=1e-3)
    # Record this query's result. The original appended the list to itself,
    # which made the final all() True regardless of the comparisons.
    statusall.append(status)


all(statusall)

In [None]:
# docker run -it --gpus all -p 7777:8080 -v /data2/homepage_ckpt_backup:/workspace/dev/wb_ml_nlp nlp-homepage-model:2024-02-02-15 bash

In [8]:
import asyncio
import datetime


async def say_after(delay, what):
    """Wait ``delay`` seconds via ``asyncio.sleep``, then print ``what``."""
    await asyncio.sleep(delay)
    print(what)


async def test_raw_online(query):
    def test_online_raw(text: str):
        service_endpoint = "http://10.136.0.64:7777/personal-homepage"  # "http://yongfeng.wml.weibo.com/mm-wb-ml-nlp-v2/personal-homepage"  # "http://10.136.0.64:7779/personal-homepage"  # 10.136.0.64
        body = {
            "task": "embedding",
            "text": text,
        }

        response = requests.post(
            f"{service_endpoint}",
            headers={"Content-Type": "application/json"},
            json=body,
        )
        return np.array(json.loads(response.text))

    value = test_online_raw(query)
    return value


async def main5():
    """Run ``test_raw_online`` for the first 50 queries via ``asyncio.gather``.

    The coroutines are passed straight to ``gather`` instead of being wrapped
    in an explicit list of ``create_task`` calls. Prints the start time, the
    finish time and the total elapsed time.

    :return: None
    """
    start_date = datetime.datetime.now()
    print(f"Start at {start_date:%Y-%m-%d %H:%M:%S}")

    await asyncio.gather(*(test_raw_online(q) for q in allquery[:50]))

    end_date = datetime.datetime.now()
    print(f"finished at {end_date:%Y-%m-%d %H:%M:%S}")
    # total_seconds() keeps the fractional part and the days component;
    # timedelta.seconds truncates to whole seconds, hiding sub-second runs.
    print(f"total used time: {(end_date - start_date).total_seconds():.3f} s")


main5()

<coroutine object main5 at 0x7fcca8d90270>