In [None]:
# Execution provider to target; the two settings below are derived from it.
ExecutionProvider = "OpenVINOExecutionProvider"

# The float16 flag is switched on only for these providers.
use_float16 = ExecutionProvider in (
    "DmlExecutionProvider",
    "NvTensorRTRTXExecutionProvider",
    "MIGraphXExecutionProvider",
)

# OpenVINO loads its own model file; every other provider uses the default one.
onnx_model_path = (
    "./model/openvino_model_quant_st.onnx"
    if ExecutionProvider == "OpenVINOExecutionProvider"
    else "./model/model.onnx"
)

In [None]:
# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing
import subprocess
import json
import sys
import os
import onnxruntime as ort

def register_execution_providers():
    """Register execution provider libraries reported by the ``winml.py`` helper.

    Runs ``winml.py`` (resolved against the current working directory) with
    the current Python interpreter, parses its standard output as a JSON
    object, and passes every key/value pair to
    ``ort.register_execution_provider_library``.

    NOTE(review): presumably each key is a provider name and each value the
    path of its library -- confirm against ``winml.py``.

    Raises:
        subprocess.CalledProcessError: if the helper exits with a non-zero status.
        json.JSONDecodeError: if the helper's output is not valid JSON.
    """
    script_path = os.path.abspath('winml.py')
    raw_output = subprocess.check_output([sys.executable, script_path], text=True)
    for ep_name, library_path in json.loads(raw_output).items():
        ort.register_execution_provider_library(ep_name, library_path)

# Run the registration as soon as this cell executes.
# NOTE(review): presumably this has to happen before a later cell calls
# ort.get_ep_devices() to look up the provider -- confirm.
register_execution_providers()

In [None]:
from PIL import Image
import requests
 
from transformers import ChineseCLIPProcessor
import onnxruntime as ort
import numpy as np
import torch
 
# Processor used below to turn the text labels and the image into model inputs.
processor = ChineseCLIPProcessor.from_pretrained("OFA-Sys/chinese-clip-vit-base-patch16")

url = "https://clip-cn-beijing.oss-cn-beijing.aliyuncs.com/pokemon.jpeg"
# Bound the wait and fail loudly on an HTTP error; without these a stalled
# server hangs the cell and an error page reaches PIL as a broken "image".
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)

# Candidate labels: Squirtle, Bulbasaur, Charmander, Pikachu (in that order).
inputs = processor(
    text=["杰尼龟", "妙蛙种子", "小火龙", "皮卡丘"],
    images=image,
    return_tensors="np",
    padding="max_length",
    max_length=77,
    truncation=True,
)
 

def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):
    """Attach the first matching execution-provider device to ``session_options``.

    Scans ``ort.get_ep_devices()`` in order for the first entry whose
    ``ep_name`` equals ``ep_name`` and whose ``device.type`` equals
    ``device_type``. If one is found, a message is printed and the entry is
    passed to ``session_options.add_provider_for_devices``. If none is found
    the function does nothing.

    Args:
        session_options: object exposing ``add_provider_for_devices``.
        ep_name: execution provider name to match.
        device_type: device type to match.
        ep_options: options forwarded with the device; an empty dict is used
            when this is ``None``.

    Returns:
        None.
    """
    match = next(
        (
            candidate
            for candidate in ort.get_ep_devices()
            if candidate.ep_name == ep_name and candidate.device.type == device_type
        ),
        None,
    )
    if match is None:
        # No matching device: leave the session options untouched.
        return

    print(f"Adding {ep_name} for {device_type}")
    provider_options = ep_options if ep_options is not None else {}
    session_options.add_provider_for_devices([match], provider_options)
 
# Build session options that target the configured execution provider on an NPU.
opts = ort.SessionOptions()

add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)
# add_ep_for_device adds nothing when no device matches, so stop here with an
# actionable message instead of a bare AssertionError.
assert opts.has_providers(), (
    f"No NPU device found for {ExecutionProvider}; check that the execution "
    "provider library is registered and the hardware is available"
)

session = ort.InferenceSession(onnx_model_path, sess_options=opts)
logits_per_image = session.run(
    ["logits_per_image"],
    {
        # Token inputs are fed as int64.
        "input_ids": inputs['input_ids'].astype(np.int64),
        "attention_mask": inputs['attention_mask'].astype(np.int64),
        # Pixel values are cast to float16 only when use_float16 is set.
        "pixel_values": inputs['pixel_values'].astype(np.float16) if use_float16 else inputs['pixel_values'],
    },
)

# Only one output was requested, so index 0 is logits_per_image.
# NOTE(review): dim=1 is presumably the text-label axis -- confirm.
probs = torch.tensor(logits_per_image[0]).softmax(dim=1)
print("Label probs:", probs)