In [None]:
# Enumerate the devices visible to the legacy Inference Engine API.
from openvino.inference_engine import IECore

ie = IECore()
devices = ie.available_devices

# Print each device id next to the value of its FULL_DEVICE_NAME metric.
for device in devices:
    device_name = ie.get_metric(metric_name='FULL_DEVICE_NAME', device_name=device)
    print(f"{device}: {device_name}")

In [None]:
# Load an OpenVINO model
# NOTE: this cell only instantiates the legacy IECore object; no model file
# is read here.
from openvino.inference_engine import IECore

ie = IECore()

In [None]:
# Load the ONNX model with the legacy Inference Engine API, build an
# executable network for the CPU, and describe its first input and output.
from openvino.inference_engine import IECore

ie = IECore()
onnx_model = './onnx/resnet50-v1-7.onnx'
net_onnx = ie.read_network(model=onnx_model)
exec_net_onnx = ie.load_network(device_name='CPU', network=net_onnx)

# The first key of each mapping is used as "the" input / output name by
# the later legacy-API cells.
input_layer = list(net_onnx.input_info)[0]
output_layer = list(net_onnx.outputs)[0]

# --- input side ---
print(net_onnx.input_info)
print(input_layer)

print(f"input layout: {net_onnx.input_info[input_layer].layout}")
print(f"input precision: {net_onnx.input_info[input_layer].precision}")
print(f"input shape: {net_onnx.input_info[input_layer].tensor_desc.dims}")

# --- output side ---
print(f"output layout: {net_onnx.outputs[output_layer].layout}")
print(f"output precision: {net_onnx.outputs[output_layer].precision}")
print(f"output shape: {net_onnx.outputs[output_layer].shape}")

In [None]:
# Write the network loaded above to 'export_onnx_model.xml'; a later cell
# reads this file back with the 2.0 API.
# NOTE(review): no weights path is passed here -- confirm where this API
# version places the accompanying .bin file.
net_onnx.serialize('export_onnx_model.xml')

In [None]:
# Benchmark synchronous inference with the legacy API on random input.
import numpy as np
import timeit

# np.random.randn produces float64. The image cell feeds this same network
# float32 data, so cast once here; otherwise every timed call is handed a
# float64 array rather than data in the dtype the network is actually fed.
image_fake = np.random.randn(1, 3, 224, 224).astype(np.float32)

# Warm-up runs, excluded from the measurement below.
for _ in range(100):
    exec_net_onnx.infer({input_layer: image_fake})

# Total wall time (seconds) for 1000 synchronous inferences.
print('resnet50:', timeit.timeit('exec_net_onnx.infer({input_layer: image_fake})',
                                  number=1000, globals=globals()))

In [None]:
# Classify a single image with the legacy Inference Engine API.
# Relies on `net_onnx`, `exec_net_onnx` and `input_layer` from the legacy
# model-loading cell.
import cv2
import numpy as np

image_filename = 'demo.jpeg'
image = cv2.imread(image_filename)
# cv2.imread reports failure by returning None instead of raising, which
# would otherwise surface as an opaque AttributeError on `image.shape`.
if image is None:
    raise FileNotFoundError(f"could not read image file: {image_filename}")
print(image.shape)

N, C, H, W = net_onnx.input_info[input_layer].tensor_desc.dims
resized_image = cv2.resize(src=image, dsize=(W, H))
print(resized_image.shape)

# Preprocess the way the ONNX Model Zoo ResNet models expect: RGB channel
# order (OpenCV loads BGR), pixel values scaled to [0, 1], then per-channel
# mean/std normalisation.
# NOTE(review): assumes './onnx/resnet50-v1-7.onnx' is the Model Zoo file --
# confirm if the model came from elsewhere.
rgb_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
channel_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
channel_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
normalized_image = (rgb_image - channel_mean) / channel_std

# HWC -> CHW, then add the leading batch axis.
input_data = np.expand_dims(np.transpose(normalized_image, (2, 0, 1)), axis=0).astype(np.float32)
print(input_data.shape)

result = exec_net_onnx.infer({input_layer: input_data})
# Index of the highest-scoring entry in the first output.
np.argmax(result[next(iter(net_onnx.outputs))])

In [None]:
# OpenVINO 2.0 API
# Create the runtime Core object. It is bound to `ie`, the same name the
# legacy-API cells above used for their IECore instance.
from openvino.runtime import Core

ie = Core()

In [None]:
# List every device reported by the runtime Core together with the value
# of its FULL_DEVICE_NAME property.
devices = ie.available_devices
for device in devices:
    device_name = ie.get_property(name='FULL_DEVICE_NAME', device_name=device)
    print(f"{device}: {device_name}")

In [None]:
# Read the ONNX file with the 2.0 API, compile it for the CPU, and show the
# input/output ports of both the model and the compiled model.
from openvino.runtime import Core

ie = Core()
onnx_model_path = './onnx/resnet50-v1-7.onnx'
model_onnx = ie.read_model(model=onnx_model_path)
compiled_model_onnx = ie.compile_model(device_name='CPU', model=model_onnx)

# Same four prints as before, in the same order: model inputs, model
# outputs, compiled-model inputs, compiled-model outputs.
for ports in (
    model_onnx.inputs,
    model_onnx.outputs,
    compiled_model_onnx.inputs,
    compiled_model_onnx.outputs,
):
    print(ports)

In [None]:
# Save the model read from ONNX to the given .xml and .bin paths; the cells
# below read 'export_onnx_model.xml' back in.
from openvino.offline_transformations import serialize

serialize(
    model=model_onnx,
    model_path='export_onnx_model.xml',
    weights_path='export_onnx_model.bin',
)

In [None]:
# Model input: read the exported model and describe its first input port.
from openvino.runtime import Core

ie = Core()
classification_model_xml = 'export_onnx_model.xml'
model = ie.read_model(model=classification_model_xml)

# Only the first input port is inspected.
input_layer = model.inputs[0]

# NOTE(review): `.shape` presumably requires fully static dimensions --
# confirm the exported model has no dynamic batch dimension.
print(f"input name: {input_layer.any_name}")
print(f"input precision: {input_layer.element_type}")
print(f"input shape: {input_layer.shape}")

In [None]:
# Model output: read the exported model and describe its first output port.
from openvino.runtime import Core

ie = Core()
classification_model_xml = 'export_onnx_model.xml'
model = ie.read_model(model=classification_model_xml)

# Only the first output port is inspected.
output_layer = model.outputs[0]

# NOTE(review): `.shape` presumably requires fully static dimensions --
# confirm the exported model has no dynamic batch dimension.
print(f"output name: {output_layer.any_name}")
print(f"output precision: {output_layer.element_type}")
print(f"output shape: {output_layer.shape}")

In [None]:
# Benchmark synchronous inference with the 2.0 API on a single random image.
import numpy as np
import timeit

from openvino.runtime import Core, PartialShape

# One definition of the input shape, shared by the reshape call and the
# fake input so the two cannot drift apart.
input_shape = [1, 3, 224, 224]

# np.random.randn produces float64. The image cell feeds this model float32
# data, so cast once here rather than handing float64 to every timed call.
image_fake = np.random.randn(*input_shape).astype(np.float32)

ie = Core()
onnx_model_path = './onnx/resnet50-v1-7.onnx'
model_onnx = ie.read_model(model=onnx_model_path)
input_layer = next(iter(model_onnx.inputs))

# Fix the input to a static shape before compiling.
model_onnx.reshape({input_layer: PartialShape(input_shape)})
compiled_model_onnx = ie.compile_model(
    model=model_onnx,
    device_name='CPU')
request = compiled_model_onnx.create_infer_request()

# Warm-up runs, excluded from the measurement below.
for _ in range(100):
    request.infer({input_layer.any_name: image_fake})

# Total wall time (seconds) for 1000 synchronous inferences.
print(
    'resnet50:',
    timeit.timeit(
        'request.infer({input_layer.any_name: image_fake})',
        number=1000,
        globals=globals()))

In [None]:
# Classify a single image with the OpenVINO 2.0 API.
import cv2
import numpy as np

from openvino.runtime import Core, PartialShape

image_filename = 'demo.jpeg'
image = cv2.imread(image_filename)
# cv2.imread reports failure by returning None instead of raising, which
# would otherwise surface as an opaque AttributeError on `image.shape`.
if image is None:
    raise FileNotFoundError(f"could not read image file: {image_filename}")
print(image.shape)

ie = Core()
onnx_model_path = './onnx/resnet50-v1-7.onnx'
model_onnx = ie.read_model(model=onnx_model_path)
# Pin a static single-image shape before compiling, as the benchmark cells
# do; unpacking `input_layer.shape` below needs concrete dimensions.
model_onnx.reshape({next(iter(model_onnx.inputs)): PartialShape([1, 3, 224, 224])})
compiled_model_onnx = ie.compile_model(model=model_onnx, device_name='CPU')
input_layer = next(iter(compiled_model_onnx.inputs))
output_layer = next(iter(compiled_model_onnx.outputs))

N, C, H, W = input_layer.shape
resized_image = cv2.resize(src=image, dsize=(W, H))
print(resized_image.shape)

# Preprocess the way the ONNX Model Zoo ResNet models expect: RGB channel
# order (OpenCV loads BGR), pixel values scaled to [0, 1], then per-channel
# mean/std normalisation.
# NOTE(review): assumes './onnx/resnet50-v1-7.onnx' is the Model Zoo file --
# confirm if the model came from elsewhere.
rgb_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
channel_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
channel_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
normalized_image = (rgb_image - channel_mean) / channel_std

# HWC -> CHW, then add the leading batch axis.
input_data = np.expand_dims(np.transpose(normalized_image, (2, 0, 1)), axis=0).astype(np.float32)
print(input_data.shape)

# Option 1: call the compiled model directly. Kept under its own name so it
# is not silently overwritten by option 2.
result_direct = compiled_model_onnx([input_data])[output_layer]

# Option 2: run through an explicit infer request and read the output tensor.
request = compiled_model_onnx.create_infer_request()
request.infer(inputs={input_layer.any_name: input_data})
result = request.get_output_tensor(output_layer.index).data

# Predicted class index along the last axis.
np.argmax(result, axis=-1)

In [18]:
# inference sync
# Benchmark synchronous inference with the 2.0 API on a batch of random
# images, compiled with the LATENCY performance hint.
import numpy as np
import timeit

from openvino.runtime import Core, PartialShape

# Single definition of the batch size, shared by the reshape call and the
# fake input so the two cannot drift apart.
batch = 8
input_shape = [batch, 3, 224, 224]

# np.random.randn produces float64. The image cell feeds this model float32
# data, so cast once here rather than handing float64 to every timed call.
image_fake = np.random.randn(*input_shape).astype(np.float32)

ie = Core()
onnx_model_path = './onnx/resnet50-v1-7.onnx'
model_onnx = ie.read_model(model=onnx_model_path)
input_layer = next(iter(model_onnx.inputs))

# Fix the input to a static batched shape before compiling.
model_onnx.reshape({input_layer: PartialShape(input_shape)})
compiled_model_onnx = ie.compile_model(
    model=model_onnx,
    device_name='CPU',
    config={"PERFORMANCE_HINT": "LATENCY"})
print(compiled_model_onnx.get_property("OPTIMAL_NUMBER_OF_INFER_REQUESTS"))
request = compiled_model_onnx.create_infer_request()

# Warm-up runs, excluded from the measurement below.
for _ in range(100):
    request.infer({input_layer.any_name: image_fake})

# Total wall time (seconds) for 1000 synchronous inferences.
print(
    'resnet50:',
    timeit.timeit(
        'request.infer({input_layer.any_name: image_fake})',
        number=1000,
        globals=globals()))

1
resnet50: 26.049174329265952


In [21]:
# inference async
# Benchmark asynchronous inference with the 2.0 API: a queue of infer
# requests is fed the same random batch repeatedly and the total wall time
# is printed.
import time

import numpy as np

from openvino.runtime import Core, AsyncInferQueue, PartialShape

# Two independent knobs that the original cell expressed through one
# variable: the model's batch dimension and the number of infer requests
# held by the queue. Both keep the original value of 8.
batch = 8
num_requests = 8

ie = Core()
onnx_model_path = './onnx/resnet50-v1-7.onnx'
model_onnx = ie.read_model(model=onnx_model_path)
input_layer = next(iter(model_onnx.inputs))

# Fix the input to a static batched shape before compiling.
model_onnx.reshape({input_layer: PartialShape([batch, 3, 224, 224])})
compiled_model_onnx = ie.compile_model(
    model=model_onnx,
    device_name='CPU',
    config={"PERFORMANCE_HINT": "LATENCY"})
print(compiled_model_onnx.get_property("OPTIMAL_NUMBER_OF_INFER_REQUESTS"))

infer_queue = AsyncInferQueue(compiled_model_onnx, num_requests)

# np.random.randn produces float64. The image cell feeds this model float32
# data, so cast once here rather than handing float64 to every timed call.
image_fake = np.random.randn(batch, 3, 224, 224).astype(np.float32)

# Warm-up submissions, excluded from the measurement below.
for _ in range(100):
    infer_queue.start_async(inputs={input_layer.any_name: image_fake})
infer_queue.wait_all()

# perf_counter is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
for _ in range(1000):
    infer_queue.start_async(inputs={input_layer.any_name: image_fake})
# Stop the clock only after every queued request has completed.
infer_queue.wait_all()

elapse_time = time.perf_counter() - start_time
print(elapse_time)

1
23.2129807472229
