In [None]:
from openvino.runtime import Core, PartialShape
import numpy as np
import timeit

# Benchmark: synchronous CPU inference of the ResNet50 ONNX model on a fixed
# random batch of shape (8, 3, 224, 224).
#
# np.random.randn returns float64.  The array is cast to float32 once, up
# front, so the timed loop does not depend on how the runtime treats a
# float64 array on every call.
# NOTE(review): assumes the model input precision is f32 - confirm.
image_fake = np.random.randn(8, 3, 224, 224).astype(np.float32)

ie = Core()
onnx_model_path = './onnx/resnet50-v1-7.onnx'
model_onnx = ie.read_model(model=onnx_model_path)
input_layer = next(iter(model_onnx.inputs))
print(input_layer.any_name)

# Alternative hint for batch-oriented runs: {"PERFORMANCE_HINT": "THROUGHPUT"}
compiled_model_onnx = ie.compile_model(
    model=model_onnx,
    device_name='CPU',
    config={"PERFORMANCE_HINT": "LATENCY"})
request = compiled_model_onnx.create_infer_request()

# Warm-up: 100 untimed inferences before measuring.
for _ in range(100):
    request.infer({input_layer.any_name: image_fake})

# Prints the total wall time (seconds) of 1000 synchronous inferences.
print(
    'resnet50:',
    timeit.timeit(
        'request.infer({input_layer.any_name: image_fake})',
        number=1000,
        globals=globals()))


In [None]:
# dynamic shape
from openvino.runtime import Core, PartialShape
import numpy as np
import timeit

# Benchmark: synchronous CPU inference of the ResNet50 ONNX model after
# reshaping it so the first dimension is left dynamic (-1).
ie = Core()
onnx_model_path = './onnx/resnet50.onnx'
model_onnx = ie.read_model(model=onnx_model_path)
model_onnx.reshape([-1, 224, 224, 3])
input_layer = next(iter(model_onnx.inputs))

# Alternative hint for batch-oriented runs: {"PERFORMANCE_HINT": "THROUGHPUT"}
compiled_model_onnx = ie.compile_model(
    model=model_onnx,
    device_name='CPU',
    config={"PERFORMANCE_HINT": "LATENCY"})
request = compiled_model_onnx.create_infer_request()

# np.random.randn returns float64; cast once to float32 so no dtype handling
# happens inside the timed loop.
# NOTE(review): assumes the model input precision is f32 - confirm.
image_fake = np.random.randn(4, 224, 224, 3).astype(np.float32)

# Warm-up uses exactly the same call form (name-keyed dict) as the timed
# statement below, so the measured code path is the one that was warmed up.
for _ in range(100):
    request.infer({input_layer.any_name: image_fake})

# Prints the total wall time (seconds) of 1000 synchronous inferences.
print(
    'resnet50:',
    timeit.timeit(
        'request.infer({input_layer.any_name: image_fake})',
        number=1000,
        globals=globals()))

In [None]:
from openvino.runtime import Core#, PartialShape
#import numpy as np
#import timeit

#image_fake = np.random.randn(8, 224, 224, 3)

# Read the text-classification ONNX model, give its two inputs new shapes and
# compile it for CPU with the latency hint.
ie = Core()
onnx_model_path = '/workspace/case-text-classification/inference/model.onnx'
model_onnx = ie.read_model(model=onnx_model_path)

# Shapes requested for each named input.
# Presumably -1 marks a dynamic dimension and (1, 32) a bounded range - see the
# OpenVINO Model.reshape documentation to confirm.
# NOTE(review): 'input_2' is given a fully static [1, 32] while 'input_1' is
# dynamic - confirm this asymmetry is intended.
input_shapes = {
    'input_1': [-1, (1, 32)],
    'input_2': [1, 32],
}
model_onnx.reshape(input_shapes)

# Optional: persist the reshaped model as .xml/.bin files.
# from openvino.offline_transformations import serialize
# serialize(
#     model=model_onnx,
#     model_path="/workspace/case-text-classification/inference/onnx_model.xml",
#     weights_path="/workspace/case-text-classification/inference/onnx_model.bin"
# )

# Swap "LATENCY" for "THROUGHPUT" to try the other performance hint.
cpu_config = {"PERFORMANCE_HINT": "LATENCY"}
compiled_model_onnx = ie.compile_model(
    model=model_onnx, device_name='CPU', config=cpu_config)

In [None]:
from openvino.runtime import Core, PartialShape

# Read the text-classification model from its .xml file and compile it for
# CPU without any extra configuration.
ie = Core()
model_path = '/workspace/case-text-classification/inference/model.xml'
model_onnx = ie.read_model(model=model_path)

# A performance hint can be added by passing
# config={"PERFORMANCE_HINT": "THROUGHPUT"} or {"PERFORMANCE_HINT": "LATENCY"}.
compiled_model_onnx = ie.compile_model(model=model_onnx, device_name='CPU')

In [None]:
# Load the same ResNet50 network into three runtimes (TensorFlow SavedModel,
# ONNX Runtime, OpenVINO), run one random input through each and check that
# the outputs agree within tolerance.

# --- TensorFlow SavedModel --------------------------------------------------
# CUDA_VISIBLE_DEVICES is emptied before TensorFlow is imported.
import os
os.environ['CUDA_VISIBLE_DEVICES'] = ''
import tensorflow as tf

saved_model_path = 'saved_model'
tf_model = tf.saved_model.load(saved_model_path)

# --- ONNX Runtime -----------------------------------------------------------
import onnxruntime as ort

ort_session = ort.InferenceSession(
    './onnx/resnet50.onnx',
    providers=['CPUExecutionProvider'],
)

# --- OpenVINO (model read from ONNX) ----------------------------------------
from openvino.runtime import Core, AsyncInferQueue

ie = Core()
onnx_model_path = './onnx/resnet50.onnx'
model_onnx = ie.read_model(model=onnx_model_path)
input_layer = next(iter(model_onnx.inputs))
throughput_config = {"PERFORMANCE_HINT": "THROUGHPUT"}
compiled_model_onnx = ie.compile_model(
    model=model_onnx, device_name='CPU', config=throughput_config)
request = compiled_model_onnx.create_infer_request()

# --- OpenVINO INT8 model (only read here; it is not compiled in this cell) ---
ir_model_path = 'pot/results/resnet50_DefaultQuantization/2022-04-20_17-33-03/optimized/resnet50.xml'
model_ir = ie.read_model(model=ir_model_path)

# --- Shared random input ----------------------------------------------------
import numpy as np

dummy_input = np.random.randn(1, 224, 224, 3).astype(np.float32)

# --- One forward pass per runtime -------------------------------------------
tf_output = tf_model(tf.convert_to_tensor(dummy_input))

ort_input_name = ort_session.get_inputs()[0].name
ort_output = ort_session.run(None, {ort_input_name: dummy_input})

request.infer({input_layer.any_name: dummy_input})
ov_output = request.get_output_tensor(0).data

# TensorFlow is the reference; ONNX Runtime is checked first, then OpenVINO.
for candidate in (ort_output[0], ov_output):
    np.testing.assert_allclose(tf_output, candidate, rtol=1e-03, atol=1e-05)

In [None]:
import time

# Benchmark TensorFlow, ONNX Runtime and OpenVINO (ONNX-derived and INT8
# models) over several batch sizes.  Relies on tf, np, tf_model, ort_session,
# ie, AsyncInferQueue, model_onnx and model_ir from the preceding setup cell.

warm_up_iters = 100
inference_iters = 1000

batch_size = [1, 2, 4, 8, 16]


def _time_sync(run_once, warm_up, iters):
    """Return the seconds taken by `iters` calls of `run_once()`.

    `warm_up` untimed calls are made first.  time.perf_counter() is used
    because it is the clock intended for measuring short durations.
    """
    for _ in range(warm_up):
        run_once()
    start = time.perf_counter()
    for _ in range(iters):
        run_once()
    return time.perf_counter() - start


def _time_openvino_async(model, inputs, warm_up, iters, jobs=16):
    """Reshape and compile `model` for CPU, then time `iters` async inferences.

    Args:
        model: model object returned by `ie.read_model`; it is reshaped in place.
        inputs: numpy array fed to the model's first input on every request.
        warm_up: number of untimed requests issued before measuring.
        iters: number of timed requests.
        jobs: number of requests in the AsyncInferQueue.

    Returns:
        Seconds from the first timed `start_async` until `wait_all` returns.
    """
    model.reshape(list(inputs.shape))
    # Look the input name up on the model being benchmarked rather than
    # reusing a name taken from a different model.
    feed = {next(iter(model.inputs)).any_name: inputs}
    compiled = ie.compile_model(
        model=model,
        device_name='CPU',
        config={"PERFORMANCE_HINT": "THROUGHPUT"})
    queue = AsyncInferQueue(compiled, jobs)

    for _ in range(warm_up):
        queue.start_async(inputs=feed)
    queue.wait_all()

    start = time.perf_counter()
    for _ in range(iters):
        queue.start_async(inputs=feed)
    queue.wait_all()  # the timed region includes draining the queue
    return time.perf_counter() - start


for bs in batch_size:
    dummy_input = np.random.randn(bs, 224, 224, 3).astype(np.float32)

    # tensorflow saved_model
    tf_input = tf.convert_to_tensor(dummy_input)
    tf_time = _time_sync(
        lambda: tf_model(tf_input), warm_up_iters, inference_iters)
    print(f'tensorflow: bs {bs}, {tf_time} s')

    # onnxruntime
    ort_inputs = {ort_session.get_inputs()[0].name: dummy_input}
    ort_time = _time_sync(
        lambda: ort_session.run(None, ort_inputs),
        warm_up_iters, inference_iters)
    print(f'onnxruntime: bs {bs}, {ort_time} s')

    # openvino (model read from ONNX)
    ov_time = _time_openvino_async(
        model_onnx, dummy_input, warm_up_iters, inference_iters)
    print(f'openvino-fp32: bs {bs}, {ov_time} s')

    # openvino INT8
    ov_time = _time_openvino_async(
        model_ir, dummy_input, warm_up_iters, inference_iters)
    print(f'openvino-int8: bs {bs}, {ov_time} s')

In [None]:
import time

import numpy as np

# Benchmark the Keras-loaded SavedModel through a tf.function wrapper.
# warm_up_iters and inference_iters are taken from the earlier benchmark cell.
batch_size = [1, 2, 4, 8, 16]


@tf.function
def warp_tf_model(model, inputs):
    """Call `model` on `inputs` inside a tf.function and return its result."""
    return model(inputs)


# Loading the model does not depend on the batch size, so do it once instead
# of on every loop iteration; the same model object is then reused throughout.
saved_model_path = 'saved_model'
keras_model = tf.keras.models.load_model(saved_model_path)

for bs in batch_size:
    # Build an input of the batch size actually being reported; without this
    # every iteration would time whatever array an earlier cell left behind.
    dummy_input = np.random.randn(bs, 224, 224, 3).astype(np.float32)
    tf_input = tf.convert_to_tensor(dummy_input)

    # warm up
    for _ in range(warm_up_iters):
        warp_tf_model(keras_model, tf_input)

    # inference test (perf_counter is the clock intended for durations)
    start_time = time.perf_counter()
    for _ in range(inference_iters):
        warp_tf_model(keras_model, tf_input)
    tf_time = time.perf_counter() - start_time
    print(f'tensorflow keras: bs {bs}, {tf_time} s')

In [None]:
# int8
# Standalone OpenVINO benchmark of the quantized ResNet50 model over several
# batch sizes, using an asynchronous inference queue on CPU.
import time

import numpy as np
from openvino.runtime import Core, AsyncInferQueue

ie = Core()
ir_model_path = 'pot/results/resnet50_DefaultQuantization/2022-04-20_17-33-03/optimized/resnet50.xml'
model_ir = ie.read_model(model=ir_model_path)

warm_up_iters = 100
inference_iters = 1000
# Loop-invariant settings, hoisted out of the batch-size loop.
throughput_config = {"PERFORMANCE_HINT": "THROUGHPUT"}
queue_jobs = 16

batch_size = [1, 2, 4, 8, 16]
for bs in batch_size:
    dummy_input = np.random.randn(bs, 224, 224, 3).astype(np.float32)

    # openvino: reshape to the current batch size, then recompile
    model_ir.reshape([bs, 224, 224, 3])
    input_layer = next(iter(model_ir.inputs))
    compiled_model_ir = ie.compile_model(
        model=model_ir,
        device_name='CPU',
        config=throughput_config)

    infer_queue = AsyncInferQueue(compiled_model_ir, queue_jobs)
    feed = {input_layer.any_name: dummy_input}

    # warm up
    for _ in range(warm_up_iters):
        infer_queue.start_async(inputs=feed)
    infer_queue.wait_all()

    # inference test; perf_counter is the clock intended for measuring
    # durations, and the timed region includes draining the queue.
    start_time = time.perf_counter()
    for _ in range(inference_iters):
        infer_queue.start_async(inputs=feed)
    infer_queue.wait_all()
    ov_time = time.perf_counter() - start_time
    print(f'openvino: bs {bs}, {ov_time} s')