ultralytics 8.1.25 OpenVINO LATENCY and THROUGHPUT modes #8058

Merged · 36 commits · Mar 6, 2024 (changes shown as of the first 28 commits)

Commits
318a343  Enable OpenVINO models throughput mode (glenn-jocher, Feb 6, 2024)
92e13e4  Auto-format by https://ultralytics.com/actions (UltralyticsAssistant, Feb 6, 2024)
1275028  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Feb 7, 2024)
80130bc  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Feb 9, 2024)
3710e79  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Feb 12, 2024)
82e7d59  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Feb 12, 2024)
53a94bb  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Feb 13, 2024)
5eb2778  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Feb 17, 2024)
f76ff04  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Feb 20, 2024)
8a8ec7d  Add throughput mode code (glenn-jocher, Feb 20, 2024)
5f52ed3  Add throughput mode code (glenn-jocher, Feb 20, 2024)
5477165  Add compile_model config arg (glenn-jocher, Feb 20, 2024)
7240c1c  Update dependency from 2023.0 to 2023.3 (glenn-jocher, Feb 20, 2024)
ecc7d42  Update dependency from 2023.0 to 2023.3 (glenn-jocher, Feb 20, 2024)
6b67d1c  Debug (glenn-jocher, Feb 20, 2024)
8bbd760  Simplify batch dim handling (glenn-jocher, Feb 20, 2024)
0ca45a3  Cleanup (glenn-jocher, Feb 20, 2024)
46b7263  Cleanup (glenn-jocher, Feb 20, 2024)
73e09f4  Cleanup (glenn-jocher, Feb 20, 2024)
48cd8f2  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Feb 20, 2024)
e9bae3d  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Feb 21, 2024)
ae898e1  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Mar 5, 2024)
11c1b3c  Update autobackend.py (glenn-jocher, Mar 5, 2024)
5919ac5  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Mar 5, 2024)
5f26239  Remove mo import (glenn-jocher, Mar 5, 2024)
d867e38  Fix ov imports (glenn-jocher, Mar 5, 2024)
d0c095e  Update inference mode logic (glenn-jocher, Mar 5, 2024)
f9f8b1e  Add userdata input (glenn-jocher, Mar 5, 2024)
21778ad  Update ultralytics/nn/autobackend.py (glenn-jocher, Mar 5, 2024)
1be6b7c  Cleanup autobackend comments (glenn-jocher, Mar 5, 2024)
f19995e  Correct THROUGHPUT mode sort order (glenn-jocher, Mar 5, 2024)
0ac74bd  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Mar 5, 2024)
8260d01  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Mar 5, 2024)
aeb610e  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Mar 5, 2024)
9c3f16c  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Mar 6, 2024)
f1b4685  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Mar 6, 2024)
ultralytics/engine/exporter.py (6 changes: 3 additions & 3 deletions)

@@ -411,8 +411,8 @@ def export_onnx(self, prefix=colorstr("ONNX:")):
    @try_export
    def export_openvino(self, prefix=colorstr("OpenVINO:")):
        """YOLOv8 OpenVINO export."""
-       check_requirements("openvino>=2023.3")  # requires openvino: https://pypi.org/project/openvino-dev/
-       import openvino as ov  # noqa
+       check_requirements("openvino>=2023.3")  # requires openvino: https://pypi.org/project/openvino/
+       import openvino as ov

        LOGGER.info(f"\n{prefix} starting export with openvino {ov.__version__}...")
        assert TORCH_1_13, f"OpenVINO export requires torch>=1.13.0 but torch=={torch.__version__} is installed"
@@ -433,7 +433,7 @@ def serialize(ov_model, file):
            if self.model.task != "classify":
                ov_model.set_rt_info("fit_to_window_letterbox", ["model_info", "resize_type"])

-           ov.save_model(ov_model, file, compress_to_fp16=self.args.half)
+           ov.runtime.save_model(ov_model, file, compress_to_fp16=self.args.half)
            yaml_save(Path(file).parent / "metadata.yaml", self.metadata)  # add metadata.yaml

        if self.args.int8:
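For reference, this export path is reached through the standard Ultralytics Python API; a minimal sketch (assuming ultralytics>=8.1.25 and openvino>=2023.3 are installed, with "yolov8n.pt" as a placeholder weights file):

from ultralytics import YOLO

model = YOLO("yolov8n.pt")  # placeholder weights file
model.export(format="openvino", half=True)  # half=True flows into compress_to_fp16 above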
ultralytics/nn/autobackend.py (34 changes: 29 additions & 5 deletions)

@@ -180,7 +180,7 @@
            metadata = session.get_modelmeta().custom_metadata_map  # metadata
        elif xml:  # OpenVINO
            LOGGER.info(f"Loading {w} for OpenVINO inference...")
-           check_requirements("openvino>=2023.3")  # requires openvino: https://pypi.org/project/openvino-dev/
+           check_requirements("openvino>=2023.3")
            import openvino as ov  # noqa

            core = ov.Core()
@@ -193,7 +193,14 @@
            batch_dim = ov.get_batch(ov_model)
            if batch_dim.is_static:
                batch_size = batch_dim.get_length()
-           ov_compiled_model = core.compile_model(ov_model, device_name="AUTO")  # AUTO selects best available device
+
+           inference_mode = "LATENCY"  # either 'LATENCY', 'THROUGHPUT' (not recommended), or 'CUMULATIVE_THROUGHPUT'
+           ov_compiled_model = core.compile_model(
+               ov_model,
+               device_name="AUTO",  # AUTO selects best available device, do not modify
+               config={"PERFORMANCE_HINT": inference_mode},
+           )
+           input_name = ov_compiled_model.input().get_any_name()
            metadata = w.parent / "metadata.yaml"
        elif engine:  # TensorRT
            LOGGER.info(f"Loading {w} for TensorRT inference...")
@@ -327,8 +334,7 @@

            raise TypeError(
                f"model='{w}' is not a supported model format. "
-               "See https://docs.ultralytics.com/modes/predict for help."
-               f"\n\n{export_formats()}"
+               f"See https://docs.ultralytics.com/modes/predict for help.\n\n{export_formats()}"
            )

        # Load external metadata YAML
@@ -393,7 +399,25 @@
            y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
        elif self.xml:  # OpenVINO
            im = im.cpu().numpy()  # FP32
-           y = list(self.ov_compiled_model(im).values())
+
+           if self.inference_mode == "CUMULATIVE_THROUGHPUT":  # optimized for larger batch-sizes
+               results = []  # this list will be filled by the callback function
+
+               def callback(request, userdata):
+                   """Callback function to handle the completion of an async inference request."""
+                   results.append(request.results)  # directly append the inference result to 'results'
+
+               # Create AsyncInferQueue, set the callback and start asynchronous inference for each input image
+               async_queue = self.ov.runtime.AsyncInferQueue(self.ov_compiled_model)
+               async_queue.set_callback(callback)
+               for i, image in enumerate(im):
+                   async_queue.start_async(inputs={self.input_name: image[None]}, userdata=i)  # expand image to BCHW
+               async_queue.wait_all()  # wait for all inference requests to complete
+               y = [list(r.values()) for r in results][0]
+
+           else:  # inference_mode = "LATENCY", optimized for fastest first result at batch-size 1
+               y = list(self.ov_compiled_model(im).values())

        elif self.engine:  # TensorRT
            if self.dynamic and im.shape != self.bindings["images"].shape:
                i = self.model.get_binding_index("images")
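For context on the CUMULATIVE_THROUGHPUT branch, here is a self-contained sketch of the same AsyncInferQueue pattern outside AutoBackend; the model path, batch size, and input shape are placeholder assumptions. Since callbacks can complete out of submission order, the sketch tags each request with its image index via userdata and sorts at the end (compare the "Correct THROUGHPUT mode sort order" commit f19995e above).

import numpy as np
import openvino as ov  # assumes openvino>=2023.3

core = ov.Core()
compiled = core.compile_model(
    core.read_model("yolov8n_openvino_model/yolov8n.xml"),  # placeholder path
    device_name="AUTO",
    config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
)
input_name = compiled.input().get_any_name()

results = []  # filled by the callback as requests complete

def callback(request, userdata):
    """Collect each finished request; userdata carries the submitted image index."""
    results.append((userdata, request.results))

async_queue = ov.runtime.AsyncInferQueue(compiled)  # job count defaults to an optimal value
async_queue.set_callback(callback)
batch = np.zeros((4, 3, 640, 640), dtype=np.float32)  # placeholder BCHW batch
for i, image in enumerate(batch):
    async_queue.start_async(inputs={input_name: image[None]}, userdata=i)  # expand CHW to 1CHW
async_queue.wait_all()  # block until every queued request has finished
results.sort(key=lambda r: r[0])  # restore submission order before consuming outputs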