
ultralytics 8.1.25 OpenVINO LATENCY and THROUGHPUT modes #8058

Merged Mar 6, 2024 — 36 commits (branch ov-throughput-mode into main)
Changes shown are from the first 21 commits.

Commits (36):
318a343  Enable OpenVINO models throughput mode (glenn-jocher, Feb 6, 2024)
92e13e4  Auto-format by https://ultralytics.com/actions (UltralyticsAssistant, Feb 6, 2024)
1275028  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Feb 7, 2024)
80130bc  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Feb 9, 2024)
3710e79  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Feb 12, 2024)
82e7d59  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Feb 12, 2024)
53a94bb  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Feb 13, 2024)
5eb2778  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Feb 17, 2024)
f76ff04  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Feb 20, 2024)
8a8ec7d  Add throughput mode code (glenn-jocher, Feb 20, 2024)
5f52ed3  Add throughput mode code (glenn-jocher, Feb 20, 2024)
5477165  Add compile_model config arg (glenn-jocher, Feb 20, 2024)
7240c1c  Update dependency from 2023.0 to 2023.3 (glenn-jocher, Feb 20, 2024)
ecc7d42  Update dependency from 2023.0 to 2023.3 (glenn-jocher, Feb 20, 2024)
6b67d1c  Debug (glenn-jocher, Feb 20, 2024)
8bbd760  Simplify batch dim handling (glenn-jocher, Feb 20, 2024)
0ca45a3  Cleanup (glenn-jocher, Feb 20, 2024)
46b7263  Cleanup (glenn-jocher, Feb 20, 2024)
73e09f4  Cleanup (glenn-jocher, Feb 20, 2024)
48cd8f2  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Feb 20, 2024)
e9bae3d  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Feb 21, 2024)
ae898e1  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Mar 5, 2024)
11c1b3c  Update autobackend.py (glenn-jocher, Mar 5, 2024)
5919ac5  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Mar 5, 2024)
5f26239  Remove mo import (glenn-jocher, Mar 5, 2024)
d867e38  Fix ov imports (glenn-jocher, Mar 5, 2024)
d0c095e  Update inference mode logic (glenn-jocher, Mar 5, 2024)
f9f8b1e  Add userdata input (glenn-jocher, Mar 5, 2024)
21778ad  Update ultralytics/nn/autobackend.py (glenn-jocher, Mar 5, 2024)
1be6b7c  Cleanup autobackend comments (glenn-jocher, Mar 5, 2024)
f19995e  Correct THROUGHPUT mode sort order (glenn-jocher, Mar 5, 2024)
0ac74bd  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Mar 5, 2024)
8260d01  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Mar 5, 2024)
aeb610e  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Mar 5, 2024)
9c3f16c  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Mar 6, 2024)
f1b4685  Merge branch 'main' into ov-throughput-mode (glenn-jocher, Mar 6, 2024)
2 changes: 1 addition & 1 deletion ultralytics/engine/exporter.py
@@ -399,7 +399,7 @@ def export_onnx(self, prefix=colorstr("ONNX:")):
     @try_export
     def export_openvino(self, prefix=colorstr("OpenVINO:")):
         """YOLOv8 OpenVINO export."""
-        check_requirements("openvino-dev>=2023.0")  # requires openvino-dev: https://pypi.org/project/openvino-dev/
+        check_requirements("openvino-dev>=2023.3")  # requires openvino-dev: https://pypi.org/project/openvino-dev/
         import openvino.runtime as ov  # noqa
         from openvino.tools import mo  # noqa
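The autobackend change further down compiles the exported model with a PERFORMANCE_HINT entry in the compile_model config. As a hedged sketch of what those hints mean (the helper function and its batch-size heuristic are illustrative assumptions, not part of this PR; the key and value strings are the ones used in the diff):

```python
# Illustrative only: the two OpenVINO performance-hint config dicts used when
# compiling a model. "LATENCY" favors single-request response time;
# "THROUGHPUT" favors total requests per second.
LATENCY = {"PERFORMANCE_HINT": "LATENCY"}
THROUGHPUT = {"PERFORMANCE_HINT": "THROUGHPUT"}


def ov_config(batch_size: int) -> dict:
    """Hypothetical heuristic: latency hint for batch-1, throughput otherwise."""
    return LATENCY if batch_size == 1 else THROUGHPUT


print(ov_config(1))   # {'PERFORMANCE_HINT': 'LATENCY'}
print(ov_config(16))  # {'PERFORMANCE_HINT': 'THROUGHPUT'}
```

The PR at this point compiles with THROUGHPUT unconditionally; later commits ("Update inference mode logic") revisit how the mode is chosen.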
34 changes: 31 additions & 3 deletions ultralytics/nn/autobackend.py
@@ -180,7 +180,7 @@
     metadata = session.get_modelmeta().custom_metadata_map  # metadata
 elif xml:  # OpenVINO
     LOGGER.info(f"Loading {w} for OpenVINO inference...")
-    check_requirements("openvino>=2023.0")  # requires openvino-dev: https://pypi.org/project/openvino-dev/
+    check_requirements("openvino>=2023.3")  # requires openvino-dev: https://pypi.org/project/openvino-dev/
     from openvino.runtime import Core, Layout, get_batch  # noqa

     core = Core()
@@ -193,7 +193,12 @@
     batch_dim = get_batch(ov_model)
     if batch_dim.is_static:
         batch_size = batch_dim.get_length()
-    ov_compiled_model = core.compile_model(ov_model, device_name="AUTO")  # AUTO selects best available device
+    ov_compiled_model = core.compile_model(
+        ov_model,
+        device_name="AUTO",  # AUTO selects best available device
+        config={"PERFORMANCE_HINT": "THROUGHPUT"},
+    )
+    input_name = ov_compiled_model.input().get_any_name()
     metadata = w.parent / "metadata.yaml"
 elif engine:  # TensorRT
     LOGGER.info(f"Loading {w} for TensorRT inference...")
@@ -393,7 +398,30 @@
     y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
 elif self.xml:  # OpenVINO
     im = im.cpu().numpy()  # FP32
-    y = list(self.ov_compiled_model(im).values())
+
+    inference_mode = "throughput"  # either 'latency' or 'throughput'
+    # Latency optimized inference at batch-size 1
+    if inference_mode == "latency":
+        y = list(self.ov_compiled_model(im).values())
+
+    # Throughput optimized inference using OpenVINO AsyncInferQueue
+    elif inference_mode == "throughput":
+        from openvino.runtime import AsyncInferQueue
+
+        results = []  # this list will be filled by the callback function
+
+        def callback(request, userdata):
+            """Callback function to handle the completion of an async inference request."""
+            results.append(request.results)  # directly append the inference result to 'results'
+
+        # Create AsyncInferQueue, set the callback and start asynchronous inference for each input image
+        async_queue = AsyncInferQueue(self.ov_compiled_model, 8)  # adjust the queue size as needed
+        async_queue.set_callback(callback)
+        for image in im:
+            async_queue.start_async(inputs={self.input_name: image[None]})  # expand batch dim
+        async_queue.wait_all()  # wait for all inference requests to complete
+        y = [list(r.values()) for r in results][0]
+
 elif self.engine:  # TensorRT
     if self.dynamic and im.shape != self.bindings["images"].shape:
         i = self.model.get_binding_index("images")
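The throughput branch hinges on OpenVINO's AsyncInferQueue callback pattern: submit each batch item asynchronously, collect outputs in a callback, then wait for all requests. As a stand-in sketch of that pattern using only the Python standard library (MiniAsyncQueue and the toy infer function are hypothetical, not OpenVINO APIs), which also shows why userdata indices matter — completion order is not submission order, the issue the "Correct THROUGHPUT mode sort order" commit addresses:

```python
from concurrent.futures import ThreadPoolExecutor


class MiniAsyncQueue:
    """Toy imitation of the AsyncInferQueue pattern (NOT the OpenVINO class)."""

    def __init__(self, infer_fn, jobs=4):
        self.infer_fn = infer_fn  # stand-in for a compiled model
        self.pool = ThreadPoolExecutor(max_workers=jobs)
        self.callback = None
        self.futures = []

    def set_callback(self, callback):
        self.callback = callback

    def start_async(self, inputs, userdata):
        fut = self.pool.submit(self.infer_fn, inputs)
        # Invoke the user callback with (result, userdata) when inference finishes
        fut.add_done_callback(lambda f, u=userdata: self.callback(f.result(), u))
        self.futures.append(fut)

    def wait_all(self):
        for f in self.futures:  # block until every request has completed
            f.result()
        self.pool.shutdown(wait=False)


results = {}  # filled by the callback, keyed by submission index


def callback(result, userdata):
    results[userdata] = result  # userdata index lets us restore order later


queue = MiniAsyncQueue(lambda x: x * 10, jobs=4)  # toy "model": multiply by 10
queue.set_callback(callback)
for i, image in enumerate([1, 2, 3, 4]):  # each "image" is one batch item
    queue.start_async(image, userdata=i)
queue.wait_all()
y = [results[i] for i in sorted(results)]  # restore submission order
print(y)  # [10, 20, 30, 40]
```

The diff shown here (21-commit state) appends raw results in completion order; keying by userdata and sorting, as sketched above, is the shape of the later fix.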