
ultralytics 8.1.46 add TensorRT 10 support #9516

Merged · 41 commits · Apr 10, 2024
Changes from 9 commits
0995ecd
add support for TensorRT 10
Burhan-Q Apr 2, 2024
4a4c68f
add TensorRT10 compatibility
Burhan-Q Apr 2, 2024
079aa43
Auto-format by https://ultralytics.com/actions
UltralyticsAssistant Apr 2, 2024
6850472
Merge branch 'main' into trt10
glenn-jocher Apr 3, 2024
70c2f6b
Merge branch 'main' into trt10
glenn-jocher Apr 6, 2024
bc21f77
Merge branch 'main' into trt10
glenn-jocher Apr 6, 2024
02c4b99
Merge branch 'main' into trt10
glenn-jocher Apr 6, 2024
94ea376
Merge branch 'main' into trt10
glenn-jocher Apr 7, 2024
8418063
Update exporter.py
glenn-jocher Apr 7, 2024
32b2445
Updates
glenn-jocher Apr 7, 2024
1f553c2
Add support to exporting or inference with TensorRT 10.0.0b6, and fix…
ZouJiu1 Apr 7, 2024
d0a088d
Update __init__.py
glenn-jocher Apr 7, 2024
a090b75
Merge branch 'main' into trt10
glenn-jocher Apr 7, 2024
4c40158
Merge branch 'main' into trt10
glenn-jocher Apr 8, 2024
574a3a5
Removed changes from 1f553c2 due to errors, now working and fixes iss…
Burhan-Q Apr 8, 2024
0964835
Auto-format by https://ultralytics.com/actions
UltralyticsAssistant Apr 8, 2024
905fd05
ensure max shape dimension scales no smaller than 1
Burhan-Q Apr 8, 2024
139c2da
Merge branch 'trt10' of https://github.com/ultralytics/ultralytics in…
Burhan-Q Apr 8, 2024
c1c0046
Merge branch 'main' into trt10
glenn-jocher Apr 8, 2024
0a9a92f
Merge branch 'main' into trt10
glenn-jocher Apr 9, 2024
4e81132
Updates
glenn-jocher Apr 9, 2024
a38ba95
Merge remote-tracking branch 'origin/trt10' into trt10
glenn-jocher Apr 9, 2024
0cb3053
Merge branch 'main' into trt10
glenn-jocher Apr 9, 2024
2c03f83
Refactor constants out of for loop for speed
glenn-jocher Apr 9, 2024
d371949
Update exporter.py
glenn-jocher Apr 9, 2024
5a5e61f
refactored to use unified methods
Burhan-Q Apr 9, 2024
df6a3ff
Merge branch 'trt10' of https://github.com/ultralytics/ultralytics in…
Burhan-Q Apr 9, 2024
050b278
remove redundant line
Burhan-Q Apr 9, 2024
cd18b7b
remove self. as assigned automatically
glenn-jocher Apr 9, 2024
3017bad
Add TensorRT export test
glenn-jocher Apr 9, 2024
0e694cb
Merge remote-tracking branch 'origin/trt10' into trt10
glenn-jocher Apr 9, 2024
d263946
Update test_cuda.py
glenn-jocher Apr 9, 2024
e221ff5
refactored to include older interface for `tensorrt<8.6`
Burhan-Q Apr 9, 2024
3bab6b4
Merge branch 'trt10' of https://github.com/ultralytics/ultralytics in…
Burhan-Q Apr 9, 2024
529293f
refactor to include path for installs with `tensorrt<8.6`
Burhan-Q Apr 9, 2024
117a011
fix dynamic where self. is needed, align else, rearrange flow
Burhan-Q Apr 9, 2024
1f49b5d
correct for error with TensorRT 8.4.3.1 without method `get_tensor_sh…
Burhan-Q Apr 9, 2024
1ef7818
delete redundant trt10 definition
glenn-jocher Apr 10, 2024
9b96f5b
Align variable names in Exporter and Autobackend
glenn-jocher Apr 10, 2024
ba20133
Merge branch 'main' into trt10
glenn-jocher Apr 10, 2024
1fba2f3
move test to slow
glenn-jocher Apr 10, 2024
24 changes: 14 additions & 10 deletions ultralytics/engine/exporter.py
@@ -654,6 +654,7 @@
def export_engine(self, prefix=colorstr("TensorRT:")):
"""YOLOv8 TensorRT export https://developer.nvidia.com/tensorrt."""
assert self.im.device.type != "cpu", "export running on CPU but must be on GPU, i.e. use 'device=0'"
self.args.simplify = True

f_onnx, _ = self.export_onnx() # run before trt import https://github.com/ultralytics/ultralytics/issues/7016

try:
@@ -662,12 +663,10 @@
if LINUX:
check_requirements("nvidia-tensorrt", cmds="-U --index-url https://pypi.ngc.nvidia.com")
import tensorrt as trt # noqa

check_version(trt.__version__, "7.0.0", hard=True) # require tensorrt>=7.0.0

self.args.simplify = True

LOGGER.info(f"\n{prefix} starting export with TensorRT {trt.__version__}...")
is_trt_10 = int(trt.__version__.split(".")[0]) >= 10 # is TensorRT >= 10

assert Path(f_onnx).exists(), f"failed to export ONNX file: {f_onnx}"
f = self.file.with_suffix(".engine") # TensorRT engine file
logger = trt.Logger(trt.Logger.INFO)
@@ -676,7 +675,11 @@

builder = trt.Builder(logger)
config = builder.create_builder_config()
config.max_workspace_size = int(self.args.workspace * (1 << 30))
workspace = int(self.args.workspace * (1 << 30))
if is_trt_10:
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace)

else: # TensorRT versions 7, 8
config.max_workspace_size = workspace

flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
network = builder.create_network(flag)
parser = trt.OnnxParser(network, logger)
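The exporter's core switch is a major-version check on `trt.__version__`, plus converting the `workspace` argument from GiB to bytes. A minimal pure-Python sketch of those two steps (written here as standalone helper functions for illustration; the patch computes them inline):

```python
def is_trt10(trt_version: str) -> bool:
    """True when the TensorRT major version is 10 or newer, mirroring the
    patch's `int(trt.__version__.split(".")[0]) >= 10` check."""
    return int(trt_version.split(".")[0]) >= 10


def workspace_bytes(workspace_gib: float) -> int:
    """Convert the exporter's `workspace` argument (GiB) to bytes via 1 << 30."""
    return int(workspace_gib * (1 << 30))


print(is_trt10("8.6.1"))     # False
print(is_trt10("10.0.0b6"))  # True
print(workspace_bytes(4))    # 4294967296
```

On TensorRT >= 10 the byte count feeds `config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, ...)`; on the 7/8 path it is assigned to the removed-in-10 `config.max_workspace_size` attribute.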
@@ -699,23 +702,24 @@
profile.set_shape(inp.name, (1, *shape[1:]), (max(1, shape[0] // 2), *shape[1:]), shape)
config.add_optimization_profile(profile)
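The `max(1, shape[0] // 2)` clamp in the profile above (from commit 905fd05, "ensure max shape dimension scales no smaller than 1") keeps the opt-shape batch from collapsing to 0 when exporting with batch 1. A small sketch of the (min, opt, max) tuple construction, using hypothetical input shapes:

```python
def profile_shapes(shape: tuple) -> tuple:
    """Build (min, opt, max) shapes for a dynamic-batch TensorRT optimization
    profile; the opt batch is half the max batch but clamped to at least 1."""
    min_shape = (1, *shape[1:])
    opt_shape = (max(1, shape[0] // 2), *shape[1:])
    return min_shape, opt_shape, shape


print(profile_shapes((1, 3, 640, 640)))  # opt batch stays 1, not 0
print(profile_shapes((8, 3, 640, 640)))  # opt batch halves to 4
```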

LOGGER.info(
f"{prefix} building FP{16 if builder.platform_has_fast_fp16 and self.args.half else 32} engine as {f}"
)
if builder.platform_has_fast_fp16 and self.args.half:
half = builder.platform_has_fast_fp16 and self.args.half
LOGGER.info(f"{prefix} building FP{16 if half else 32} engine as {f}")
if half:

config.set_flag(trt.BuilderFlag.FP16)

# Free CUDA memory
del self.model
torch.cuda.empty_cache()

# Write file
with builder.build_engine(network, config) as engine, open(f, "wb") as t:
build = builder.build_serialized_network if is_trt_10 else builder.build_engine
with build(network, config) as engine, open(f, "wb") as t:

# Metadata
meta = json.dumps(self.metadata)
t.write(len(meta).to_bytes(4, byteorder="little", signed=True))
t.write(meta.encode())
# Model
t.write(engine.serialize())
t.write(engine if is_trt_10 else engine.serialize())


return f, None

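The `.engine` file written above is a small container: a 4-byte little-endian metadata length, a UTF-8 JSON metadata blob, then the serialized engine; `AutoBackend` reads it back in the same order. A self-contained round-trip sketch of that container format (fake engine bytes and hypothetical metadata keys for illustration):

```python
import json


def write_engine_blob(metadata: dict, engine_bytes: bytes) -> bytes:
    """Pack metadata + engine as exporter.py does: 4-byte little-endian
    signed length, UTF-8 JSON metadata, then the serialized engine."""
    meta = json.dumps(metadata).encode()
    return len(meta).to_bytes(4, byteorder="little", signed=True) + meta + engine_bytes


def read_engine_blob(blob: bytes) -> tuple:
    """Unpack as autobackend.py does when loading a .engine file."""
    meta_len = int.from_bytes(blob[:4], byteorder="little")
    metadata = json.loads(blob[4 : 4 + meta_len].decode("utf-8"))
    return metadata, blob[4 + meta_len :]


blob = write_engine_blob({"stride": 32, "names": {0: "person"}}, b"\x00fake-engine")
meta, engine = read_engine_blob(blob)
print(meta["stride"])  # 32
```

Because the engine bytes follow the length-prefixed header, the loader never needs to parse them to find the metadata, which is why the same container works for both the TensorRT 8 and TensorRT 10 build outputs.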
33 changes: 25 additions & 8 deletions ultralytics/nn/autobackend.py
@@ -234,23 +234,40 @@
meta_len = int.from_bytes(f.read(4), byteorder="little") # read metadata length
metadata = json.loads(f.read(meta_len).decode("utf-8")) # read metadata
model = runtime.deserialize_cuda_engine(f.read()) # read engine
context = model.create_execution_context()
try:
context = model.create_execution_context()
except AttributeError as err: # model is None

# TensorRT <10 and >=10 incompatible
LOGGER.error(

f"\nExport to .engine with the same TensorRT version as installed; currently using {trt.__version__}\n"
)
raise err

bindings = OrderedDict()
output_names = []
fp16 = False # default updated below
dynamic = False
for i in range(model.num_bindings):
name = model.get_binding_name(i)
dtype = trt.nptype(model.get_binding_dtype(i))
if model.binding_is_input(i):
if -1 in tuple(model.get_binding_shape(i)): # dynamic
is_legacy = hasattr(model, "num_bindings") # TensorRT <10
num = range(model.num_bindings) if is_legacy else range(model.num_io_tensors)
for i in num:
name = model.get_binding_name(i) if is_legacy else model.get_tensor_name(i)
dtype = trt.nptype(model.get_binding_dtype(i) if is_legacy else model.get_tensor_dtype(name))
is_input = (

model.binding_is_input(i) if is_legacy else model.get_tensor_mode(name) == trt.TensorIOMode.INPUT
)
if is_input:
if -1 in tuple(

model.get_binding_shape(i) if is_legacy else model.get_tensor_shape(name)
): # dynamic
dynamic = True
context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2]))
profile_shape = (

model.get_profile_shape(0, i) if is_legacy else model.get_tensor_profile_shape(name, i)
)
context.set_binding_shape(i, tuple(profile_shape[2]))

if dtype == np.float16:
fp16 = True
else: # output
output_names.append(name)
shape = tuple(context.get_binding_shape(i))
shape = tuple(context.get_binding_shape(i) if is_legacy else context.get_tensor_shape(name))

im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
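The dual code path above hinges on feature detection rather than version parsing: `hasattr(model, "num_bindings")` distinguishes the legacy binding-index API from the TensorRT 10 named-tensor API. The sketch below reproduces only that dispatch, with toy stand-in engine classes (not TensorRT objects):

```python
class LegacyEngine:
    """Toy stand-in for a tensorrt<10 ICudaEngine (binding-index API)."""

    num_bindings = 2
    _names = ["images", "output0"]

    def get_binding_name(self, i):
        return self._names[i]


class Trt10Engine:
    """Toy stand-in for a tensorrt>=10 ICudaEngine (named-tensor API)."""

    num_io_tensors = 2
    _names = ["images", "output0"]

    def get_tensor_name(self, i):
        return self._names[i]


def io_tensor_names(model) -> list:
    """Enumerate I/O tensor names through whichever API the engine exposes,
    mirroring the is_legacy dispatch in autobackend.py."""
    is_legacy = hasattr(model, "num_bindings")  # TensorRT < 10
    n = model.num_bindings if is_legacy else model.num_io_tensors
    getter = model.get_binding_name if is_legacy else model.get_tensor_name
    return [getter(i) for i in range(n)]


print(io_tensor_names(LegacyEngine()))  # ['images', 'output0']
print(io_tensor_names(Trt10Engine()))   # ['images', 'output0']
```

Feature detection keeps one AutoBackend loader working against both installed TensorRT generations, which is the point of the patch: dtype, input/output mode, shapes, and profile shapes are each fetched through the matching API branch.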