From ab1a8fe35a11529ad93871692e835f56a5ca2513 Mon Sep 17 00:00:00 2001
From: Kyle McGill
Date: Fri, 6 May 2022 12:06:22 -0700
Subject: [PATCH 1/3] fix autocomplete to use cpu instance when no gpu found

---
 src/onnxruntime.cc | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/src/onnxruntime.cc b/src/onnxruntime.cc
index ea86624..720faec 100644
--- a/src/onnxruntime.cc
+++ b/src/onnxruntime.cc
@@ -682,10 +682,40 @@ ModelState::AutoCompleteConfig()
   OrtAllocator* default_allocator;
   std::string model_path;
   {
+    TRITONSERVER_InstanceGroupKind kind;
+
+    triton::common::TritonJson::Value instance_group;
+    ModelConfig().Find("instance_group", &instance_group);
+
+    // Earlier in the model lifecycle, device checks for the instance group
+    // have already occurred. If at least one instance group with
+    // "kind" = "KIND_GPU" then allow model to use GPU else autocomplete to CPU
+    bool found_gpu_instance = false;
+    for (size_t i = 0; i < instance_group.ArraySize(); ++i) {
+      triton::common::TritonJson::Value instance_obj;
+      instance_group.IndexAsObject(i, &instance_obj);
+
+      triton::common::TritonJson::Value instance_group_kind;
+      instance_obj.Find("kind", &instance_group_kind);
+      std::string kind_str;
+      RETURN_IF_ERROR(instance_group_kind.AsString(&kind_str));
+
+      if (kind_str == "KIND_GPU" || kind_str == "KIND_AUTO") {
+        found_gpu_instance = true;
+        break;
+      }
+    }
+
+    if (found_gpu_instance) {
+      kind = TRITONSERVER_INSTANCEGROUPKIND_GPU;
+    } else {
+      kind = TRITONSERVER_INSTANCEGROUPKIND_CPU;
+    }
+
     OrtSession* sptr = nullptr;
     RETURN_IF_ERROR(LoadModel(
-        artifact_name, TRITONSERVER_INSTANCEGROUPKIND_AUTO, 0, &model_path,
-        &sptr, &default_allocator, nullptr));
+        artifact_name, kind, 0, &model_path, &sptr, &default_allocator,
+        nullptr));
     session.reset(sptr);
   }
   OnnxTensorInfoMap input_tensor_infos;

From 69bb8c8e6deac37b44d859e35ee6b13c0567770d Mon Sep 17 00:00:00 2001
From: Kyle McGill
Date: Fri, 6 May 2022 13:54:50 -0700
Subject: [PATCH 2/3] default to kind cpu in autocomplete; took out KIND_AUTO
 check in autocomplete

---
 src/onnxruntime.cc | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/onnxruntime.cc b/src/onnxruntime.cc
index 720faec..fc19d5c 100644
--- a/src/onnxruntime.cc
+++ b/src/onnxruntime.cc
@@ -682,14 +682,16 @@ ModelState::AutoCompleteConfig()
   OrtAllocator* default_allocator;
   std::string model_path;
   {
-    TRITONSERVER_InstanceGroupKind kind;
+    TRITONSERVER_InstanceGroupKind kind = TRITONSERVER_INSTANCEGROUPKIND_CPU;

+#ifdef TRITON_ENABLE_GPU
     triton::common::TritonJson::Value instance_group;
     ModelConfig().Find("instance_group", &instance_group);

     // Earlier in the model lifecycle, device checks for the instance group
     // have already occurred. If at least one instance group with
-    // "kind" = "KIND_GPU" then allow model to use GPU else autocomplete to CPU
+    // "kind" = "KIND_GPU" then allow model to use GPU else autocomplete to
+    // "KIND_CPU"
     bool found_gpu_instance = false;
     for (size_t i = 0; i < instance_group.ArraySize(); ++i) {
       triton::common::TritonJson::Value instance_obj;
@@ -700,7 +702,7 @@ ModelState::AutoCompleteConfig()
       std::string kind_str;
       RETURN_IF_ERROR(instance_group_kind.AsString(&kind_str));

-      if (kind_str == "KIND_GPU" || kind_str == "KIND_AUTO") {
+      if (kind_str == "KIND_GPU") {
         found_gpu_instance = true;
         break;
       }
@@ -708,10 +710,8 @@ ModelState::AutoCompleteConfig()

     if (found_gpu_instance) {
       kind = TRITONSERVER_INSTANCEGROUPKIND_GPU;
-    } else {
-      kind = TRITONSERVER_INSTANCEGROUPKIND_CPU;
     }
-
+#endif  // TRITON_ENABLE_GPU
     OrtSession* sptr = nullptr;
     RETURN_IF_ERROR(LoadModel(
         artifact_name, kind, 0, &model_path, &sptr, &default_allocator,

From 0e83e5732110a34f4b12409ece9ea89868e706e9 Mon Sep 17 00:00:00 2001
From: Kyle McGill
Date: Fri, 6 May 2022 14:05:32 -0700
Subject: [PATCH 3/3] simplified logic when setting the instance group kind in
 autocomplete

---
 src/onnxruntime.cc | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/onnxruntime.cc b/src/onnxruntime.cc
index fc19d5c..397d439 100644
--- a/src/onnxruntime.cc
+++ b/src/onnxruntime.cc
@@ -692,7 +692,6 @@ ModelState::AutoCompleteConfig()
     // have already occurred. If at least one instance group with
     // "kind" = "KIND_GPU" then allow model to use GPU else autocomplete to
     // "KIND_CPU"
-    bool found_gpu_instance = false;
     for (size_t i = 0; i < instance_group.ArraySize(); ++i) {
       triton::common::TritonJson::Value instance_obj;
       instance_group.IndexAsObject(i, &instance_obj);
@@ -703,15 +702,12 @@ ModelState::AutoCompleteConfig()
       std::string kind_str;
       RETURN_IF_ERROR(instance_group_kind.AsString(&kind_str));

       if (kind_str == "KIND_GPU") {
-        found_gpu_instance = true;
+        kind = TRITONSERVER_INSTANCEGROUPKIND_GPU;
         break;
       }
     }
-
-    if (found_gpu_instance) {
-      kind = TRITONSERVER_INSTANCEGROUPKIND_GPU;
-    }
 #endif  // TRITON_ENABLE_GPU
+
     OrtSession* sptr = nullptr;
     RETURN_IF_ERROR(LoadModel(
         artifact_name, kind, 0, &model_path, &sptr, &default_allocator,
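
For reference, the net effect of the series: once PATCH 3/3 is applied, the
relevant portion of ModelState::AutoCompleteConfig() in src/onnxruntime.cc
reads as below. This is assembled directly from the hunks above; only the
comments marked "note:" are added here and are not part of the diff, and the
surrounding declarations (artifact_name, session, and so on) come from the
unchanged function body shown in the context lines.

      OrtAllocator* default_allocator;
      std::string model_path;
      {
        // note: default to CPU so that CPU-only configs, and builds compiled
        // without TRITON_ENABLE_GPU, autocomplete against a CPU session.
        TRITONSERVER_InstanceGroupKind kind = TRITONSERVER_INSTANCEGROUPKIND_CPU;

    #ifdef TRITON_ENABLE_GPU
        triton::common::TritonJson::Value instance_group;
        ModelConfig().Find("instance_group", &instance_group);

        // Earlier in the model lifecycle, device checks for the instance group
        // have already occurred. If at least one instance group with
        // "kind" = "KIND_GPU" then allow model to use GPU else autocomplete to
        // "KIND_CPU"
        for (size_t i = 0; i < instance_group.ArraySize(); ++i) {
          triton::common::TritonJson::Value instance_obj;
          instance_group.IndexAsObject(i, &instance_obj);

          triton::common::TritonJson::Value instance_group_kind;
          instance_obj.Find("kind", &instance_group_kind);
          std::string kind_str;
          RETURN_IF_ERROR(instance_group_kind.AsString(&kind_str));

          // note: the first KIND_GPU group wins; KIND_AUTO no longer selects
          // the GPU path (dropped in PATCH 2/3).
          if (kind_str == "KIND_GPU") {
            kind = TRITONSERVER_INSTANCEGROUPKIND_GPU;
            break;
          }
        }
    #endif  // TRITON_ENABLE_GPU

        OrtSession* sptr = nullptr;
        RETURN_IF_ERROR(LoadModel(
            artifact_name, kind, 0, &model_path, &sptr, &default_allocator,
            nullptr));
        session.reset(sptr);
      }

In practice, a config.pbtxt that declares, for example, instance_group
[ { kind: KIND_GPU } ] still autocompletes against a GPU-backed session,
while a config with only KIND_CPU groups now gets
TRITONSERVER_INSTANCEGROUPKIND_CPU rather than the previous
TRITONSERVER_INSTANCEGROUPKIND_AUTO. Guarding the scan with
TRITON_ENABLE_GPU also lets non-GPU builds skip the instance_group JSON walk
entirely and take the CPU default.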