From 80ba0304daf22a9aa93e4324400d0bdadc76a5d0 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Fri, 28 Nov 2025 20:08:22 +0530 Subject: [PATCH 1/6] Update --- include/triton/core/tritonbackend.h | 9 +++++++++ src/backend_manager.cc | 8 ++++++++ src/backend_manager.h | 7 +++++++ src/backend_model.cc | 12 ++++++++++++ src/backend_model.h | 3 +++ src/backend_model_instance.cc | 22 ++++++++++++++++++++++ src/backend_model_instance.h | 1 + src/model.h | 3 +++ src/server.cc | 8 ++++++++ 9 files changed, 73 insertions(+) diff --git a/include/triton/core/tritonbackend.h b/include/triton/core/tritonbackend.h index 12d30a0e9..6ec6e5dc6 100644 --- a/include/triton/core/tritonbackend.h +++ b/include/triton/core/tritonbackend.h @@ -1643,6 +1643,15 @@ TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceExecute( TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests, const uint32_t request_count); +/// Check if a model instance is ready. This function is optional, a backend is +/// not required to implement it. This function is called to check if the +/// model instance is ready to handle requests. +/// +/// \param instance The model instance. +/// \return a TRITONSERVER_Error indicating success or failure. +TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceReady( + TRITONBACKEND_ModelInstance* instance); + /// Query the backend for different model attributes. This function is optional, /// a backend is not required to implement it. The backend is also not required /// to set all backend attribute listed. This function is called when diff --git a/src/backend_manager.cc b/src/backend_manager.cc index 1d5b6591d..d60a6f167 100644 --- a/src/backend_manager.cc +++ b/src/backend_manager.cc @@ -176,6 +176,7 @@ TritonBackend::ClearHandles() inst_init_fn_ = nullptr; inst_fini_fn_ = nullptr; inst_exec_fn_ = nullptr; + inst_ready_fn_ = nullptr; } Status @@ -190,6 +191,7 @@ TritonBackend::LoadBackendLibrary( TritonModelInstanceInitFn_t iifn; TritonModelInstanceFiniFn_t iffn; TritonModelInstanceExecFn_t iefn; + TritonModelInstanceReadyFn_t irfn; { std::unique_ptr slib; @@ -234,6 +236,11 @@ TritonBackend::LoadBackendLibrary( RETURN_IF_ERROR(slib->GetEntrypoint( dlhandle_, "TRITONBACKEND_ModelInstanceExecute", false /* optional */, reinterpret_cast(&iefn))); + + // Model instance ready function, optional + RETURN_IF_ERROR(slib->GetEntrypoint( + dlhandle_, "TRITONBACKEND_ModelInstanceReady", true /* optional */, + reinterpret_cast(&irfn))); } backend_init_fn_ = bifn; @@ -244,6 +251,7 @@ TritonBackend::LoadBackendLibrary( inst_init_fn_ = iifn; inst_fini_fn_ = iffn; inst_exec_fn_ = iefn; + inst_ready_fn_ = irfn; return Status::Success; } diff --git a/src/backend_manager.h b/src/backend_manager.h index 209253a80..62f68cbf5 100644 --- a/src/backend_manager.h +++ b/src/backend_manager.h @@ -67,6 +67,8 @@ class TritonBackend { typedef TRITONSERVER_Error* (*TritonModelInstanceExecFn_t)( TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests, const uint32_t request_cnt); + typedef TRITONSERVER_Error* (*TritonModelInstanceReadyFn_t)( + TRITONBACKEND_ModelInstance* instance); static Status Create( const std::string& name, const std::string& dir, @@ -112,6 +114,10 @@ class TritonBackend { { return inst_exec_fn_; } + TritonModelInstanceReadyFn_t ModelInstanceReadyFn() const + { + return inst_ready_fn_; + } private: typedef TRITONSERVER_Error* (*TritonBackendInitFn_t)( @@ -159,6 +165,7 @@ class TritonBackend { TritonModelInstanceInitFn_t inst_init_fn_; TritonModelInstanceFiniFn_t inst_fini_fn_; TritonModelInstanceExecFn_t inst_exec_fn_; + TritonModelInstanceReadyFn_t inst_ready_fn_; // Opaque state associated with the backend. void* state_; diff --git a/src/backend_model.cc b/src/backend_model.cc index 93cccb7e7..fb19f8227 100644 --- a/src/backend_model.cc +++ b/src/backend_model.cc @@ -291,6 +291,18 @@ TritonModel::GetExecutionPolicy(const inference::ModelConfig& model_config) return Status::Success; } +Status +TritonModel::IsReady() const +{ + for (const auto& instances : {&instances_, &passive_instances_}) { + for (const auto& instance : (*instances)) { + RETURN_IF_ERROR(instance->IsReady()); + } + } + + return Status::Success; +} + std::vector TritonModel::GetBackendLibrarySearchPaths( const std::string& model_path, int64_t version, diff --git a/src/backend_model.h b/src/backend_model.h index 2c761dd55..bff5b9a7f 100644 --- a/src/backend_model.h +++ b/src/backend_model.h @@ -112,6 +112,9 @@ class TritonModel : public Model { TritonModelBatchFiniFn_t ModelBatchFiniFn() const { return batch_fini_fn_; } TRITONBACKEND_Batcher** Batcher() { return &batcher_; } + // Check if the model is ready. + Status IsReady() const override; + private: DISALLOW_COPY_AND_ASSIGN(TritonModel); diff --git a/src/backend_model_instance.cc b/src/backend_model_instance.cc index 60c89c753..dca693aa6 100644 --- a/src/backend_model_instance.cc +++ b/src/backend_model_instance.cc @@ -592,6 +592,28 @@ TritonModelInstance::Schedule( return Status::Success; } +Status +TritonModelInstance::IsReady() +{ + TritonBackend::TritonModelInstanceReadyFn_t inst_ready_fn = + model_->Backend()->ModelInstanceReadyFn(); + + if (inst_ready_fn != nullptr) { + TRITONBACKEND_ModelInstance* triton_model_instance = + reinterpret_cast(this); + TRITONSERVER_Error* err = inst_ready_fn(triton_model_instance); + if (err != nullptr) { + Status status = Status( + TritonCodeToStatusCode(TRITONSERVER_ErrorCode(err)), + TRITONSERVER_ErrorMessage(err)); + TRITONSERVER_ErrorDelete(err); + return status; + } + } + + return Status::Success; +} + Status TritonModelInstance::Initialize() { diff --git a/src/backend_model_instance.h b/src/backend_model_instance.h index 037b3deb2..a4d1c50fc 100644 --- a/src/backend_model_instance.h +++ b/src/backend_model_instance.h @@ -223,6 +223,7 @@ class TritonModelInstance { Status PrepareRequestsOrRespond( std::vector>& requests); void Execute(std::vector& triton_requests); + Status IsReady(); std::shared_ptr triton_backend_thread_; diff --git a/src/model.h b/src/model.h index 2acf63b11..67c5850bb 100644 --- a/src/model.h +++ b/src/model.h @@ -250,6 +250,9 @@ class Model { return reporter_; } + // Check if the model is ready. + virtual Status IsReady() const { return Status::Success; } + protected: virtual std::map> AccumulatedInstanceMemoryUsage() const diff --git a/src/server.cc b/src/server.cc index fecc5d4a8..a516931b0 100644 --- a/src/server.cc +++ b/src/server.cc @@ -472,6 +472,14 @@ InferenceServer::ModelIsReady( ->ModelState(model_name, model->Version(), &state) .IsOk()) { *ready = (state == ModelReadyState::READY); + if (*ready) { + Status status = model->IsReady(); + if (!status.IsOk()) { + *ready = false; + LOG_VERBOSE(1) << "Model '" << model_name << "' version " + << model_version << " is not ready: " << status.Message(); + } + } } } From 451d195470d282fe684947e0138ca3cc84d0cb17 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Sun, 30 Nov 2025 21:52:19 +0530 Subject: [PATCH 2/6] Update --- src/backend_model_instance.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend_model_instance.h b/src/backend_model_instance.h index a4d1c50fc..1a031917e 100644 --- a/src/backend_model_instance.h +++ b/src/backend_model_instance.h @@ -117,6 +117,7 @@ class TritonModelInstance { Status Initialize(); Status WarmUp(); Status Schedule(std::vector>&& requests); + Status IsReady(); TritonModel* Model() const { return model_; } void* State() { return state_; } @@ -223,7 +224,6 @@ class TritonModelInstance { Status PrepareRequestsOrRespond( std::vector>& requests); void Execute(std::vector& triton_requests); - Status IsReady(); std::shared_ptr triton_backend_thread_; From cfa456bfb49a636e0298305bfe45161a1857f7c5 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Tue, 2 Dec 2025 15:47:02 +0530 Subject: [PATCH 3/6] Fix pre-commit errors --- src/server.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/server.cc b/src/server.cc index a516931b0..64024b9dd 100644 --- a/src/server.cc +++ b/src/server.cc @@ -477,7 +477,8 @@ InferenceServer::ModelIsReady( if (!status.IsOk()) { *ready = false; LOG_VERBOSE(1) << "Model '" << model_name << "' version " - << model_version << " is not ready: " << status.Message(); + << model_version + << " is not ready: " << status.Message(); } } } From cfe3f54ebe2cc5b629d102cab0b64054fafbcddd Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Fri, 5 Dec 2025 20:50:12 +0530 Subject: [PATCH 4/6] Update --- src/backend_model.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/backend_model.cc b/src/backend_model.cc index fb19f8227..cefce6b59 100644 --- a/src/backend_model.cc +++ b/src/backend_model.cc @@ -294,10 +294,12 @@ TritonModel::GetExecutionPolicy(const inference::ModelConfig& model_config) Status TritonModel::IsReady() const { - for (const auto& instances : {&instances_, &passive_instances_}) { - for (const auto& instance : (*instances)) { - RETURN_IF_ERROR(instance->IsReady()); - } + for (const auto& instance : instances_) { + RETURN_IF_ERROR(instance->IsReady()); + } + + for (const auto& instance : passive_instances_) { + RETURN_IF_ERROR(instance->IsReady()); } return Status::Success; From 0d10363a1e2a1430fdba82de76addd89595c3bca Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Fri, 5 Dec 2025 20:56:42 +0530 Subject: [PATCH 5/6] Update --- src/server.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.cc b/src/server.cc index 64024b9dd..b1cc4eafe 100644 --- a/src/server.cc +++ b/src/server.cc @@ -477,7 +477,7 @@ InferenceServer::ModelIsReady( if (!status.IsOk()) { *ready = false; LOG_VERBOSE(1) << "Model '" << model_name << "' version " - << model_version + << model->Version() << " is not ready: " << status.Message(); } } From 830daaacdd361bc6af56d5237a4a83a24a4c8dc1 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Mon, 8 Dec 2025 18:42:54 +0530 Subject: [PATCH 6/6] Update --- src/backend_model_instance.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/backend_model_instance.cc b/src/backend_model_instance.cc index dca693aa6..1f7d409ef 100644 --- a/src/backend_model_instance.cc +++ b/src/backend_model_instance.cc @@ -598,6 +598,7 @@ TritonModelInstance::IsReady() TritonBackend::TritonModelInstanceReadyFn_t inst_ready_fn = model_->Backend()->ModelInstanceReadyFn(); + // Implementing inst_ready_fn is optional if (inst_ready_fn != nullptr) { TRITONBACKEND_ModelInstance* triton_model_instance = reinterpret_cast(this);