Add an optional per-instance readiness check to the backend API.

Summary of what this patch does:
  * tritonbackend.h declares the optional TRITONBACKEND_ModelInstanceReady
    entry point.
  * TritonBackend looks the symbol up as optional when loading the backend
    library, stores it in inst_ready_fn_, and clears it in ClearHandles().
  * TritonModelInstance::IsReady() calls the entry point when it is non-null
    and converts a returned TRITONSERVER_Error into a Status; otherwise it
    returns Status::Success.
  * TritonModel::IsReady() returns the first failing status across
    instances_ and passive_instances_.
  * Model::IsReady() is a new virtual that returns Status::Success by default.
  * InferenceServer::ModelIsReady() additionally consults model->IsReady()
    when the repository state is READY, and logs at verbose level 1 when the
    model reports not ready.

NOTE(review): this patch was recovered from a copy whose newlines, leading
indentation and angle-bracket template arguments had been stripped.
Indentation is reconstructed on the project's 2-space convention. The
restored template arguments on context lines (std::unique_ptr<SharedLibrary>,
reinterpret_cast<void**>, std::vector<std::string>,
std::vector<std::unique_ptr<InferenceRequest>>, and the std::map arguments in
model.h) are assumed from the surrounding code base -- confirm against the
target tree, or apply with whitespace tolerance, before relying on it.

diff --git a/include/triton/core/tritonbackend.h b/include/triton/core/tritonbackend.h
index 12d30a0e9..6ec6e5dc6 100644
--- a/include/triton/core/tritonbackend.h
+++ b/include/triton/core/tritonbackend.h
@@ -1643,6 +1643,15 @@ TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceExecute(
     TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
     const uint32_t request_count);
 
+/// Check if a model instance is ready. This function is optional, a backend is
+/// not required to implement it. This function is called to check if the
+/// model instance is ready to handle requests.
+///
+/// \param instance The model instance.
+/// \return a TRITONSERVER_Error indicating success or failure.
+TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceReady(
+    TRITONBACKEND_ModelInstance* instance);
+
 /// Query the backend for different model attributes. This function is optional,
 /// a backend is not required to implement it. The backend is also not required
 /// to set all backend attribute listed. This function is called when
diff --git a/src/backend_manager.cc b/src/backend_manager.cc
index 1d5b6591d..d60a6f167 100644
--- a/src/backend_manager.cc
+++ b/src/backend_manager.cc
@@ -176,6 +176,7 @@ TritonBackend::ClearHandles()
   inst_init_fn_ = nullptr;
   inst_fini_fn_ = nullptr;
   inst_exec_fn_ = nullptr;
+  inst_ready_fn_ = nullptr;
 }
 
 Status
@@ -190,6 +191,7 @@ TritonBackend::LoadBackendLibrary(
   TritonModelInstanceInitFn_t iifn;
   TritonModelInstanceFiniFn_t iffn;
   TritonModelInstanceExecFn_t iefn;
+  TritonModelInstanceReadyFn_t irfn;
 
   {
     std::unique_ptr<SharedLibrary> slib;
@@ -234,6 +236,11 @@ TritonBackend::LoadBackendLibrary(
     RETURN_IF_ERROR(slib->GetEntrypoint(
         dlhandle_, "TRITONBACKEND_ModelInstanceExecute", false /* optional */,
         reinterpret_cast<void**>(&iefn)));
+
+    // Model instance ready function, optional
+    RETURN_IF_ERROR(slib->GetEntrypoint(
+        dlhandle_, "TRITONBACKEND_ModelInstanceReady", true /* optional */,
+        reinterpret_cast<void**>(&irfn)));
   }
 
   backend_init_fn_ = bifn;
@@ -244,6 +251,7 @@ TritonBackend::LoadBackendLibrary(
   inst_init_fn_ = iifn;
   inst_fini_fn_ = iffn;
   inst_exec_fn_ = iefn;
+  inst_ready_fn_ = irfn;
 
   return Status::Success;
 }
diff --git a/src/backend_manager.h b/src/backend_manager.h
index 209253a80..62f68cbf5 100644
--- a/src/backend_manager.h
+++ b/src/backend_manager.h
@@ -67,6 +67,8 @@ class TritonBackend {
   typedef TRITONSERVER_Error* (*TritonModelInstanceExecFn_t)(
       TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
       const uint32_t request_cnt);
+  typedef TRITONSERVER_Error* (*TritonModelInstanceReadyFn_t)(
+      TRITONBACKEND_ModelInstance* instance);
 
   static Status Create(
       const std::string& name, const std::string& dir,
@@ -112,6 +114,10 @@ class TritonBackend {
   {
     return inst_exec_fn_;
   }
+  TritonModelInstanceReadyFn_t ModelInstanceReadyFn() const
+  {
+    return inst_ready_fn_;
+  }
 
  private:
   typedef TRITONSERVER_Error* (*TritonBackendInitFn_t)(
@@ -159,6 +165,7 @@ class TritonBackend {
   TritonModelInstanceInitFn_t inst_init_fn_;
   TritonModelInstanceFiniFn_t inst_fini_fn_;
   TritonModelInstanceExecFn_t inst_exec_fn_;
+  TritonModelInstanceReadyFn_t inst_ready_fn_;
 
   // Opaque state associated with the backend.
   void* state_;
diff --git a/src/backend_model.cc b/src/backend_model.cc
index 93cccb7e7..cefce6b59 100644
--- a/src/backend_model.cc
+++ b/src/backend_model.cc
@@ -291,6 +291,20 @@ TritonModel::GetExecutionPolicy(const inference::ModelConfig& model_config)
   return Status::Success;
 }
 
+Status
+TritonModel::IsReady() const
+{
+  for (const auto& instance : instances_) {
+    RETURN_IF_ERROR(instance->IsReady());
+  }
+
+  for (const auto& instance : passive_instances_) {
+    RETURN_IF_ERROR(instance->IsReady());
+  }
+
+  return Status::Success;
+}
+
 std::vector<std::string>
 TritonModel::GetBackendLibrarySearchPaths(
     const std::string& model_path, int64_t version,
diff --git a/src/backend_model.h b/src/backend_model.h
index 2c761dd55..bff5b9a7f 100644
--- a/src/backend_model.h
+++ b/src/backend_model.h
@@ -112,6 +112,9 @@ class TritonModel : public Model {
   TritonModelBatchFiniFn_t ModelBatchFiniFn() const { return batch_fini_fn_; }
   TRITONBACKEND_Batcher** Batcher() { return &batcher_; }
 
+  // Check if the model is ready.
+  Status IsReady() const override;
+
  private:
   DISALLOW_COPY_AND_ASSIGN(TritonModel);
 
diff --git a/src/backend_model_instance.cc b/src/backend_model_instance.cc
index 60c89c753..1f7d409ef 100644
--- a/src/backend_model_instance.cc
+++ b/src/backend_model_instance.cc
@@ -592,6 +592,29 @@ TritonModelInstance::Schedule(
   return Status::Success;
 }
 
+Status
+TritonModelInstance::IsReady()
+{
+  TritonBackend::TritonModelInstanceReadyFn_t inst_ready_fn =
+      model_->Backend()->ModelInstanceReadyFn();
+
+  // Implementing inst_ready_fn is optional
+  if (inst_ready_fn != nullptr) {
+    TRITONBACKEND_ModelInstance* triton_model_instance =
+        reinterpret_cast<TRITONBACKEND_ModelInstance*>(this);
+    TRITONSERVER_Error* err = inst_ready_fn(triton_model_instance);
+    if (err != nullptr) {
+      Status status = Status(
+          TritonCodeToStatusCode(TRITONSERVER_ErrorCode(err)),
+          TRITONSERVER_ErrorMessage(err));
+      TRITONSERVER_ErrorDelete(err);
+      return status;
+    }
+  }
+
+  return Status::Success;
+}
+
 Status
 TritonModelInstance::Initialize()
 {
diff --git a/src/backend_model_instance.h b/src/backend_model_instance.h
index 037b3deb2..1a031917e 100644
--- a/src/backend_model_instance.h
+++ b/src/backend_model_instance.h
@@ -117,6 +117,7 @@ class TritonModelInstance {
   Status Initialize();
   Status WarmUp();
   Status Schedule(std::vector<std::unique_ptr<InferenceRequest>>&& requests);
+  Status IsReady();
 
   TritonModel* Model() const { return model_; }
   void* State() { return state_; }
diff --git a/src/model.h b/src/model.h
index 2acf63b11..67c5850bb 100644
--- a/src/model.h
+++ b/src/model.h
@@ -250,6 +250,9 @@ class Model {
     return reporter_;
   }
 
+  // Check if the model is ready.
+  virtual Status IsReady() const { return Status::Success; }
+
  protected:
   virtual std::map<TRITONSERVER_MemoryType, std::map<int64_t, size_t>>
   AccumulatedInstanceMemoryUsage() const
diff --git a/src/server.cc b/src/server.cc
index fecc5d4a8..b1cc4eafe 100644
--- a/src/server.cc
+++ b/src/server.cc
@@ -472,6 +472,15 @@ InferenceServer::ModelIsReady(
             ->ModelState(model_name, model->Version(), &state)
             .IsOk()) {
       *ready = (state == ModelReadyState::READY);
+      if (*ready) {
+        Status status = model->IsReady();
+        if (!status.IsOk()) {
+          *ready = false;
+          LOG_VERBOSE(1) << "Model '" << model_name << "' version "
+                         << model->Version()
+                         << " is not ready: " << status.Message();
+        }
+      }
     }
   }
 