Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions include/triton/core/tritonbackend.h
Original file line number Diff line number Diff line change
Expand Up @@ -1643,6 +1643,15 @@ TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceExecute(
TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
const uint32_t request_count);

/// Check if a model instance is ready. This function is optional; a backend
/// is not required to implement it. When implemented, it is called to check
/// whether the model instance is ready to handle requests.
///
/// \param instance The model instance.
/// \return nullptr on success (the instance is ready), or a
/// TRITONSERVER_Error describing why the instance is not ready.
TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceReady(
TRITONBACKEND_ModelInstance* instance);

/// Query the backend for different model attributes. This function is optional,
/// a backend is not required to implement it. The backend is also not required
/// to set all backend attribute listed. This function is called when
Expand Down
8 changes: 8 additions & 0 deletions src/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ TritonBackend::ClearHandles()
inst_init_fn_ = nullptr;
inst_fini_fn_ = nullptr;
inst_exec_fn_ = nullptr;
inst_ready_fn_ = nullptr;
}

Status
Expand All @@ -190,6 +191,7 @@ TritonBackend::LoadBackendLibrary(
TritonModelInstanceInitFn_t iifn;
TritonModelInstanceFiniFn_t iffn;
TritonModelInstanceExecFn_t iefn;
TritonModelInstanceReadyFn_t irfn;

{
std::unique_ptr<SharedLibrary> slib;
Expand Down Expand Up @@ -234,6 +236,11 @@ TritonBackend::LoadBackendLibrary(
RETURN_IF_ERROR(slib->GetEntrypoint(
dlhandle_, "TRITONBACKEND_ModelInstanceExecute", false /* optional */,
reinterpret_cast<void**>(&iefn)));

// Model instance ready function, optional
RETURN_IF_ERROR(slib->GetEntrypoint(
dlhandle_, "TRITONBACKEND_ModelInstanceReady", true /* optional */,
reinterpret_cast<void**>(&irfn)));
}

backend_init_fn_ = bifn;
Expand All @@ -244,6 +251,7 @@ TritonBackend::LoadBackendLibrary(
inst_init_fn_ = iifn;
inst_fini_fn_ = iffn;
inst_exec_fn_ = iefn;
inst_ready_fn_ = irfn;

return Status::Success;
}
Expand Down
7 changes: 7 additions & 0 deletions src/backend_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ class TritonBackend {
typedef TRITONSERVER_Error* (*TritonModelInstanceExecFn_t)(
TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
const uint32_t request_cnt);
typedef TRITONSERVER_Error* (*TritonModelInstanceReadyFn_t)(
TRITONBACKEND_ModelInstance* instance);

static Status Create(
const std::string& name, const std::string& dir,
Expand Down Expand Up @@ -112,6 +114,10 @@ class TritonBackend {
{
return inst_exec_fn_;
}
  // Returns the backend's TRITONBACKEND_ModelInstanceReady entrypoint.
  // The entrypoint is optional, so this may be nullptr when the backend
  // does not implement it.
  TritonModelInstanceReadyFn_t ModelInstanceReadyFn() const
  {
    return inst_ready_fn_;
  }

private:
typedef TRITONSERVER_Error* (*TritonBackendInitFn_t)(
Expand Down Expand Up @@ -159,6 +165,7 @@ class TritonBackend {
TritonModelInstanceInitFn_t inst_init_fn_;
TritonModelInstanceFiniFn_t inst_fini_fn_;
TritonModelInstanceExecFn_t inst_exec_fn_;
TritonModelInstanceReadyFn_t inst_ready_fn_;

// Opaque state associated with the backend.
void* state_;
Expand Down
14 changes: 14 additions & 0 deletions src/backend_model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,20 @@ TritonModel::GetExecutionPolicy(const inference::ModelConfig& model_config)
return Status::Success;
}

Status
TritonModel::IsReady() const
{
  // A model is ready only when every one of its instances reports ready.
  // Active instances are checked first, then passive ones; the first
  // not-ready instance short-circuits and its error is propagated.
  for (const auto& active_instance : instances_) {
    RETURN_IF_ERROR(active_instance->IsReady());
  }
  for (const auto& passive_instance : passive_instances_) {
    RETURN_IF_ERROR(passive_instance->IsReady());
  }
  return Status::Success;
}

std::vector<std::string>
TritonModel::GetBackendLibrarySearchPaths(
const std::string& model_path, int64_t version,
Expand Down
3 changes: 3 additions & 0 deletions src/backend_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ class TritonModel : public Model {
TritonModelBatchFiniFn_t ModelBatchFiniFn() const { return batch_fini_fn_; }
TRITONBACKEND_Batcher** Batcher() { return &batcher_; }

// Check if the model is ready.
Status IsReady() const override;

private:
DISALLOW_COPY_AND_ASSIGN(TritonModel);

Expand Down
23 changes: 23 additions & 0 deletions src/backend_model_instance.cc
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,29 @@ TritonModelInstance::Schedule(
return Status::Success;
}

Status
TritonModelInstance::IsReady()
{
  // TRITONBACKEND_ModelInstanceReady is an optional backend entrypoint.
  // A backend that does not provide it is always considered ready.
  const auto ready_fn = model_->Backend()->ModelInstanceReadyFn();
  if (ready_fn == nullptr) {
    return Status::Success;
  }

  TRITONSERVER_Error* err =
      ready_fn(reinterpret_cast<TRITONBACKEND_ModelInstance*>(this));
  if (err == nullptr) {
    return Status::Success;
  }

  // Convert the backend error into a Status before releasing the error
  // object; the message must be copied while 'err' is still alive.
  Status not_ready(
      TritonCodeToStatusCode(TRITONSERVER_ErrorCode(err)),
      TRITONSERVER_ErrorMessage(err));
  TRITONSERVER_ErrorDelete(err);
  return not_ready;
}

Status
TritonModelInstance::Initialize()
{
Expand Down
1 change: 1 addition & 0 deletions src/backend_model_instance.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ class TritonModelInstance {
Status Initialize();
Status WarmUp();
Status Schedule(std::vector<std::unique_ptr<InferenceRequest>>&& requests);
Status IsReady();

TritonModel* Model() const { return model_; }
void* State() { return state_; }
Expand Down
3 changes: 3 additions & 0 deletions src/model.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,9 @@ class Model {
return reporter_;
}

  // Check if the model is ready to handle requests. The base
  // implementation always reports ready; subclasses (e.g. TritonModel)
  // override this to perform backend-specific readiness checks.
  virtual Status IsReady() const { return Status::Success; }

protected:
virtual std::map<TRITONSERVER_MemoryType, std::map<int64_t, size_t>>
AccumulatedInstanceMemoryUsage() const
Expand Down
9 changes: 9 additions & 0 deletions src/server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,15 @@ InferenceServer::ModelIsReady(
->ModelState(model_name, model->Version(), &state)
.IsOk()) {
*ready = (state == ModelReadyState::READY);
if (*ready) {
Status status = model->IsReady();
if (!status.IsOk()) {
*ready = false;
LOG_VERBOSE(1) << "Model '" << model_name << "' version "
<< model->Version()
<< " is not ready: " << status.Message();
}
}
}
}

Expand Down
Loading