Fix lifetime of infer payload
Tabrizian committed May 14, 2023
1 parent 3623215 commit f0cf5d1
Showing 1 changed file with 11 additions and 7 deletions.
18 changes: 11 additions & 7 deletions src/request_executor.cc
@@ -77,7 +77,8 @@ void
 InferResponseComplete(
     TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp)
 {
-  auto p = reinterpret_cast<InferPayload*>(userp);
+  auto infer_payload =
+      *(reinterpret_cast<std::shared_ptr<InferPayload>*>(userp));
   std::unique_ptr<InferResponse> infer_response;
   std::vector<std::shared_ptr<PbTensor>> output_tensors;
   std::shared_ptr<PbError> pb_error;
@@ -146,7 +147,7 @@ InferResponseComplete(
       output_tensors.clear();
     }

-    if (!p->IsDecoupled()) {
+    if (!infer_payload->IsDecoupled()) {
       infer_response = std::make_unique<InferResponse>(
           output_tensors, pb_error, true /* is_last_response */);
     } else {
@@ -167,7 +168,8 @@
         TRITONSERVER_InferenceResponseDelete(response),
         "Failed to release BLS inference response.");
   } else if (
-      p->IsDecoupled() && (flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL) != 0) {
+      infer_payload->IsDecoupled() &&
+      (flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL) != 0) {
     // An empty response may be the last reponse for decoupled models.
     infer_response = std::make_unique<InferResponse>(
         output_tensors, pb_error, true /* is_last_response */, userp /* id */);
@@ -177,7 +179,7 @@
         output_tensors, pb_error, true /* is_last_response */, userp /* id */);
   }

-  p->SetValue(std::move(infer_response));
+  infer_payload->SetValue(std::move(infer_response));
 }

 TRITONSERVER_Error*
@@ -333,8 +335,8 @@ RequestExecutor::Infer(
           std::string("Model ") + model_name +
           " is using the decoupled. The current BLS request call doesn't "
           "support models using the decoupled transaction policy. Please use "
-          "stream API 'stream_exec()' or 'async_stream_exec() for decoupled "
-          "models.'");
+          "'decoupled=True' argument to the 'exec' or 'async_exec' calls for "
+          "decoupled models.'");
     }

     // Inference
@@ -379,11 +381,13 @@ RequestExecutor::Infer(
       ResponseAllocatorUserp response_allocator_userp(
           shm_pool_.get(), infer_request->GetPreferredMemory());
       infer_payload->SetResponseAllocUserp(response_allocator_userp);
+      std::shared_ptr<InferPayload>* infer_payload_p =
+          new std::shared_ptr<InferPayload>(infer_payload);

       THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceRequestSetResponseCallback(
           irequest, response_allocator_,
           reinterpret_cast<void*>(infer_payload->ResponseAllocUserp().get()),
-          InferResponseComplete, reinterpret_cast<void*>(infer_payload.get())));
+          InferResponseComplete, reinterpret_cast<void*>(infer_payload_p)));

       THROW_IF_TRITON_ERROR(TRITONSERVER_ServerInferAsync(
           server_, irequest, nullptr /* trace */));

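The change replaces the raw InferPayload* previously passed as userp with a heap-allocated std::shared_ptr<InferPayload>*, so the payload cannot be destroyed before the asynchronous response callback runs. Below is a minimal, self-contained sketch of that lifetime pattern, assuming a made-up async API (Payload, StartAsyncWork, and OnComplete are illustrative names, not Triton's): a shared_ptr is copied onto the heap, its address is passed through an opaque void*, and it is released from the callback once no further completions can arrive.

// Sketch only: Payload, StartAsyncWork, and OnComplete are invented names
// illustrating the lifetime pattern; this is not Triton code.
#include <iostream>
#include <memory>
#include <string>
#include <thread>

struct Payload {
  std::string name;
  void SetValue(const std::string& v) { std::cout << name << " <- " << v << "\n"; }
};

// C-style async API that only forwards an opaque void* to the callback,
// analogous to how userp is forwarded to InferResponseComplete.
using Callback = void (*)(void* userp);

std::thread StartAsyncWork(Callback cb, void* userp)
{
  return std::thread([cb, userp]() { cb(userp); });
}

void OnComplete(void* userp)
{
  // Recover the heap-allocated shared_ptr and copy it; the copy keeps the
  // payload alive for the rest of the callback.
  auto* owner = reinterpret_cast<std::shared_ptr<Payload>*>(userp);
  std::shared_ptr<Payload> payload = *owner;
  payload->SetValue("done");
  // No more callbacks will fire for this request, so release the owner.
  delete owner;
}

int main()
{
  std::thread worker;
  {
    auto payload = std::make_shared<Payload>();
    payload->name = "request-0";
    // Heap-allocate a second shared_ptr so a reference survives after
    // `payload` goes out of scope, and hand its address to the async API.
    auto* owner = new std::shared_ptr<Payload>(payload);
    worker = StartAsyncWork(OnComplete, reinterpret_cast<void*>(owner));
  }  // `payload` is destroyed here; the heap-allocated owner keeps the object alive.
  worker.join();
  return 0;
}

Copying the shared_ptr onto the heap is what lets a C-style callback interface, which only carries a void*, participate in shared ownership; the matching delete marks where that ownership finally ends. In the diff above, the same userp pointer is also forwarded as the response id for decoupled responses.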