Fix perf analyzer CAPI request lifecycle (#124)
* Fix perf analyzer CAPI request lifecycle

* Update copyrights
Tabrizian committed Jul 5, 2022
1 parent f6e7ca3 commit fe60703
Showing 2 changed files with 11 additions and 22 deletions.
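The substance of the fix: the Triton in-process C API owns an inference request until it invokes the request's release callback, so deleting the request at the end of TritonLoader::Infer() could race with a server that still holds it. The commit moves the deletion into the InferRequestComplete release callback. The ResponseAlloc change appears to be independent cleanup: enforce_memory_type was hardwired to false, so the switch always fell through to a plain CPU malloc, and the commit inlines that path. Below is a minimal, illustrative sketch of the corrected lifecycle against the raw TRITONSERVER C API; the loader really reaches these entry points through dlopen'd function pointers such as request_delete_fn_, and IssueRequest, server, and model_name here are hypothetical stand-ins for the loader's state:

#include "triton/core/tritonserver.h"

// Release callback: Triton invokes this once it is finished with the
// request, so this is the first point where deletion is safe.
static void
InferRequestComplete(
    TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp)
{
  TRITONSERVER_InferenceRequestDelete(request);
}

// Issuing side, with inputs, outputs, and error handling elided.
void
IssueRequest(TRITONSERVER_Server* server, const char* model_name)
{
  TRITONSERVER_InferenceRequest* irequest = nullptr;
  TRITONSERVER_InferenceRequestNew(
      &irequest, server, model_name, -1 /* latest version */);
  TRITONSERVER_InferenceRequestSetReleaseCallback(
      irequest, InferRequestComplete, nullptr /* userp */);
  TRITONSERVER_ServerInferAsync(server, irequest, nullptr /* trace */);
  // No TRITONSERVER_InferenceRequestDelete() here: the server still owns
  // irequest, and the release callback above performs the deletion.
}

In the commit itself, the deletion is routed through the new static TritonLoader::DeleteInferRequest() helper (second file below), which keeps the dlopen'd request_delete_fn_ pointer private to the loader while still letting the free function in the .cc file's anonymous namespace reach it via the singleton.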
26 changes: 5 additions & 21 deletions src/c++/perf_analyzer/client_backend/triton_c_api/triton_loader.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -42,7 +42,7 @@
 namespace triton { namespace perfanalyzer { namespace clientbackend {
 namespace tritoncapi {
 namespace {
-bool enforce_memory_type = false;
+
 TRITONSERVER_MemoryType requested_memory_type;
 bool helper_verbose = false;
 /// Helper function for allocating memory
@@ -70,20 +70,8 @@ ResponseAlloc(
     }
   } else {
     void* allocated_ptr = nullptr;
-    if (enforce_memory_type) {
-      *actual_memory_type = requested_memory_type;
-    }
-
-    switch (*actual_memory_type) {
-      // Use CPU memory if the requested memory type is unknown
-      // (default case).
-      case TRITONSERVER_MEMORY_CPU:
-      default: {
-        *actual_memory_type = TRITONSERVER_MEMORY_CPU;
-        allocated_ptr = malloc(byte_size);
-        break;
-      }
-    }
+    *actual_memory_type = TRITONSERVER_MEMORY_CPU;
+    allocated_ptr = malloc(byte_size);
 
     // Pass the tensor name with buffer_userp so we can show it when
     // releasing the buffer.
@@ -138,8 +126,7 @@ void
 InferRequestComplete(
     TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp)
 {
-  // request is deleted at the end of the Infer call so don't need to do
-  // anything here
+  TritonLoader::DeleteInferRequest(request);
 }
 
 
@@ -955,9 +942,6 @@ TritonLoader::Infer(
   RETURN_IF_TRITONSERVER_ERROR(
       GetSingleton()->inference_response_delete_fn_(completed_response),
       "deleting inference response");
-  RETURN_IF_TRITONSERVER_ERROR(
-      GetSingleton()->request_delete_fn_(irequest),
-      "deleting inference request");
   RETURN_IF_TRITONSERVER_ERROR(
       GetSingleton()->response_allocator_delete_fn_(allocator),
       "deleting response allocator");
7 changes: 6 additions & 1 deletion src/c++/perf_analyzer/client_backend/triton_c_api/triton_loader.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -119,6 +119,11 @@ class TritonLoader : public tc::InferenceServerClient {
 
   static bool ModelIsLoaded() { return GetSingleton()->model_is_loaded_; }
   static bool ServerIsReady() { return GetSingleton()->server_is_ready_; }
+  static TRITONSERVER_Error* DeleteInferRequest(
+      TRITONSERVER_InferenceRequest* irequest)
+  {
+    return GetSingleton()->request_delete_fn_(irequest);
+  }
 
   // TRITONSERVER_ApiVersion
   typedef TRITONSERVER_Error* (*TritonServerApiVersionFn_t)(
