From d2465903773a96a1f5c4437dfe834c1982339a8f Mon Sep 17 00:00:00 2001 From: Iman Tabrizian Date: Tue, 26 Sep 2023 12:46:41 -0400 Subject: [PATCH] Fix response iterator memory leak (#302) --- CMakeLists.txt | 30 ++++++++++++++++++------------ src/pb_response_iterator.cc | 4 +--- src/pb_response_iterator.h | 2 +- src/pb_stub.cc | 7 ++++++- src/shm_manager.cc | 2 ++ src/shm_manager.h | 32 ++++++++++++++++++++++++++++++++ 6 files changed, 60 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 93a7ae60..eae73836 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,20 +95,18 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(dlpack) +set(BOOST_ENABLE_CMAKE ON) +set(BOOST_INCLUDE_LIBRARIES stacktrace) # # Boost # -ExternalProject_Add( - boostorg - URL https://boostorg.jfrog.io/artifactory/main/release/1.79.0/source/boost_1_79_0.tar.gz - URL_HASH SHA256=273f1be93238a068aba4f9735a4a2b003019af067b9c183ed227780b8f36062c - PREFIX "boost-src" - CONFIGURE_COMMAND ${CMAKE_COMMAND} -E copy_directory - /boost/ ${CMAKE_BINARY_DIR}/boost - INSTALL_COMMAND "" - BUILD_COMMAND "" +FetchContent_Declare( + Boost + GIT_REPOSITORY https://github.com/boostorg/boost.git + GIT_TAG boost-1.81.0 + GIT_SHALLOW ON ) -set(boostorg_INCLUDE_DIRS "${CMAKE_BINARY_DIR}/boost/") +FetchContent_MakeAvailable(Boost) # # CUDA @@ -125,6 +123,10 @@ if(${TRITON_ENABLE_NVTX}) add_definitions(-DTRITON_ENABLE_NVTX=1) endif() # TRITON_ENABLE_NVTX +add_definitions(-DBOOST_STACKTRACE_USE_ADDR2LINE=1) +add_definitions(-DBOOST_STACKTRACE_USE_BACKTRACE=1) + + find_package(ZLIB REQUIRED) find_package(Threads REQUIRED) @@ -220,8 +222,6 @@ add_executable( ${PYTHON_BACKEND_STUB_SRCS} ) -add_dependencies(triton-python-backend boostorg) -add_dependencies(triton-python-backend-stub boostorg) set_property(TARGET triton-python-backend-stub PROPERTY OUTPUT_NAME triton_python_backend_stub) @@ -255,6 +255,9 @@ target_link_libraries( triton-core-serverstub # from repo-core ZLIB::ZLIB -larchive + -ldl + Boost::stacktrace_backtrace + Boost::stacktrace_addr2line ) target_link_libraries( @@ -267,6 +270,9 @@ target_link_libraries( pybind11::embed -lrt # shared memory -larchive # libarchive + -ldl + Boost::stacktrace_backtrace + Boost::stacktrace_addr2line ) set_target_properties( diff --git a/src/pb_response_iterator.cc b/src/pb_response_iterator.cc index 9561df68..1e0d631a 100644 --- a/src/pb_response_iterator.cc +++ b/src/pb_response_iterator.cc @@ -100,7 +100,7 @@ ResponseIterator::Next() } } -py::iterator +void ResponseIterator::Iter() { if (is_finished_) { @@ -111,8 +111,6 @@ ResponseIterator::Iter() idx_ = 0; } } - - return py::cast(*this); } void diff --git a/src/pb_response_iterator.h b/src/pb_response_iterator.h index 1122a216..cad5ff1f 100644 --- a/src/pb_response_iterator.h +++ b/src/pb_response_iterator.h @@ -38,7 +38,7 @@ class ResponseIterator { ~ResponseIterator(); std::shared_ptr Next(); - py::iterator Iter(); + void Iter(); void EnqueueResponse(std::shared_ptr infer_response); void* Id(); void Clear(); diff --git a/src/pb_stub.cc b/src/pb_stub.cc index d096f420..37c9a5b5 100644 --- a/src/pb_stub.cc +++ b/src/pb_stub.cc @@ -1544,7 +1544,12 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module) py::class_>( module, "ResponseIterator") .def(py::init&>()) - .def("__iter__", &ResponseIterator::Iter, py::keep_alive<0, 1>()) + .def( + "__iter__", + [](ResponseIterator& it) -> ResponseIterator& { + it.Iter(); + return it; + }) .def("__next__", &ResponseIterator::Next); py::class_ logger(module, "Logger"); diff --git a/src/shm_manager.cc b/src/shm_manager.cc index b52d5a4f..0a75e4b1 100644 --- a/src/shm_manager.cc +++ b/src/shm_manager.cc @@ -48,10 +48,12 @@ SharedMemoryManager::SharedMemoryManager( shm_obj_ = std::make_unique( bi::create_only, shm_region_name.c_str(), bi::read_write); shm_obj_->truncate(shm_size); + shm_debug_info_.open("shm_allocations_debug"); } else { // Open the existing region. shm_obj_ = std::make_unique( bi::open_only, shm_region_name.c_str(), bi::read_write); + shm_debug_info_.open("shm_allocations_debug_stub"); } current_capacity_ = shm_size; diff --git a/src/shm_manager.h b/src/shm_manager.h index bd462403..23892f43 100644 --- a/src/shm_manager.h +++ b/src/shm_manager.h @@ -37,8 +37,14 @@ #include #include #include +#define BOOST_STACKTRACE_USE_ADDR2LINE 1 + +#include +#include +#include #include "pb_exception.h" +using namespace std; namespace triton { namespace backend { namespace python { namespace bi = boost::interprocess; @@ -108,6 +114,13 @@ class SharedMemoryManager { handle = managed_buffer_->get_handle_from_address( reinterpret_cast(shm_ownership_data)); + std::string stack_trace = + boost::stacktrace::to_string(boost::stacktrace::stacktrace()); + std::replace(stack_trace.begin(), stack_trace.end(), '\n', '|'); + std::replace(stack_trace.begin(), stack_trace.end(), ',', ' '); + shm_debug_info_ << handle << ",ALLOC" + << "," << stack_trace << std::endl; + shm_debug_info_.flush(); } return WrapObjectInUniquePtr(obj, shm_ownership_data, handle); @@ -143,12 +156,30 @@ class SharedMemoryManager { bi::scoped_lock guard{*shm_mutex_}; GrowIfNeeded(0); void* ptr = managed_buffer_->get_address_from_handle(handle); + + std::string stack_trace = + boost::stacktrace::to_string(boost::stacktrace::stacktrace()); + std::replace(stack_trace.begin(), stack_trace.end(), '\n', '|'); + std::replace(stack_trace.begin(), stack_trace.end(), ',', ' '); + shm_debug_info_ << handle << ",DEALLOC" + << "," << stack_trace << std::endl; + shm_debug_info_.flush(); + managed_buffer_->deallocate(ptr); } void DeallocateUnsafe(bi::managed_external_buffer::handle_t handle) { void* ptr = managed_buffer_->get_address_from_handle(handle); + + std::string stack_trace = + boost::stacktrace::to_string(boost::stacktrace::stacktrace()); + std::replace(stack_trace.begin(), stack_trace.end(), '\n', '|'); + std::replace(stack_trace.begin(), stack_trace.end(), ',', ' '); + shm_debug_info_ << handle << ",DEALLOC" + << "," << stack_trace << std::endl; + shm_debug_info_.flush(); + managed_buffer_->deallocate(ptr); } @@ -171,6 +202,7 @@ class SharedMemoryManager { uint64_t* total_size_; bool create_; bool delete_region_; + std::ofstream shm_debug_info_; template AllocatedSharedMemory WrapObjectInUniquePtr(