From a5b6b7ee82387903f8a0ddad457a548df5c9f12e Mon Sep 17 00:00:00 2001
From: fpetrini15 <francescogpetrini@gmail.com>
Date: Thu, 11 Apr 2024 10:17:33 -0700
Subject: [PATCH] Review comments. Passing ShmFile pointer instead of void

---
 docs/protocol/extension_shared_memory.md  |   4 +-
 qa/L0_shared_memory/shared_memory_test.py |  31 +++-
 qa/common/util.py                         |   6 +-
 src/shared_memory_manager.cc              | 183 +++++++++++-----------
 src/shared_memory_manager.h               |  68 +++++---
 5 files changed, 179 insertions(+), 113 deletions(-)

diff --git a/docs/protocol/extension_shared_memory.md b/docs/protocol/extension_shared_memory.md
index 5ad49eae9f..f10b3ee7f6 100644
--- a/docs/protocol/extension_shared_memory.md
+++ b/docs/protocol/extension_shared_memory.md
@@ -45,10 +45,10 @@ memory. These parameters and their type are:
   registered shared memory region. Region names share a namespace for
   system-shared-memory regions and CUDA-shared-memory regions.
 
-- "shared_memory_offset" : unsigned integer value is the offset, in bytes, into
+- "shared_memory_offset" : size_t value is the offset, in bytes, into
   the region where the data for the tensor starts.
 
-- "shared_memory_byte_size" : unsigned integer value is the size, in bytes, of
+- "shared_memory_byte_size" : size_t value is the size, in bytes, of
   the data.
 
 The “shared_memory_offset” parameter is optional and defaults to
diff --git a/qa/L0_shared_memory/shared_memory_test.py b/qa/L0_shared_memory/shared_memory_test.py
index 77830fe14e..f78c55d1c2 100755
--- a/qa/L0_shared_memory/shared_memory_test.py
+++ b/qa/L0_shared_memory/shared_memory_test.py
@@ -243,7 +243,7 @@ def test_invalid_create_shm(self, protocol):
         )
         print(f"*\n*\n*\nStarting Test:test_invalid_create_shm.{protocol}\n*\n*\n*\n")
         try:
-            shm_op0_handle = shm.create_shared_memory_region(
+            shm.create_shared_memory_region(
                 "dummy_data", (self._shm_key_prefix + "dummy_data"), -1
             )
         except Exception as ex:
@@ -305,6 +305,35 @@ def test_valid_create_set_register(self, protocol):
         shm.destroy_shared_memory_region(shm_op0_handle)
         self._test_passed = True
 
+    @parameterized.expand([("grpc"), ("http")])
+    def test_different_name_same_key(self, protocol):
+        # Create a valid system shared memory region, fill data in it and register
+        self._setUp(
+            protocol,
+            Path(os.getcwd()) / f"test_different_name_same_key.{protocol}.server.log",
+        )
+        print(
+            f"*\n*\n*\nStarting Test:test_different_name_same_key.{protocol}\n*\n*\n*\n"
+        )
+
+        shm_op0_handle = shm.create_shared_memory_region(
+            "dummy", (self._shm_key_prefix + "dummy_data"), 8
+        )
+        shm.set_shared_memory_region(
+            shm_op0_handle, [np.array([1, 2], dtype=np.float32)]
+        )
+        self._triton_client.register_system_shared_memory(
+            "dummy", (self._shm_key_prefix + "dummy_data"), 8
+        )
+        try:
+            self._triton_client.register_system_shared_memory(
+                "dummy2", (self._shm_key_prefix + "dummy_data"), 8
+            )
+        except Exception as ex:
+            self.assertIn("registering an active shared memory key", str(ex))
+        shm.destroy_shared_memory_region(shm_op0_handle)
+        self._test_passed = True
+
     @parameterized.expand([("grpc"), ("http")])
     def test_unregister_before_register(self, protocol):
         # Create a valid system shared memory region and unregister before register
diff --git a/qa/common/util.py b/qa/common/util.py
index 9e41e589d3..d04aad3e81 100644
--- a/qa/common/util.py
+++ b/qa/common/util.py
@@ -53,7 +53,11 @@ def kill_server(server_process):
             server_process.kill()
         else:
             server_process.send_signal(signal.SIGINT)
-        server_process.wait()
+        try:
+            server_process.wait(timeout=30)
+        except subprocess.TimeoutExpired:
+            server_process.kill()
+            raise Exception("Server did not shutdown properly")
 
 
 def stream_to_log(client_log):
diff --git a/src/shared_memory_manager.cc b/src/shared_memory_manager.cc
index 15a4aa627a..afa538578f 100644
--- a/src/shared_memory_manager.cc
+++ b/src/shared_memory_manager.cc
@@ -59,16 +59,77 @@ GetWindowsError()
 #endif
 
 TRITONSERVER_Error*
-OpenSharedMemoryRegion(const std::string& shm_key, void** shm_file)
+UnmapSharedMemory(void* mapped_addr, size_t byte_size)
+{
+#ifdef _WIN32
+  bool success = UnmapViewOfFile(mapped_addr);
+  if (!success) {
+    return TRITONSERVER_ErrorNew(
+        TRITONSERVER_ERROR_INTERNAL,
+        std::string(
+            "unable to unmap shared memory region, error code: " +
+            GetWindowsError())
+            .c_str());
+  }
+#else
+  int status = munmap(mapped_addr, byte_size);
+  if (status == -1) {
+    return TRITONSERVER_ErrorNew(
+        TRITONSERVER_ERROR_INTERNAL,
+        std::string(
+            "unable to munmap shared memory region, errno: " +
+            std::string(std::strerror(errno)))
+            .c_str());
+  }
+#endif
+  return nullptr;
+}
+
+#ifdef TRITON_ENABLE_GPU
+TRITONSERVER_Error*
+OpenCudaIPCRegion(
+    const cudaIpcMemHandle_t* cuda_shm_handle, void** data_ptr, int device_id)
+{
+#ifdef _WIN32
+  return TRITONSERVER_ErrorNew(
+      TRITONSERVER_ERROR_UNSUPPORTED,
+      std::string(
+          "GPU shared memory features are currently not supported on Windows")
+          .c_str());
+#else
+  // Set to device curres
+  cudaSetDevice(device_id);
+
+  // Open CUDA IPC handle and read data from it
+  cudaError_t err = cudaIpcOpenMemHandle(
+      data_ptr, *cuda_shm_handle, cudaIpcMemLazyEnablePeerAccess);
+  if (err != cudaSuccess) {
+    return TRITONSERVER_ErrorNew(
+        TRITONSERVER_ERROR_INTERNAL, std::string(
+                                         "failed to open CUDA IPC handle: " +
+                                         std::string(cudaGetErrorString(err)))
+                                         .c_str());
+  }
+
+  return nullptr;
+#endif
+}
+
+#endif  // TRITON_ENABLE_GPU
+
+}  // namespace
+
+TRITONSERVER_Error*
+SharedMemoryManager::OpenSharedMemoryRegion(
+    const std::string& shm_key, ShmFile** shm_file)
 {
 #ifdef _WIN32
-  HANDLE* shm_handle = static_cast<HANDLE*>(shm_file);
-  *shm_handle = OpenFileMapping(
+  HANDLE shm_handle = OpenFileMapping(
       FILE_MAP_ALL_ACCESS,  // read/write access
       FALSE,                // cannot inherit handle
       shm_key.c_str());     // name of mapping object
 
-  if (*shm_handle == NULL) {
+  if (shm_handle == NULL) {
     LOG_VERBOSE(1) << "OpenFileMapping failed with error code: "
                    << GetWindowsError();
     return TRITONSERVER_ErrorNew(
@@ -76,27 +137,29 @@ OpenSharedMemoryRegion(const std::string& shm_key, void** shm_file)
         std::string("Unable to open shared memory region: '" + shm_key + "'")
             .c_str());
   }
+  // Dynamic memory will eventually be owned by uniqe_ptr
+  *shm_file = new ShmFile(shm_handle);
 #else
   // get shared memory region descriptor
-  int* shm_fd = *reinterpret_cast<int**>(shm_file);
-  *shm_fd = shm_open(shm_key.c_str(), O_RDWR, S_IRUSR | S_IWUSR);
-  if (*shm_fd == -1) {
+  int shm_fd = shm_open(shm_key.c_str(), O_RDWR, S_IRUSR | S_IWUSR);
+  if (shm_fd == -1) {
     LOG_VERBOSE(1) << "shm_open failed, errno: " << errno;
     return TRITONSERVER_ErrorNew(
         TRITONSERVER_ERROR_INTERNAL,
         std::string("Unable to open shared memory region: '" + shm_key + "'")
             .c_str());
   }
+  // Dynamic memory will eventually be owned by uniqe_ptr
+  *shm_file = new ShmFile(shm_fd);
 #endif
   return nullptr;
 }
 
 TRITONSERVER_Error*
-CloseSharedMemoryRegion(void* shm_file)
+SharedMemoryManager::CloseSharedMemoryRegion(ShmFile* shm_file)
 {
 #ifdef _WIN32
-  HANDLE shm_handle = static_cast<HANDLE>(shm_file);
-  bool success = CloseHandle(shm_handle);
+  bool success = CloseHandle(shm_file->shm_handle_);
   if (!success) {
     return TRITONSERVER_ErrorNew(
         TRITONSERVER_ERROR_INTERNAL,
@@ -106,8 +169,7 @@ CloseSharedMemoryRegion(void* shm_file)
             .c_str());
   }
 #else
-  int shm_fd = *static_cast<int*>(shm_file);
-  int status = close(shm_fd);
+  int status = close(shm_file->shm_fd_);
   if (status == -1) {
     return TRITONSERVER_ErrorNew(
         TRITONSERVER_ERROR_INTERNAL,
@@ -122,12 +184,11 @@ CloseSharedMemoryRegion(void* shm_file)
 }
 
 TRITONSERVER_Error*
-MapSharedMemory(
-    void* shm_file, const size_t offset, const size_t byte_size,
+SharedMemoryManager::MapSharedMemory(
+    ShmFile* shm_file, const size_t offset, const size_t byte_size,
     void** mapped_addr)
 {
 #ifdef _WIN32
-  HANDLE shm_handle = static_cast<HANDLE>(shm_file);
   // The MapViewOfFile function takes a high-order and low-order DWORD (4 bytes
   // each) for offset. 'size_t' can either be 4 or 8 bytes depending on the
   // operating system. To handle both cases agnostically, we cast 'offset' to
@@ -138,10 +199,10 @@ MapSharedMemory(
   DWORD low_order_offset = upperbound_offset & 0xFFFFFFFF;
   // map shared memory to process address space
   *mapped_addr = MapViewOfFile(
-      shm_handle,           // handle to map object
-      FILE_MAP_ALL_ACCESS,  // read/write permission
-      high_order_offset,    // offset (high-order DWORD)
-      low_order_offset,     // offset (low-order DWORD)
+      shm_file->shm_handle_,  // handle to map object
+      FILE_MAP_ALL_ACCESS,    // read/write permission
+      high_order_offset,      // offset (high-order DWORD)
+      low_order_offset,       // offset (low-order DWORD)
       byte_size);
 
   if (*mapped_addr == NULL) {
@@ -153,10 +214,10 @@ MapSharedMemory(
             .c_str());
   }
 #else
-  int shm_fd = *static_cast<int*>(shm_file);
   // map shared memory to process address space
-  *mapped_addr =
-      mmap(NULL, byte_size, PROT_WRITE | PROT_READ, MAP_SHARED, shm_fd, offset);
+  *mapped_addr = mmap(
+      NULL, byte_size, PROT_WRITE | PROT_READ, MAP_SHARED, shm_file->shm_fd_,
+      offset);
   if (*mapped_addr == MAP_FAILED) {
     return TRITONSERVER_ErrorNew(
         TRITONSERVER_ERROR_INTERNAL, std::string(
@@ -168,67 +229,6 @@ MapSharedMemory(
   return nullptr;
 }
 
-TRITONSERVER_Error*
-UnmapSharedMemory(void* mapped_addr, size_t byte_size)
-{
-#ifdef _WIN32
-  bool success = UnmapViewOfFile(mapped_addr);
-  if (!success) {
-    return TRITONSERVER_ErrorNew(
-        TRITONSERVER_ERROR_INTERNAL,
-        std::string(
-            "unable to unmap shared memory region, error code: " +
-            GetWindowsError())
-            .c_str());
-  }
-#else
-  int status = munmap(mapped_addr, byte_size);
-  if (status == -1) {
-    return TRITONSERVER_ErrorNew(
-        TRITONSERVER_ERROR_INTERNAL,
-        std::string(
-            "unable to munmap shared memory region, errno: " +
-            std::string(std::strerror(errno)))
-            .c_str());
-  }
-#endif
-  return nullptr;
-}
-
-#ifdef TRITON_ENABLE_GPU
-TRITONSERVER_Error*
-OpenCudaIPCRegion(
-    const cudaIpcMemHandle_t* cuda_shm_handle, void** data_ptr, int device_id)
-{
-#ifdef _WIN32
-  return TRITONSERVER_ErrorNew(
-      TRITONSERVER_ERROR_UNSUPPORTED,
-      std::string(
-          "GPU shared memory features are currently not supported on Windows")
-          .c_str());
-#else
-  // Set to device curres
-  cudaSetDevice(device_id);
-
-  // Open CUDA IPC handle and read data from it
-  cudaError_t err = cudaIpcOpenMemHandle(
-      data_ptr, *cuda_shm_handle, cudaIpcMemLazyEnablePeerAccess);
-  if (err != cudaSuccess) {
-    return TRITONSERVER_ErrorNew(
-        TRITONSERVER_ERROR_INTERNAL, std::string(
-                                         "failed to open CUDA IPC handle: " +
-                                         std::string(cudaGetErrorString(err)))
-                                         .c_str());
-  }
-
-  return nullptr;
-#endif
-}
-
-#endif  // TRITON_ENABLE_GPU
-
-}  // namespace
-
 SharedMemoryManager::~SharedMemoryManager()
 {
   UnregisterAll(TRITONSERVER_MEMORY_CPU);
@@ -253,17 +253,14 @@ SharedMemoryManager::RegisterSystemSharedMemory(
 
   // register
   void* mapped_addr;
-  // Safe initializer for Unix case where shm_file must be dereferenced to
-  // base in order to store file descriptor.
-  int shm_safe_initializer = -1;
-  void* shm_file = &shm_safe_initializer;
+  ShmFile* shm_file = nullptr;
   bool shm_file_exists = false;
 
   // don't re-open if shared memory is already open
   for (auto itr = shared_memory_map_.begin(); itr != shared_memory_map_.end();
        ++itr) {
     if (itr->second->shm_key_ == shm_key) {
-      shm_file = itr->second->platform_handle_->GetShmFile();
+      shm_file = itr->second->platform_handle_.get();
       shm_file_exists = true;
       break;
     }
@@ -272,8 +269,16 @@ SharedMemoryManager::RegisterSystemSharedMemory(
   // open and set new shm_file if new shared memory key
   if (!shm_file_exists) {
     RETURN_IF_ERR(OpenSharedMemoryRegion(shm_key, &shm_file));
+  } else {
+    // FIXME: DLIS-6448 - We should allow users the flexibility to register
+    // the same key under different names with different attributes.
+    return TRITONSERVER_ErrorNew(
+        TRITONSERVER_ERROR_INVALID_ARG,
+        std::string(
+            "registering an active shared memory key, \"" + shm_key +
+            "\", under a different name is not currently supported")
+            .c_str());
   }
-
   // Mmap and then close the shared memory descriptor
   TRITONSERVER_Error* err_map =
       MapSharedMemory(shm_file, offset, byte_size, &mapped_addr);
diff --git a/src/shared_memory_manager.h b/src/shared_memory_manager.h
index f6c6570399..1cce4a81dd 100644
--- a/src/shared_memory_manager.h
+++ b/src/shared_memory_manager.h
@@ -131,56 +131,84 @@ class SharedMemoryManager {
 #endif  // TRITON_ENABLE_GPU
 
  private:
-  /// A helper function to remove the named shared memory blocks of
-  /// specified type
-  TRITONSERVER_Error* UnregisterHelper(
-      const std::string& name, TRITONSERVER_MemoryType memory_type);
-
+  /// A struct that records the platform-dependent handle to a shared memory
+  /// file.
   struct ShmFile {
 #ifdef _WIN32
-    HANDLE shm_file_;
-    ShmFile(void* shm_file) { shm_file_ = static_cast<HANDLE>(shm_file); };
-    HANDLE GetShmFile() { return shm_file_; };
+    HANDLE shm_handle_;
+    ShmFile(HANDLE shm_handle) : shm_file_(shm_handle){};
 #else
-    std::unique_ptr<int> shm_file_;
-    ShmFile(void* shm_file)
-    {
-      shm_file_ = std::make_unique<int>(*static_cast<int*>(shm_file));
-    };
-    int* GetShmFile() { return shm_file_.get(); };
+    int shm_fd_;
+    ShmFile(int fd) : shm_fd_(fd){};
 #endif  // _WIN32
   };
 
+  /// Open the shared memory object created by the client.
+  /// \param shm_key The name of the shared memory object
+  /// containing the block of memory.
+  /// \param shm_file The file handle/descriptor of the the
+  /// opened shared memory object.
+  /// \return a TRITONSERVER_Error indicating success or failure.
+  TRITONSERVER_Error* OpenSharedMemoryRegion(
+      const std::string& shm_key, ShmFile** shm_file);
+
+  /// Close the shared memory object.
+  /// \param shm_file The file handle/descriptor of the the
+  /// open shared memory object.
+  /// \return a TRITONSERVER_Error indicating success or failure.
+  TRITONSERVER_Error* CloseSharedMemoryRegion(ShmFile* shm_file);
+
+  /// Maps a view of the shared object into the address space of
+  /// a calling process.
+  /// \param shm_file The file handle/descriptor of the the
+  /// opened shared memory object.
+  /// \param offset The offset within the shared memory object to the
+  /// start of the block.
+  /// \param byte_size The size, in bytes of the block.
+  /// \param mapped_addr A pointer to the starting address of the mapped
+  /// view.
+  /// \return a TRITONSERVER_Error indicating success or failure.
+  TRITONSERVER_Error* MapSharedMemory(
+      ShmFile* shm_file, const size_t offset, const size_t byte_size,
+      void** mapped_addr);
+
+  /// A helper function to remove the named shared memory blocks of
+  /// specified type
+  TRITONSERVER_Error* UnregisterHelper(
+      const std::string& name, TRITONSERVER_MemoryType memory_type);
+
   /// A struct that records the shared memory regions registered by the shared
   /// memory manager.
   struct SharedMemoryInfo {
     SharedMemoryInfo(
         const std::string& name, const std::string& shm_key,
-        const size_t offset, const size_t byte_size, void* shm_file,
+        const size_t offset, const size_t byte_size, ShmFile* shm_file,
         void* mapped_addr, const TRITONSERVER_MemoryType kind,
         const int64_t device_id)
         : name_(name), shm_key_(shm_key), offset_(offset),
-          byte_size_(byte_size),
-          platform_handle_(std::make_unique<ShmFile>(shm_file)),
-          mapped_addr_(mapped_addr), kind_(kind), device_id_(device_id)
+          byte_size_(byte_size), mapped_addr_(mapped_addr), kind_(kind),
+          device_id_(device_id)
     {
+      if (shm_file != nullptr) {
+        platform_handle_.reset(shm_file);
+      }
     }
 
     std::string name_;
     std::string shm_key_;
     size_t offset_;
     size_t byte_size_;
-    std::unique_ptr<ShmFile> platform_handle_;
     void* mapped_addr_;
     TRITONSERVER_MemoryType kind_;
     int64_t device_id_;
+    std::unique_ptr<ShmFile> platform_handle_;
   };
 
 #ifdef TRITON_ENABLE_GPU
   struct CUDASharedMemoryInfo : SharedMemoryInfo {
     CUDASharedMemoryInfo(
         const std::string& name, const std::string& shm_key,
-        const size_t offset, const size_t byte_size, void* shm_file,
+        const size_t offset, const size_t byte_size, ShmFile* shm_file,
         void* mapped_addr, const TRITONSERVER_MemoryType kind,
         const int64_t device_id, const cudaIpcMemHandle_t* cuda_ipc_handle)
         : SharedMemoryInfo(