Shared Memory Client API + Image client example (#440)
* Add image shared memory client example

* fix shm image client
* update cmake
* create shared memory object function for IO in request - need to add logic and link to backends

* review edits

* merge shared memory image client

* update python client api

* formatting

* fix client api

* fix documentation for Shared Memory

* set use_shm correctly

* refactor image client code, fix grpc/http result check

* fix client API

* Update API for registering / unregistering SHM regions

* review edits

* additional edits

* remove python changes from this PR

* Add Shared Memory Register/Unregister functionality C++

* Add register / unregister support to server

* Support for fetching active shm regions in TRTIS

* review edits

* add UnregisterAll and GetSharedMemoryStatus

* Add SetSharedMemoryInputData to set shared memory buffer from registered shm region

* set SharedMemoryInput for HTTP server

* use GetRawCursor for output data

* remove GetStatus for Shared Memory

* post rebase cleanup

* use shared memory context for register / unregister

* additional cleanup

* add shared memory to headers

* fix image client to run only for input (buggy)

* find url for HTTP, check for failure during registration

* Correctly create shared memory block manager

* fix C++ client, include shared memory input counts, fix naming convention for image_client, log errno for shared memory open / mmap failure

* fix http regex for shared memory

* revert image client to use shared memory for both input and output

* review comments

* additional review edits
CoderHam authored and deadeyegoodwin committed Aug 7, 2019
1 parent faa8f93 commit 6d33c8c
Showing 13 changed files with 598 additions and 39 deletions.
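
Taken together, these changes add a register-then-reference flow to the C++ client: the client creates a system shared memory region, registers it with the server through the new SharedMemoryControlContext, points inputs (and optionally outputs) at it with SetSharedMemory, and unregisters it when done. The sketch below only illustrates that flow; the endpoint, model name, tensor name, region names, and sizes are example values, and the InferContext setup (InferGrpcContext::Create, GetInput) relies on the pre-existing client API rather than anything introduced in this PR.

#include <memory>
#include <string>
#include "src/clients/c++/request_grpc.h"

namespace nic = nvidia::inferenceserver::client;

int main()
{
  const std::string url = "localhost:8001";           // example GRPC endpoint
  const size_t input_byte_size = 16 * sizeof(float);  // example tensor size

  // 1. Register a client-created shared memory object (POSIX shm key "/input",
  //    created and filled with shm_open/mmap beforehand) with the server.
  //    Error return values are ignored here for brevity.
  std::unique_ptr<nic::SharedMemoryControlContext> shm_ctx;
  nic::SharedMemoryControlGrpcContext::Create(&shm_ctx, url);
  shm_ctx->RegisterSharedMemory("input_region", "/input", 0, input_byte_size);

  // 2. Reference the registered region from a model input instead of copying
  //    the tensor values into the request with SetRaw().
  std::unique_ptr<nic::InferContext> infer_ctx;
  nic::InferGrpcContext::Create(&infer_ctx, url, "example_model");
  std::shared_ptr<nic::InferContext::Input> input;
  infer_ctx->GetInput("INPUT0", &input);
  input->Reset();
  input->SetSharedMemory("input_region", 0 /* offset */, input_byte_size);

  // 3. Set run options and call infer_ctx->Run(...) as usual (elided), reading
  //    outputs from the response or from an output shared memory region.

  // 4. Unregister the region once no in-flight request references it.
  shm_ctx->UnregisterSharedMemory("input_region");
  return 0;
}
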
3 changes: 1 addition & 2 deletions src/clients/c++/CMakeLists.txt
@@ -109,7 +109,6 @@ install(
RUNTIME DESTINATION bin
)


#
# perf_client
#
@@ -130,7 +129,7 @@ set(
load_manager.h
)

add_executable(perf_client
add_executable(perf_client
${PERF_CLIENT_SRCS} ${PERF_CLIENT_HDRS})
target_link_libraries(
perf_client
1 change: 1 addition & 0 deletions src/clients/c++/request.cc
@@ -36,6 +36,7 @@ ProfileContext::~ProfileContext() {}
ServerHealthContext::~ServerHealthContext() {}
ServerStatusContext::~ServerStatusContext() {}
ModelControlContext::~ModelControlContext() {}
SharedMemoryControlContext::~SharedMemoryControlContext() {}
InferContext::Input::~Input() {}
InferContext::Output::~Output() {}
InferContext::Result::~Result() {}
98 changes: 95 additions & 3 deletions src/clients/c++/request.h
@@ -278,8 +278,8 @@ class InferContext {
virtual const DimsList& Dims() const = 0;

/// Prepare this input to receive new tensor values. Forget any
/// existing values that were set by previous calls to
/// SetRaw().
/// existing values that were set by previous calls to SetSharedMemory()
/// or SetRaw().
/// \return Error object indicating success or failure.
virtual Error Reset() = 0;

@@ -329,6 +329,23 @@ class InferContext {
/// \param input The vector holding tensor string values.
/// \return Error object indicating success or failure.
virtual Error SetFromString(const std::vector<std::string>& input) = 0;

/// Set tensor values for this input by reference into a shared memory
/// region. The values are not copied and so the shared memory region and
/// its contents must not be modified or destroyed until this input is no
/// longer needed (that is, until the Run() call(s) that use the input have
/// completed). This function must be called a single time for an input that
/// is using shared memory. For batched inputs, the tensor values for the
/// entire batch must be contiguous in a single shared memory region.
/// \param name The user-given name for the registered shared memory
/// region where the tensor values for this input are stored.
/// \param offset The offset into the shared memory region up to the start
/// of the input tensor values.
/// \param byte_size The size, in bytes, of the input tensor data. Must
/// match the size expected by the input.
/// \return Error object indicating success or failure.
virtual Error SetSharedMemory(
const std::string& name, size_t offset, size_t byte_size) = 0;
};

//==============
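
As a usage note for the new Input::SetSharedMemory() declaration above, a client fragment might look like the following. This is a sketch only: it assumes a region named "input_region" has already been registered, and it relies on the pre-existing InferContext::GetInput() accessor and Error::IsOk() from the client API.

#include <memory>
#include <string>
#include "src/clients/c++/request.h"

namespace nic = nvidia::inferenceserver::client;

// Attach the first 'input_byte_size' bytes of the registered region
// "input_region" to the input "INPUT0" of an existing InferContext.
nic::Error UseInputSharedMemory(
    const std::unique_ptr<nic::InferContext>& ctx, size_t input_byte_size)
{
  std::shared_ptr<nic::InferContext::Input> input;
  nic::Error err = ctx->GetInput("INPUT0", &input);
  if (!err.IsOk()) {
    return err;
  }
  // Forget anything previously set with SetRaw()/SetSharedMemory(), then
  // reference the shared memory region instead of copying data.
  input->Reset();
  return input->SetSharedMemory("input_region", 0 /* offset */, input_byte_size);
}
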
@@ -347,6 +364,27 @@ class InferContext {
/// model configuration. Variable-size dimensions are reported as
/// -1.
virtual const DimsList& Dims() const = 0;

/// Indicate that the result values for this output should be placed in a
/// shared memory region instead of being returned in the inference
/// response. The shared memory region must not be modified or destroyed
/// until this output is ready (that is, until after the Run() call(s) have
/// written the output completely). For batched outputs, all tensor values
/// are copied into a contiguous space in a single shared memory region.
/// \param name The user-given name for the registered shared memory region
/// where the tensor values for this output should be stored.
/// \param offset The offset into the shared memory region up to the start
/// of the output tensor values.
/// \param byte_size The size, in bytes, of the output tensor data.
/// Must match the size expected by the output.
/// \return Error object indicating success or failure.
virtual Error SetSharedMemory(
const std::string& name, size_t offset, size_t byte_size) = 0;

/// Prepare this output to store new tensor values. Forget any
/// existing values that were set by previous calls to SetSharedMemory().
/// \return Error object indicating success or failure.
virtual Error Reset() = 0;
};

//==============
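
Similarly, a sketch of directing an output's result values into shared memory, assuming a registered region named "output_region" and the pre-existing InferContext::GetOutput() accessor; the names and sizes are illustrative, not part of this change.

#include <memory>
#include <string>
#include "src/clients/c++/request.h"

namespace nic = nvidia::inferenceserver::client;

// Ask that the result values of "OUTPUT0" be written into the registered
// region "output_region" instead of being returned in the response.
nic::Error UseOutputSharedMemory(
    const std::unique_ptr<nic::InferContext>& ctx, size_t output_byte_size)
{
  std::shared_ptr<nic::InferContext::Output> output;
  nic::Error err = ctx->GetOutput("OUTPUT0", &output);
  if (!err.IsOk()) {
    return err;
  }
  // After Run() completes, the client reads the output bytes directly from
  // the mapped region; Reset() clears this setting for later requests.
  return output->SetSharedMemory(
      "output_region", 0 /* offset */, output_byte_size);
}
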
@@ -715,7 +753,7 @@ class ProfileContext {
/// can be used repeatedly.
///
/// A ModelControlContext object can use either HTTP protocol or GRPC protocol
/// depending on the Create function (ModelControlHttpContext::Create or
/// depending on the Create function (ModelControlHttpContext::Create or
/// ModelControlGrpcContext::Create). For example:
///
/// \code
@@ -745,6 +783,60 @@ class ModelControlContext {
virtual Error Unload(const std::string& model_name) = 0;
};

//==============================================================================
/// A SharedMemoryControlContext object is used to control the registration and
/// unregistration of shared memory regions on the inference server, and to get
/// their status. Once created, a SharedMemoryControlContext object can be used
/// repeatedly.
///
/// A SharedMemoryControlContext object can use either HTTP protocol or GRPC
/// protocol depending on the Create function
/// (SharedMemoryControlHttpContext::Create or
/// SharedMemoryControlGrpcContext::Create). For example:
///
/// \code
/// std::unique_ptr<SharedMemoryControlContext> ctx;
/// SharedMemoryControlGrpcContext::Create(&ctx, "localhost:8000");
/// std::string name = "shared_memory";
/// std::string shm_key = "/input";
/// ctx->RegisterSharedMemory(name, shm_key, 0, 104);
/// ...
/// ctx->UnregisterSharedMemory(name);
/// ...
/// ctx->UnregisterAllSharedMemory();
/// ...
/// \endcode
///
class SharedMemoryControlContext {
public:
virtual ~SharedMemoryControlContext() = 0;

/// Register a shared memory region on the inference server. If the shared
/// memory region is already registered, it will return error
/// 'TRTSERVER_ERROR_ALREADY_EXISTS'.
/// \param name The user-given name for the shared memory region to be
/// registered.
/// \param shm_key The unique name of the location in shared memory being
/// registered.
/// \param offset The offset into the shared memory region.
/// \param byte_size The size, in bytes, of the tensor data.
/// \return Error object indicating success or failure.
virtual Error RegisterSharedMemory(
const std::string& name, const std::string& shm_key, size_t offset,
size_t byte_size) = 0;

/// Unregister a registered shared memory region on the inference server. If
/// the shared memory region is not registered, do nothing and return success.
/// \param name The user-given name for the shared memory region to be
/// unregistered.
/// \return Error object indicating success or failure.
virtual Error UnregisterSharedMemory(const std::string& name) = 0;

/// Unregister all registered shared memory regions on the inference server.
/// \return Error object indicating success or failure.
virtual Error UnregisterAllSharedMemory() = 0;
};
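
The shm_key argument names a POSIX shared memory object that the client process is expected to create, size, and map itself before calling RegisterSharedMemory. The helper below is a minimal sketch of that step; the function name and error handling are illustrative and not part of this change (link with -lrt on Linux).

#include <fcntl.h>     // O_RDWR, O_CREAT
#include <sys/mman.h>  // shm_open, mmap
#include <sys/stat.h>  // S_IRUSR, S_IWUSR
#include <unistd.h>    // ftruncate, close
#include <cstddef>

// Create a POSIX shared memory object named 'shm_key' (for example "/input"),
// grow it to 'byte_size' bytes, and map it read/write into this process.
// Returns the mapped base address, or nullptr on failure.
static void* MapSharedMemoryRegion(const char* shm_key, size_t byte_size)
{
  int fd = shm_open(shm_key, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
  if (fd == -1) {
    return nullptr;
  }
  if (ftruncate(fd, byte_size) == -1) {
    close(fd);
    return nullptr;
  }
  void* base =
      mmap(nullptr, byte_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  close(fd);  // The mapping remains valid after the descriptor is closed.
  return (base == MAP_FAILED) ? nullptr : base;
}
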

//==============================================================================

std::ostream& operator<<(std::ostream&, const Error&);
68 changes: 65 additions & 3 deletions src/clients/c++/request_common.cc
@@ -125,7 +125,7 @@ InferContext::Options::Create(std::unique_ptr<InferContext::Options>* options)

InputImpl::InputImpl(const ModelInput& mio)
: mio_(mio), total_byte_size_(0), needs_shape_(false), batch_size_(0),
bufs_idx_(0), buf_pos_(0)
bufs_idx_(0), buf_pos_(0), io_type_(NONE)
{
if (GetElementCount(mio) == -1) {
byte_size_ = -1;
@@ -140,7 +140,9 @@ InputImpl::InputImpl(const InputImpl& obj)
total_byte_size_(obj.total_byte_size_), needs_shape_(obj.needs_shape_),
shape_(obj.shape_), batch_size_(obj.batch_size_), bufs_idx_(0),
buf_pos_(0), bufs_(obj.bufs_), buf_byte_sizes_(obj.buf_byte_sizes_),
str_bufs_(obj.str_bufs_)
str_bufs_(obj.str_bufs_), io_type_(obj.io_type_),
shm_name_(obj.shm_name_), shm_offset_(obj.shm_offset_),
shm_byte_size_(obj.shm_byte_size_)
{
}

@@ -168,6 +170,13 @@ InputImpl::SetShape(const std::vector<int64_t>& dims)
Error
InputImpl::SetRaw(const uint8_t* input, size_t input_byte_size)
{
// If SetSharedMemory was called on this input already, return an error
if (io_type_ == SHARED_MEMORY) {
return Error(
RequestStatusCode::INVALID_ARG,
"The input '" + Name() + "' has already been set with SetSharedMemory");
}

if (needs_shape_) {
bufs_.clear();
buf_byte_sizes_.clear();
@@ -204,6 +213,7 @@ InputImpl::SetRaw(const uint8_t* input, size_t input_byte_size)

bufs_.push_back(input);
buf_byte_sizes_.push_back(input_byte_size);
io_type_ = RAW;

return Error::Success;
}
@@ -214,6 +224,31 @@ InputImpl::SetRaw(const std::vector<uint8_t>& input)
return SetRaw(&input[0], input.size());
}

Error
InputImpl::SetSharedMemory(
const std::string& name, size_t offset, size_t byte_size)
{
// If SetRaw was called on this input already, return an error
if (io_type_ == RAW) {
return Error(
RequestStatusCode::INVALID_ARG,
"The input '" + Name() + "' has already been set with SetRaw");
}

// If SetSharedMemory was called on this input already, return an error
if (io_type_ == SHARED_MEMORY) {
return Error(
RequestStatusCode::INVALID_ARG,
"The input '" + Name() + "' can only be set once with SetSharedMemory");
}

shm_name_ = name;
shm_offset_ = offset;
shm_byte_size_ = byte_size;
io_type_ = SHARED_MEMORY;
return Error::Success;
}

Error
InputImpl::SetFromString(const std::vector<std::string>& input)
{
@@ -316,7 +351,7 @@ InputImpl::Reset()
bufs_idx_ = 0;
buf_pos_ = 0;
total_byte_size_ = 0;

io_type_ = NONE;
return Error::Success;
}

@@ -339,6 +374,33 @@ InputImpl::PrepareForRequest()

//==============================================================================

Error
OutputImpl::SetSharedMemory(
const std::string& name, size_t offset, size_t byte_size)
{
// If SetSharedMemory was called on this output already, return an error
if (io_type_ == SHARED_MEMORY) {
return Error(
RequestStatusCode::INVALID_ARG,
"The input '" + Name() + "' can only be set once with SetSharedMemory");
}

shm_name_ = name;
shm_offset_ = offset;
shm_byte_size_ = byte_size;
io_type_ = SHARED_MEMORY;
return Error::Success;
}

Error
OutputImpl::Reset()
{
io_type_ = NONE;
return Error::Success;
}

//==============================================================================

ResultImpl::ResultImpl(
const std::shared_ptr<InferContext::Output>& output, uint64_t batch_size)
: output_(output),
32 changes: 31 additions & 1 deletion src/clients/c++/request_common.h
@@ -152,6 +152,10 @@ class InputImpl : public InferContext::Input {
DataType DType() const override { return mio_.data_type(); }
ModelInput::Format Format() const override { return mio_.format(); }
const DimsList& Dims() const override { return mio_.dims(); }
bool IsSharedMemory() const { return (io_type_ == SHARED_MEMORY); }
const std::string& GetSharedMemoryName() const { return shm_name_; }
size_t GetSharedMemoryOffset() const { return shm_offset_; }
size_t GetSharedMemoryByteSize() const { return shm_byte_size_; }

void SetBatchSize(size_t batch_size) { batch_size_ = batch_size; }

@@ -161,6 +165,8 @@
Error Reset() override;
Error SetRaw(const std::vector<uint8_t>& input) override;
Error SetRaw(const uint8_t* input, size_t input_byte_size) override;
Error SetSharedMemory(
const std::string& name, size_t offset, size_t byte_size) override;
Error SetFromString(const std::vector<std::string>& input) override;

// Copy into 'buf' up to 'size' bytes of this input's data. Return
@@ -172,6 +178,10 @@
// Copy the pointer of the raw buffer at 'batch_idx' into 'buf'
Error GetRaw(size_t batch_idx, const uint8_t** buf, size_t* byte_size) const;

// Copy the shared memory key, offset and batch_byte_size
Error GetSharedMemory(
std::string* name, size_t* offset, size_t* batch_byte_size);

// Prepare to send this input as part of a request.
Error PrepareForRequest();

@@ -195,21 +205,30 @@
// reallocs that could invalidate the pointer references into the
// std::string objects.
std::list<std::string> str_bufs_;

// Used only if working with Shared Memory
enum IOType { NONE, RAW, SHARED_MEMORY };
IOType io_type_;
std::string shm_name_;
size_t shm_offset_;
size_t shm_byte_size_;
};

//==============================================================================

class OutputImpl : public InferContext::Output {
public:
OutputImpl(const ModelOutput& mio)
: mio_(mio), result_format_(InferContext::Result::ResultFormat::RAW)
: mio_(mio), result_format_(InferContext::Result::ResultFormat::RAW),
io_type_(RAW)
{
}
~OutputImpl() = default;

const std::string& Name() const override { return mio_.name(); }
DataType DType() const override { return mio_.data_type(); }
const DimsList& Dims() const override { return mio_.dims(); }
bool IsSharedMemory() const { return (io_type_ == SHARED_MEMORY); }

InferContext::Result::ResultFormat ResultFormat() const
{
@@ -220,9 +239,20 @@ class OutputImpl : public InferContext::Output {
result_format_ = result_format;
}

Error Reset() override;
Error SetSharedMemory(
const std::string& name, size_t offset, size_t byte_size) override;

private:
const ModelOutput mio_;
InferContext::Result::ResultFormat result_format_;

// Used only if working with Shared Memory
enum IOType { NONE, RAW, SHARED_MEMORY };
IOType io_type_;
std::string shm_name_;
size_t shm_offset_;
size_t shm_byte_size_;
};

//==============================================================================
