From 647704bd032ec1f44a0f22d1b47794c0eedc62fc Mon Sep 17 00:00:00 2001
From: Iman Tabrizian
Date: Tue, 4 Jul 2023 14:29:16 -0400
Subject: [PATCH 1/5] Rebase and fix merge conflict

---
 src/libtorch.cc | 339 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 296 insertions(+), 43 deletions(-)

diff --git a/src/libtorch.cc b/src/libtorch.cc
index 99732cf..a4d8244 100644
--- a/src/libtorch.cc
+++ b/src/libtorch.cc
@@ -38,6 +38,7 @@
 #include "triton/backend/backend_output_responder.h"
 #include "triton/common/nvtx.h"
 #include "triton/core/tritonbackend.h"
+#include "triton/core/tritonserver.h"
 
 #ifdef TRITON_PYTORCH_ENABLE_TORCHVISION
 // Suppress warnings in torch headers
@@ -105,7 +106,10 @@ class ModelState : public BackendModel {
   bool EnabledCacheCleaning() { return enable_cache_cleaning_; }
 
   bool EnabledWeightSharing() { return enable_weight_sharing_; }
-  const std::vector<std::string>& ModelOutputs() { return output_names_; }
+  const std::map<std::string, std::pair<int64_t, int64_t>>& ModelOutputs()
+  {
+    return model_outputs_;
+  }
 
  private:
   ModelState(TRITONBACKEND_Model* triton_model);
@@ -145,9 +149,12 @@ class ModelState : public BackendModel {
   std::pair<
       std::shared_ptr<torch::jit::script::Module>,
       std::shared_ptr<torch::jit::script::Module>>
       torch_models_;
 
-  // List of all the outputs specified in the output section of model
-  // configuration.
-  std::vector<std::string> output_names_;
+  // model_outputs is a map that contains unique outputs that the model must
+  // provide. In the model configuration, the output in the state configuration
+  // can have intersection with the outputs section of the model. If an output
+  // is specified both in the output section and state section, it indicates
+  // that the backend must return the output state to the client too.
+  std::map<std::string, std::pair<int64_t, int64_t>> model_outputs_;
 };
 
 TRITONSERVER_Error*
@@ -172,6 +179,49 @@ ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)
     RETURN_IF_ERROR((*state)->SetModelConfig());
   }
 
+  auto& model_outputs = (*state)->model_outputs_;
+  // Parse the output states in the model configuration
+  triton::common::TritonJson::Value sequence_batching;
+  if ((*state)->ModelConfig().Find("sequence_batching", &sequence_batching)) {
+    triton::common::TritonJson::Value states;
+    if (sequence_batching.Find("state", &states)) {
+      for (size_t i = 0; i < states.ArraySize(); i++) {
+        triton::common::TritonJson::Value state;
+        RETURN_IF_ERROR(states.IndexAsObject(i, &state));
+        std::string output_state_name;
+        RETURN_IF_ERROR(
+            state.MemberAsString("output_name", &output_state_name));
+        auto it = model_outputs.find(output_state_name);
+        if (it == model_outputs.end()) {
+          model_outputs.insert({output_state_name, std::make_pair(-1, i)});
+        } else {
+          it->second.second = i;
+        }
+      }
+    }
+  }
+
+  // Parse the output names in the model configuration
+  triton::common::TritonJson::Value outputs;
+  RETURN_IF_ERROR((*state)->ModelConfig().MemberAsArray("output", &outputs));
+  for (size_t i = 0; i < outputs.ArraySize(); i++) {
+    triton::common::TritonJson::Value output;
+    THROW_IF_BACKEND_INSTANCE_ERROR(outputs.IndexAsObject(i, &output));
+
+    // Use names from ModelConfig by reference since the model
+    // config will persist longer than this inference execution.
+    std::string output_name;
+    THROW_IF_BACKEND_INSTANCE_ERROR(
+        output.MemberAsString("name", &output_name));
+
+    auto it = model_outputs.find(output_name);
+    if (it == model_outputs.end()) {
+      model_outputs.insert({output_name, std::make_pair(i, -1)});
+    } else {
+      it->second.first = i;
+    }
+  }
+
   RETURN_IF_ERROR((*state)->ParseParameters());
 
   return nullptr;  // success
@@ -185,22 +235,6 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
       enable_jit_executor_pair_({false, true}),
       enable_nvfuser_pair_({false, false})
 {
-  output_names_.clear();
-
-  triton::common::TritonJson::Value ios;
-  THROW_IF_BACKEND_INSTANCE_ERROR(ModelConfig().MemberAsArray("output", &ios));
-  for (size_t i = 0; i < ios.ArraySize(); i++) {
-    triton::common::TritonJson::Value io;
-    THROW_IF_BACKEND_INSTANCE_ERROR(ios.IndexAsObject(i, &io));
-
-    // Use names from ModelConfig by reference since the model
-    // config will persist longer than this inference execution.
-    const char* io_name;
-    size_t io_name_len;
-    THROW_IF_BACKEND_INSTANCE_ERROR(
-        io.MemberAsString("name", &io_name, &io_name_len));
-    output_names_.emplace_back(io_name);
-  }
 }
 
 TRITONSERVER_Error*
@@ -698,6 +732,11 @@ ModelInstanceState::ModelInstanceState(
     if (have_corrid) {
       expected_input_cnt += 1;
     }
+    // Add the state inputs to the expected count
+    triton::common::TritonJson::Value states;
+    if (sequence_batching.Find("state", &states)) {
+      expected_input_cnt += states.ArraySize();
+    }
   }
 
   supports_batching_ = model_state_->MaxBatchSize() > 0;
@@ -991,6 +1030,48 @@ ModelInstanceState::ValidateInputs(const size_t expected_input_cnt)
       }
     }
  }
+  triton::common::TritonJson::Value sequence_batching;
+  if (model_state_->ModelConfig().Find(
+          "sequence_batching", &sequence_batching)) {
+    triton::common::TritonJson::Value states;
+    if (sequence_batching.Find("state", &states)) {
+      for (size_t i = 0; i < states.ArraySize(); i++) {
+        triton::common::TritonJson::Value state;
+        RETURN_IF_ERROR(states.IndexAsObject(i, &state));
+        std::string state_name;
+        RETURN_IF_ERROR(state.MemberAsString("input_name", &state_name));
+        AddInputToMap(naming_convention, allowed_inputs, state_name, i);
+
+        // Validate data type
+        std::string state_dtype;
+        RETURN_IF_ERROR(state.MemberAsString("data_type", &state_dtype));
+        const auto pr = ModelConfigDataTypeToTorchType(state_dtype);
+        if (!pr.first && (state_dtype != "TYPE_STRING")) {
+          return TRITONSERVER_ErrorNew(
+              TRITONSERVER_ERROR_INTERNAL,
+              ("unsupported datatype " + state_dtype + " for input state '" +
+               state_name + "' for model '" + model_state_->Name() + "'")
+                  .c_str());
+        }
+
+
+        // Validate shape for String inputs. Only allow 1 dimension.
+        if (state_dtype == "TYPE_STRING") {
+          std::vector<int64_t> dims;
+          if ((dims.size() + (supports_batching_ ? 1 : 0)) > 1) {
+            return TRITONSERVER_ErrorNew(
+                TRITONSERVER_ERROR_INTERNAL,
+                ("Triton only supports 1 dimensional List of String as input "
+                 "for "
+                 "'" +
+                 std::string(state_name) + "' for model '" +
+                 model_state_->Name() + "'")
+                    .c_str());
+          }
+        }
+      }
+    }
+  }
 
   triton::common::TritonJson::Value batch_inputs;
   RETURN_IF_ERROR(
@@ -1085,6 +1166,54 @@ ModelInstanceState::ValidateOutputs()
     output_dtype_map_[io_name] = ConvertTorchTypeToDataType(pr.second);
   }
 
+  triton::common::TritonJson::Value sequence_batching;
+  if (model_state_->ModelConfig().Find(
+          "sequence_batching", &sequence_batching)) {
+    triton::common::TritonJson::Value states;
+    if (sequence_batching.Find("state", &states)) {
+      for (size_t i = 0; i < states.ArraySize(); i++) {
+        triton::common::TritonJson::Value state;
+        RETURN_IF_ERROR(states.IndexAsObject(i, &state));
+        std::string state_name;
+        RETURN_IF_ERROR(state.MemberAsString("output_name", &state_name));
+        std::string state_dtype;
+        RETURN_IF_ERROR(state.MemberAsString("data_type", &state_dtype));
+        std::vector<int64_t> dims;
+        RETURN_IF_ERROR(ParseShape(state, "dims", &dims));
+
+        // For state, naming convention is enforced to be NAMED_INDEX
+        int start_pos = state_name.find(deliminator);
+        op_index = std::atoi(state_name.substr(start_pos + 2).c_str());
+
+        const auto pr = ModelConfigDataTypeToTorchType(state_dtype);
+        if (!pr.first && (state_dtype != "TYPE_STRING")) {
+          return TRITONSERVER_ErrorNew(
+              TRITONSERVER_ERROR_INTERNAL,
+              ("unsupported datatype " + state_dtype + " for state '" +
+               state_name + "' for model '" + model_state_->Name() + "'")
+                  .c_str());
+        }
+
+        // Validate shape for String outputs. Only allow 1 dimension.
+        if (state_dtype == "TYPE_STRING") {
+          if ((dims.size() + (supports_batching_ ? 1 : 0)) > 1) {
+            return TRITONSERVER_ErrorNew(
+                TRITONSERVER_ERROR_INTERNAL,
+                ("Triton only supports 1 dimensional List of String as output "
+                 "for "
+                 "'" +
+                 std::string(state_name) + "' for model '" +
+                 model_state_->Name() + "'")
+                    .c_str());
+          }
+        }
+
+        output_index_map_[state_name] = op_index;
+        output_dtype_map_[state_name] = ConvertTorchTypeToDataType(pr.second);
+      }
+    }
+  }
+
   return nullptr;  // success
 }
 
@@ -1274,14 +1403,14 @@ ModelInstanceState::ProcessRequests(
   if (!all_response_failed) {
     for (const auto& name : model_state_->ModelOutputs()) {
-      int op_index = output_index_map_[name];
+      int op_index = output_index_map_[name.first];
       if ((op_index < 0) || (op_index > max_index)) {
         RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
             responses, request_count, all_response_failed,
             TRITONSERVER_ErrorNew(
                 TRITONSERVER_ERROR_INVALID_ARG,
                 std::string(
-                    "The output " + std::string(name) +
+                    "The output " + std::string(name.first) +
                     " in the model configuration refers to an output index "
                    "which doesn't exist. This model has " +
                     std::to_string(max_index + 1) + " outputs")
                     .c_str()));
 
@@ -1608,6 +1737,61 @@ ModelInstanceState::GetNamingConvention(
     }
   }
 
+  triton::common::TritonJson::Value sequence_batching;
+  if (model_state_->ModelConfig().Find(
+          "sequence_batching", &sequence_batching)) {
+    // If we need to manage state for the model, then we need to check
+    // the naming of the state adheres to both the input and output conventions
+    triton::common::TritonJson::Value states;
+    if (sequence_batching.Find("state", &states)) {
+      if (*naming_convention != NamingConvention::NAMED_INDEX) {
+        return TRITONSERVER_ErrorNew(
+            TRITONSERVER_ERROR_INVALID_ARG,
+            ("PyTorch model '" + model_state_->Name() +
+             "' is using sequence batching with state but not all inputs and "
+             "outputs follow the <name>__<index> naming convention. ")
+                .c_str());
+      }
+    }
+
+    for (size_t i = 0; i < states.ArraySize(); i++) {
+      triton::common::TritonJson::Value state;
+      RETURN_IF_ERROR(states.IndexAsObject(i, &state));
+      std::string name_entry =
+          io_kind == "input" ? "input_name" : "output_name";
+      std::string state_name;
+      RETURN_IF_ERROR(state.MemberAsString(name_entry.c_str(), &state_name));
+      int start_pos = state_name.find(deliminator);
+      if (start_pos == -1) {
+        return TRITONSERVER_ErrorNew(
+            TRITONSERVER_ERROR_INVALID_ARG,
+            ("PyTorch model '" + model_state_->Name() +
+             "' is using sequence batching with state but state '" +
+             state_name +
+             "' does not follow the <name>__<index> naming convention. ")
+                .c_str());
+      } else {
+        // check if the index part of the name is not an integer
+        std::string index_str = state_name.substr(start_pos + 2);
+        bool is_int = true;
+        for (auto itr = index_str.begin(); itr != index_str.end(); itr++) {
+          if (std::isdigit(*itr) == 0) {
+            is_int = false;
+          }
+        }
+        if (!is_int) {
+          return TRITONSERVER_ErrorNew(
+              TRITONSERVER_ERROR_INVALID_ARG,
+              ("PyTorch model '" + model_state_->Name() +
+               "' is using sequence batching with state but state '" +
+               state_name +
+               "' does not follow the <name>__<index> naming convention. ")
+                  .c_str());
+        }
+      }
+    }
+  }
+
   return nullptr;  // success
 }
 
@@ -1789,10 +1973,11 @@ SetStringInputTensor(
 }
 
 bool
-SetStringOutputBuffer(
+SetStringBuffer(
     torch::List<torch::jit::IValue>* tensor, TRITONBACKEND_Response** response,
-    TRITONBACKEND_Output* response_output, const size_t tensor_element_count,
-    cudaStream_t stream, std::string* serialized)
+    TRITONBACKEND_Output* response_output, TRITONBACKEND_State* response_state,
+    const size_t tensor_element_count, cudaStream_t stream,
+    std::string* serialized, bool state)
 {
   bool cuda_copy = false;
 
@@ -1814,15 +1999,26 @@
   TRITONSERVER_MemoryType actual_memory_type = TRITONSERVER_MEMORY_CPU;
   int64_t actual_memory_type_id = 0;
 
+  TRITONSERVER_Error* err;
   void* buffer;
-  auto err = TRITONBACKEND_OutputBuffer(
-      response_output, &buffer, serialized->size(), &actual_memory_type,
-      &actual_memory_type_id);
-  if (err != nullptr) {
-    RESPOND_AND_SET_NULL_IF_ERROR(response, err);
-    return cuda_copy;
-  }
+  if (!state) {
+    auto err = TRITONBACKEND_OutputBuffer(
+        response_output, &buffer, serialized->size(), &actual_memory_type,
+        &actual_memory_type_id);
+    if (err != nullptr) {
+      RESPOND_AND_SET_NULL_IF_ERROR(response, err);
+      return cuda_copy;
+    }
+  } else {
+    auto err = TRITONBACKEND_StateBuffer(
+        response_state, &buffer, serialized->size(), &actual_memory_type,
+        &actual_memory_type_id);
+    if (err != nullptr) {
+      RESPOND_AND_SET_NULL_IF_ERROR(response, err);
+      return cuda_copy;
+    }
+  }
 
   // Copy the serialized tensor into the allocated buffer.
   bool cuda_used = false;
   err = CopyBuffer(
@@ -1837,9 +2033,38 @@
     return cuda_copy;
   }
 
+  if (state) {
+    RESPOND_AND_SET_NULL_IF_ERROR(
+        response, TRITONBACKEND_StateUpdate(response_state));
+  }
+
   return cuda_copy;
 }
 
+
+bool
+SetStringOutputBuffer(
+    torch::List<torch::jit::IValue>* tensor, TRITONBACKEND_Response** response,
+    TRITONBACKEND_Output* response_output, const size_t tensor_element_count,
+    cudaStream_t stream, std::string* serialized)
+{
+  return SetStringBuffer(
+      tensor, response, response_output, nullptr /* response_state */,
+      tensor_element_count, stream, serialized, false /* state */);
+}
+
+bool
+SetStringStateBuffer(
+    torch::List<torch::jit::IValue>* tensor, TRITONBACKEND_Response** response,
+    TRITONBACKEND_State* response_state, const size_t tensor_element_count,
+    cudaStream_t stream, std::string* serialized)
+{
+  return SetStringBuffer(
+      tensor, response, nullptr /* response_output */, response_state,
+      tensor_element_count, stream, serialized, true /* state */);
+}
+
+
 TRITONSERVER_Error*
 ModelInstanceState::SetInputTensors(
     size_t total_batch_size, TRITONBACKEND_Request** requests,
@@ -2026,9 +2251,10 @@ ModelInstanceState::ReadOutputTensors(
   bool cuda_copy = false;
   // The serialized string buffer must be valid until output copies are done
   std::vector<std::unique_ptr<std::string>> string_buffer;
-  for (size_t idx = 0; idx < model_state_->ModelOutputs().size(); idx++) {
-    std::string name = model_state_->ModelOutputs()[idx];
-    int op_index = output_index_map_[name];
+  for (auto& output : model_state_->ModelOutputs()) {
+    int op_index = output_index_map_[output.first];
+    auto name = output.first;
+    auto output_tensor_pair = output.second;
 
     if (output_tensors[op_index].isTensor()) {
       torch::Tensor output_flat;
@@ -2086,10 +2312,22 @@
             "' is a scalar which is not supported.")
                 .c_str());
       }
+      if (output_tensor_pair.first != -1) {
+        responder.ProcessTensor(
+            name, output_dtype, batchn_shape, output_buffer, memory_type,
+            memory_id);
+      }
+      if (output_tensor_pair.second != -1) {
+        std::vector<TRITONBACKEND_State*> states;
+        states = responder.ProcessStateTensor(
+            name, output_dtype, batchn_shape, output_buffer, memory_type,
+            memory_id);
+        // Update the states
+        for (auto& state : states) {
+          RETURN_IF_ERROR(TRITONBACKEND_StateUpdate(state));
+        }
+      }
 
-      responder.ProcessTensor(
-          name, output_dtype, batchn_shape, output_buffer, memory_type,
-          memory_id);
     } else {
       responder.ProcessBatchOutput(
           name, *batch_output, output_buffer, memory_type, memory_id);
@@ -2119,15 +2357,30 @@
       // Only need a response tensor for requested outputs.
       if (response != nullptr) {
-        TRITONBACKEND_Output* response_output;
+        if (output_tensor_pair.first != -1) {
+          TRITONBACKEND_Output* response_output;
+          RESPOND_AND_SET_NULL_IF_ERROR(
+              &response, TRITONBACKEND_ResponseOutput(
+                             response, &response_output, name.c_str(),
+                             TRITONSERVER_TYPE_BYTES, batchn_shape.data(),
+                             batchn_shape.size()));
+          string_buffer.emplace_back(new std::string());
+          cuda_copy |= SetStringOutputBuffer(
+              &output_list, &response, response_output, tensor_element_cnt,
+              GetCudaStreamByInstanceKind(), string_buffer.back().get());
+        }
+      }
+      if (output_tensor_pair.second != -1) {
+        TRITONBACKEND_State* response_state;
         RESPOND_AND_SET_NULL_IF_ERROR(
-            &response, TRITONBACKEND_ResponseOutput(
-                           response, &response_output, name.c_str(),
+            &response, TRITONBACKEND_StateNew(
+                           &response_state, request, name.c_str(),
                            TRITONSERVER_TYPE_BYTES, batchn_shape.data(),
                            batchn_shape.size()));
+
         string_buffer.emplace_back(new std::string());
-        cuda_copy |= SetStringOutputBuffer(
-            &output_list, &response, response_output, tensor_element_cnt,
+        cuda_copy |= SetStringStateBuffer(
+            &output_list, &response, response_state, tensor_element_cnt,
             GetCudaStreamByInstanceKind(), string_buffer.back().get());
       }
     }
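The model_outputs_ map built in this patch is the bookkeeping the rest of the series leans on: for every unique tensor name it stores a pair of (index in the config's output section, index in the sequence_batching state section), with -1 marking "absent from that section". Below is a minimal, self-contained sketch of that two-pass merge, using hypothetical tensor names in place of the parsed TritonJson config (illustration only, not code from the backend):

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    int main()
    {
      // first: index in the "output" section; second: index in the
      // sequence_batching "state" section; -1 means not listed there.
      std::map<std::string, std::pair<int64_t, int64_t>> model_outputs;

      // Hypothetical config contents.
      std::vector<std::string> state_output_names = {"OUTPUT__0", "STATE__1"};
      std::vector<std::string> output_names = {"OUTPUT__0", "OUTPUT__1"};

      // Pass 1: state outputs, mirroring ModelState::Create above.
      for (size_t i = 0; i < state_output_names.size(); i++) {
        auto it = model_outputs.find(state_output_names[i]);
        if (it == model_outputs.end()) {
          model_outputs.insert({state_output_names[i], std::make_pair(-1, i)});
        } else {
          it->second.second = i;
        }
      }

      // Pass 2: regular outputs. A name present in both sections ends up
      // with two non-negative indices, which later tells ReadOutputTensors
      // to send the tensor as a response output AND as an updated state.
      for (size_t i = 0; i < output_names.size(); i++) {
        auto it = model_outputs.find(output_names[i]);
        if (it == model_outputs.end()) {
          model_outputs.insert({output_names[i], std::make_pair(i, -1)});
        } else {
          it->second.first = i;
        }
      }

      // Prints:
      //   OUTPUT__0: output 0, state 0
      //   OUTPUT__1: output 1, state -1
      //   STATE__1: output -1, state 1
      for (const auto& entry : model_outputs) {
        std::cout << entry.first << ": output " << entry.second.first
                  << ", state " << entry.second.second << "\n";
      }
      return 0;
    }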
From 70cb673156dcc09e801be1b9c2aa3c2a97d1bfe3 Mon Sep 17 00:00:00 2001
From: jamied157
Date: Thu, 27 Apr 2023 15:42:46 +0100
Subject: [PATCH 2/5] formatting

---
 src/libtorch.cc | 36 ++++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/src/libtorch.cc b/src/libtorch.cc
index a4d8244..f37905d 100644
--- a/src/libtorch.cc
+++ b/src/libtorch.cc
@@ -110,6 +110,10 @@ class ModelState : public BackendModel {
   {
     return model_outputs_;
   }
+  const std::map<std::string, std::pair<int64_t, int64_t>>& ModelOutputs()
+  {
+    return model_outputs_;
+  }
 
  private:
   ModelState(TRITONBACKEND_Model* triton_model);
@@ -544,6 +548,11 @@ class ModelInstanceState : public BackendModelInstance {
   TRITONSERVER_Error* ValidateTypedSequenceControl(
       triton::common::TritonJson::Value& sequence_batching,
       const std::string& control_kind, bool required, bool* have_control);
+  void AddInputToMap(
+      NamingConvention naming_convention,
+      const std::vector<std::string> allowed_inputs,
+      const std::string &io_name,
+      const uint32_t index);
   TRITONSERVER_Error* ValidateInputs(const size_t expected_input_cnt);
   void AddInputToMap(
       NamingConvention naming_convention,
@@ -873,6 +882,8 @@ ModelInstanceState::ValidateTypedSequenceControl(
   return nullptr;  // success
 }
 
+void ModelInstanceState::AddInputToMap(NamingConvention naming_convention, const std::vector<std::string> allowed_inputs, const std::string &io_name, const uint32_t index) {
+
 std::string deliminator = "__";
 
 void
 ModelInstanceState::AddInputToMap(
@@ -1201,7 +1212,7 @@ ModelInstanceState::ValidateOutputs()
                 TRITONSERVER_ERROR_INTERNAL,
                 ("Triton only supports 1 dimensional List of String as output "
                  "for "
-                 "'" +
+                 "'" +
                  std::string(state_name) + "' for model '" +
                  model_state_->Name() + "'")
                     .c_str());
@@ -1768,7 +1779,7 @@ ModelInstanceState::GetNamingConvention(
             ("PyTorch model '" + model_state_->Name() +
              "' is using sequence batching with state but state '" +
              state_name +
-             "' does not follow the <name>__<index> naming convention. ")
+             "' does not follow the <name>__<index> naming convention. ")
                 .c_str());
       } else {
         // check if the index part of the name is not an integer
@@ -2316,17 +2327,17 @@ ModelInstanceState::ReadOutputTensors(
           responder.ProcessTensor(
               name, output_dtype, batchn_shape, output_buffer, memory_type,
               memory_id);
-      }
-      if (output_tensor_pair.second != -1) {
-        std::vector<TRITONBACKEND_State*> states;
-        states = responder.ProcessStateTensor(
+        }
+        if (output_tensor_pair.second != -1) {
+          std::vector<TRITONBACKEND_State*> states;
+          states = responder.ProcessStateTensor(
             name, output_dtype, batchn_shape, output_buffer, memory_type,
             memory_id);
-        // Update the states
-        for (auto& state : states) {
-          RETURN_IF_ERROR(TRITONBACKEND_StateUpdate(state));
+          // Update the states
+          for (auto& state : states) {
+            RETURN_IF_ERROR(TRITONBACKEND_StateUpdate(state));
+          }
         }
-      }
 
     } else {
       responder.ProcessBatchOutput(
@@ -2377,6 +2388,11 @@
                            &response_state, request, name.c_str(),
                            TRITONSERVER_TYPE_BYTES, batchn_shape.data(),
                            batchn_shape.size()));
+        RESPOND_AND_SET_NULL_IF_ERROR(
+            &response, TRITONBACKEND_StateNew(
+                           &response_state, request, name.c_str(),
+                           TRITONSERVER_TYPE_BYTES, batchn_shape.data(),
+                           batchn_shape.size()));
 
         string_buffer.emplace_back(new std::string());
         cuda_copy |= SetStringStateBuffer(
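Patch 1 (and the checks reshuffled in patch 2) pin state tensors to the NAMED_INDEX convention: each input_name/output_name must look like <name>__<index>, and the digits after the "__" delimiter become the op_index stored in output_index_map_. That parse-and-validate step can be exercised in isolation; the helper below is hypothetical and simply mirrors the find/substr/atoi logic from GetNamingConvention and ValidateOutputs:

    #include <cctype>
    #include <cstdlib>
    #include <iostream>
    #include <string>

    // Returns the parsed index, or -1 when the name does not follow the
    // <name>__<index> convention enforced for state tensors.
    int ParseNamedIndex(const std::string& name)
    {
      const std::string deliminator = "__";  // spelled as in libtorch.cc
      size_t start_pos = name.find(deliminator);
      if (start_pos == std::string::npos) {
        return -1;  // no "__" delimiter at all
      }
      std::string index_str = name.substr(start_pos + 2);
      if (index_str.empty()) {
        return -1;  // "__" with nothing after it
      }
      for (char c : index_str) {
        if (std::isdigit(static_cast<unsigned char>(c)) == 0) {
          return -1;  // index part is not an integer
        }
      }
      return std::atoi(index_str.c_str());
    }

    int main()
    {
      std::cout << ParseNamedIndex("OUTPUT__1") << "\n";     // 1
      std::cout << ParseNamedIndex("OUTPUT_STATE") << "\n";  // -1
      std::cout << ParseNamedIndex("STATE__x") << "\n";      // -1
      return 0;
    }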
") .c_str()); } else { // check if the index part of the name is not an integer @@ -2316,17 +2327,17 @@ ModelInstanceState::ReadOutputTensors( responder.ProcessTensor( name, output_dtype, batchn_shape, output_buffer, memory_type, memory_id); - } - if (output_tensor_pair.second != -1) { - std::vector states; - states = responder.ProcessStateTensor( + } + if (output_tensor_pair.second != -1) { + std::vector states; + states = responder.ProcessStateTensor( name, output_dtype, batchn_shape, output_buffer, memory_type, memory_id); - // Update the states - for (auto& state : states) { - RETURN_IF_ERROR(TRITONBACKEND_StateUpdate(state)); - } + // Update the states + for (auto& state : states) { + RETURN_IF_ERROR(TRITONBACKEND_StateUpdate(state)); } + } } else { responder.ProcessBatchOutput( @@ -2377,6 +2388,11 @@ ModelInstanceState::ReadOutputTensors( &response_state, request, name.c_str(), TRITONSERVER_TYPE_BYTES, batchn_shape.data(), batchn_shape.size())); + RESPOND_AND_SET_NULL_IF_ERROR( + &response, TRITONBACKEND_StateNew( + &response_state, request, name.c_str(), + TRITONSERVER_TYPE_BYTES, batchn_shape.data(), + batchn_shape.size())); string_buffer.emplace_back(new std::string()); cuda_copy |= SetStringStateBuffer( From d1035dc313ae4e5eda8126fa771100c22395add1 Mon Sep 17 00:00:00 2001 From: jamied157 Date: Wed, 7 Jun 2023 17:31:02 +0100 Subject: [PATCH 3/5] MR comments --- src/libtorch.cc | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/src/libtorch.cc b/src/libtorch.cc index f37905d..f0da914 100644 --- a/src/libtorch.cc +++ b/src/libtorch.cc @@ -38,7 +38,6 @@ #include "triton/backend/backend_output_responder.h" #include "triton/common/nvtx.h" #include "triton/core/tritonbackend.h" -#include "triton/core/tritonserver.h" #ifdef TRITON_PYTORCH_ENABLE_TORCHVISION // Suppress warnings in torch headers @@ -154,7 +153,9 @@ class ModelState : public BackendModel { torch_models_; // model_outputs is a map that contains unique outputs that the model must - // provide. In the model configuration, the output in the state configuration + // provide. The first pair is the model output index and the second is + // the index in the model state, -1 is used if one is not required. + // In the model configuration, the output in the state configuration // can have intersection with the outputs section of the model. If an output // is specified both in the output section and state section, it indicates // that the backend must return the output state to the client too. @@ -548,11 +549,6 @@ class ModelInstanceState : public BackendModelInstance { TRITONSERVER_Error* ValidateTypedSequenceControl( triton::common::TritonJson::Value& sequence_batching, const std::string& control_kind, bool required, bool* have_control); - void AddInputToMap( - NamingConvention naming_convention, - const std::vector allowed_inputs, - const std::string &io_name, - const uint32_t index); TRITONSERVER_Error* ValidateInputs(const size_t expected_input_cnt); void AddInputToMap( NamingConvention naming_convention, @@ -882,8 +878,6 @@ ModelInstanceState::ValidateTypedSequenceControl( return nullptr; // success } -void ModelInstanceState::AddInputToMap(NamingConvention naming_convention, const std::vector allowed_inputs, const std::string &io_name, const uint32_t index) { - std::string deliminator = "__"; void ModelInstanceState::AddInputToMap( @@ -1065,7 +1059,6 @@ ModelInstanceState::ValidateInputs(const size_t expected_input_cnt) .c_str()); } - // Validate shape for String inputs. 
From b29019624fa32c1f38d3ba253698fe2d9f7453d6 Mon Sep 17 00:00:00 2001
From: jamied157
Date: Thu, 8 Jun 2023 09:27:38 +0100
Subject: [PATCH 4/5] clang format

---
 src/libtorch.cc | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/src/libtorch.cc b/src/libtorch.cc
index f0da914..d5c1601 100644
--- a/src/libtorch.cc
+++ b/src/libtorch.cc
@@ -109,10 +109,6 @@ class ModelState : public BackendModel {
   {
     return model_outputs_;
   }
-  const std::map<std::string, std::pair<int64_t, int64_t>>& ModelOutputs()
-  {
-    return model_outputs_;
-  }
 
  private:
   ModelState(TRITONBACKEND_Model* triton_model);
@@ -1772,7 +1768,7 @@ ModelInstanceState::GetNamingConvention(
             ("PyTorch model '" + model_state_->Name() +
              "' is using sequence batching with state but state '" +
              state_name +
-             "' does not follow the <name>__<index> naming convention. ")
+             "' does not follow the <name>__<index> naming convention. ")
                 .c_str());
       } else {
         // check if the index part of the name is not an integer
@@ -2320,10 +2316,10 @@ ModelInstanceState::ReadOutputTensors(
           responder.ProcessTensor(
               name, output_dtype, batchn_shape, output_buffer, memory_type,
               memory_id);
-        }
-        if (output_tensor_pair.second != -1) {
-          std::vector<TRITONBACKEND_State*> states;
-          states = responder.ProcessStateTensor(
+      }
+      if (output_tensor_pair.second != -1) {
+        std::vector<TRITONBACKEND_State*> states;
+        states = responder.ProcessStateTensor(
               name, output_dtype, batchn_shape, output_buffer, memory_type,
               memory_id);
         // Update the states

From dad98fa6d1ca1f69f97b1a75a1a89ff2bf4ea765 Mon Sep 17 00:00:00 2001
From: Iman Tabrizian
Date: Wed, 5 Jul 2023 15:02:57 -0400
Subject: [PATCH 5/5] Fix double StateNew

---
 src/libtorch.cc | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/libtorch.cc b/src/libtorch.cc
index d5c1601..6ac3536 100644
--- a/src/libtorch.cc
+++ b/src/libtorch.cc
@@ -2377,11 +2377,6 @@ ModelInstanceState::ReadOutputTensors(
                            &response_state, request, name.c_str(),
                            TRITONSERVER_TYPE_BYTES, batchn_shape.data(),
                            batchn_shape.size()));
-        RESPOND_AND_SET_NULL_IF_ERROR(
-            &response, TRITONBACKEND_StateNew(
-                           &response_state, request, name.c_str(),
-                           TRITONSERVER_TYPE_BYTES, batchn_shape.data(),
-                           batchn_shape.size()));
 
         string_buffer.emplace_back(new std::string());
         cuda_copy |= SetStringStateBuffer(