From 145b89e435ff8d02dde5a13f350bfab56e84e59b Mon Sep 17 00:00:00 2001 From: Wei Chen Date: Fri, 25 Jul 2025 06:36:12 -0700 Subject: [PATCH 1/2] perf: optimize string tensor deserialization with high performance c++ implementation --- src/pb_tensor.cc | 85 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 69 insertions(+), 16 deletions(-) diff --git a/src/pb_tensor.cc b/src/pb_tensor.cc index 9fde62fe..35807b68 100644 --- a/src/pb_tensor.cc +++ b/src/pb_tensor.cc @@ -41,9 +41,15 @@ namespace py = pybind11; typedef SSIZE_T ssize_t; #endif +#include +#include +#include + namespace triton { namespace backend { namespace python { #ifdef TRITON_PB_STUB +py::array deserialize_bytes_tensor_cpp(const uint8_t* data, size_t data_size); + PbTensor::PbTensor(const std::string& name, py::array& numpy_array) : name_(name) { @@ -160,14 +166,9 @@ PbTensor::PbTensor( py::array(triton_to_pybind_dtype(dtype_), dims_, (void*)memory_ptr_); numpy_array_ = numpy_array.attr("view")(triton_to_numpy_type(dtype_)); } else { - py::object numpy_array = py::array( - triton_to_pybind_dtype(TRITONSERVER_TYPE_UINT8), {byte_size}, - (void*)memory_ptr_); - py::module triton_pb_utils = - py::module::import("triton_python_backend_utils"); - numpy_array_ = - triton_pb_utils.attr("deserialize_bytes_tensor")(numpy_array) - .attr("reshape")(dims); + numpy_array_ = deserialize_bytes_tensor_cpp( + static_cast(memory_ptr_), byte_size) + .attr("reshape")(dims); } } else { numpy_array_ = py::none(); @@ -234,6 +235,63 @@ delete_unused_dltensor(PyObject* dlp) } } +py::array +deserialize_bytes_tensor_cpp(const uint8_t* data, size_t data_size) +{ + if (data_size == 0) { + py::module numpy = py::module::import("numpy"); + return numpy.attr("empty")(0, py::dtype("object")); + } + + // First pass: count the number of strings and calculate total size + size_t offset = 0; + size_t num_strings = 0; + size_t total_string_size = 0; + + while (offset < data_size) { + if (offset + 4 > data_size) { + throw PythonBackendException( + "Invalid bytes tensor data: incomplete length field"); + } + + // Read 4-byte length (little-endian) + uint32_t length = *reinterpret_cast(data + offset); + offset += 4; + + if (offset + length > data_size) { + throw PythonBackendException( + "Invalid bytes tensor data: string extends beyond buffer"); + } + + num_strings++; + total_string_size += length; + offset += length; + } + + // Create numpy array of objects using pybind11's numpy module + py::module numpy = py::module::import("numpy"); + py::array result = numpy.attr("empty")(num_strings, py::dtype("object")); + auto result_ptr = static_cast(result.request().ptr); + + // Second pass: extract strings + offset = 0; + size_t string_index = 0; + + while (offset < data_size) { + uint32_t length = *reinterpret_cast(data + offset); + offset += 4; + + // Create Python bytes object using pybind11 + py::bytes bytes_obj(reinterpret_cast(data + offset), length); + Py_INCREF(bytes_obj.ptr()); // Increment reference count + result_ptr[string_index] = bytes_obj.ptr(); + string_index++; + offset += length; + } + + return result; +} + std::shared_ptr PbTensor::FromNumpy(const std::string& name, py::array& numpy_array) { @@ -668,14 +726,9 @@ PbTensor::PbTensor( py::array(triton_to_pybind_dtype(dtype_), dims_, (void*)memory_ptr_); numpy_array_ = numpy_array.attr("view")(triton_to_numpy_type(dtype_)); } else { - py::object numpy_array = py::array( - triton_to_pybind_dtype(TRITONSERVER_TYPE_UINT8), {byte_size_}, - (void*)memory_ptr_); - py::module triton_pb_utils = - py::module::import("triton_python_backend_utils"); - numpy_array_ = - triton_pb_utils.attr("deserialize_bytes_tensor")(numpy_array) - .attr("reshape")(dims_); + numpy_array_ = deserialize_bytes_tensor_cpp( + static_cast(memory_ptr_), byte_size_) + .attr("reshape")(dims_); } } else { numpy_array_ = py::none(); From cf1a489f268df892849b01d212b15562cd2855a4 Mon Sep 17 00:00:00 2001 From: Wei Chen Date: Thu, 18 Sep 2025 13:42:34 -0700 Subject: [PATCH 2/2] Address PR comments --- src/pb_tensor.cc | 125 +++++++++++++++++++++++------------------------ 1 file changed, 62 insertions(+), 63 deletions(-) diff --git a/src/pb_tensor.cc b/src/pb_tensor.cc index 35807b68..26e77586 100644 --- a/src/pb_tensor.cc +++ b/src/pb_tensor.cc @@ -48,7 +48,62 @@ typedef SSIZE_T ssize_t; namespace triton { namespace backend { namespace python { #ifdef TRITON_PB_STUB -py::array deserialize_bytes_tensor_cpp(const uint8_t* data, size_t data_size); +py::array +deserialize_bytes_tensor_cpp(const uint8_t* data, size_t data_size) +{ + if (data_size == 0) { + py::module numpy = py::module::import("numpy"); + return numpy.attr("empty")(0, py::dtype("object")); + } + + // First pass: count the number of strings and calculate total size + size_t offset = 0; + size_t num_strings = 0; + size_t total_string_size = 0; + + while (offset < data_size) { + if (offset + 4 > data_size) { + throw PythonBackendException( + "Invalid bytes tensor data: incomplete length field"); + } + + // Read 4-byte length (little-endian) + uint32_t length = *reinterpret_cast(data + offset); + offset += 4; + + if (offset + length > data_size) { + throw PythonBackendException( + "Invalid bytes tensor data: string extends beyond buffer"); + } + + num_strings++; + total_string_size += length; + offset += length; + } + + // Create numpy array of objects using pybind11's numpy module + py::module numpy = py::module::import("numpy"); + py::array result = numpy.attr("empty")(num_strings, py::dtype("object")); + auto result_ptr = static_cast(result.request().ptr); + + // Second pass: extract strings + offset = 0; + size_t string_index = 0; + + while (offset < data_size) { + uint32_t length = *reinterpret_cast(data + offset); + offset += 4; + + // Create Python bytes object using pybind11 + py::bytes bytes_obj(reinterpret_cast(data + offset), length); + Py_INCREF(bytes_obj.ptr()); // Increment reference count + result_ptr[string_index] = bytes_obj.ptr(); + string_index++; + offset += length; + } + + return result; +} PbTensor::PbTensor(const std::string& name, py::array& numpy_array) : name_(name) @@ -166,9 +221,9 @@ PbTensor::PbTensor( py::array(triton_to_pybind_dtype(dtype_), dims_, (void*)memory_ptr_); numpy_array_ = numpy_array.attr("view")(triton_to_numpy_type(dtype_)); } else { - numpy_array_ = deserialize_bytes_tensor_cpp( - static_cast(memory_ptr_), byte_size) - .attr("reshape")(dims); + py::object numpy_array = deserialize_bytes_tensor_cpp( + static_cast(memory_ptr_), byte_size_); + numpy_array_ = numpy_array.attr("reshape")(dims_); } } else { numpy_array_ = py::none(); @@ -235,62 +290,6 @@ delete_unused_dltensor(PyObject* dlp) } } -py::array -deserialize_bytes_tensor_cpp(const uint8_t* data, size_t data_size) -{ - if (data_size == 0) { - py::module numpy = py::module::import("numpy"); - return numpy.attr("empty")(0, py::dtype("object")); - } - - // First pass: count the number of strings and calculate total size - size_t offset = 0; - size_t num_strings = 0; - size_t total_string_size = 0; - - while (offset < data_size) { - if (offset + 4 > data_size) { - throw PythonBackendException( - "Invalid bytes tensor data: incomplete length field"); - } - - // Read 4-byte length (little-endian) - uint32_t length = *reinterpret_cast(data + offset); - offset += 4; - - if (offset + length > data_size) { - throw PythonBackendException( - "Invalid bytes tensor data: string extends beyond buffer"); - } - - num_strings++; - total_string_size += length; - offset += length; - } - - // Create numpy array of objects using pybind11's numpy module - py::module numpy = py::module::import("numpy"); - py::array result = numpy.attr("empty")(num_strings, py::dtype("object")); - auto result_ptr = static_cast(result.request().ptr); - - // Second pass: extract strings - offset = 0; - size_t string_index = 0; - - while (offset < data_size) { - uint32_t length = *reinterpret_cast(data + offset); - offset += 4; - - // Create Python bytes object using pybind11 - py::bytes bytes_obj(reinterpret_cast(data + offset), length); - Py_INCREF(bytes_obj.ptr()); // Increment reference count - result_ptr[string_index] = bytes_obj.ptr(); - string_index++; - offset += length; - } - - return result; -} std::shared_ptr PbTensor::FromNumpy(const std::string& name, py::array& numpy_array) @@ -726,9 +725,9 @@ PbTensor::PbTensor( py::array(triton_to_pybind_dtype(dtype_), dims_, (void*)memory_ptr_); numpy_array_ = numpy_array.attr("view")(triton_to_numpy_type(dtype_)); } else { - numpy_array_ = deserialize_bytes_tensor_cpp( - static_cast(memory_ptr_), byte_size_) - .attr("reshape")(dims_); + py::object numpy_array = deserialize_bytes_tensor_cpp( + static_cast(memory_ptr_), byte_size_); + numpy_array_ = numpy_array.attr("reshape")(dims_); } } else { numpy_array_ = py::none();