Fix Triton C API mode missing infer requested output datatype bug
matthewkotila committed Aug 7, 2024
1 parent c7b1642 · commit 9dcc8b7
Showing 4 changed files with 16 additions and 23 deletions.
src/client_backend/triton_c_api/triton_c_api_backend.cc (3 additions, 3 deletions)
@@ -406,7 +406,7 @@ TritonCApiInferRequestedOutput::Create(
     const size_t class_count, const std::string& datatype)
 {
   TritonCApiInferRequestedOutput* local_infer_output =
-      new TritonCApiInferRequestedOutput(name);
+      new TritonCApiInferRequestedOutput(name, datatype);

   tc::InferRequestedOutput* triton_infer_output;
   RETURN_IF_TRITON_ERROR(tc::InferRequestedOutput::Create(
@@ -427,8 +427,8 @@ TritonCApiInferRequestedOutput::SetSharedMemory(
 }

 TritonCApiInferRequestedOutput::TritonCApiInferRequestedOutput(
-    const std::string& name)
-    : InferRequestedOutput(BackendKind::TRITON_C_API, name)
+    const std::string& name, const std::string& datatype)
+    : InferRequestedOutput(BackendKind::TRITON_C_API, name, datatype)
 {
 }

src/client_backend/triton_c_api/triton_c_api_backend.h (2 additions, 1 deletion)
@@ -215,7 +215,8 @@ class TritonCApiInferRequestedOutput : public InferRequestedOutput {
       const std::string& name, size_t byte_size, size_t offset = 0) override;

  private:
-  explicit TritonCApiInferRequestedOutput(const std::string& name);
+  explicit TritonCApiInferRequestedOutput(
+      const std::string& name, const std::string& datatype);

   std::unique_ptr<tc::InferRequestedOutput> output_;
 };
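Taken together, the two files above thread the requested output's datatype through the C API wrapper into the common InferRequestedOutput base class, where it was previously dropped. Below is a minimal, self-contained sketch of that pattern; the base-class members and the BackendKind enum are simplified stand-ins for the real client_backend types, not the actual declarations.

#include <string>

// Simplified stand-ins for the real client_backend types.
enum class BackendKind { TRITON_C_API };

class InferRequestedOutput {
 public:
  InferRequestedOutput(
      BackendKind kind, const std::string& name, const std::string& datatype)
      : kind_(kind), name_(name), datatype_(datatype)
  {
  }
  // In the real code the base class presumably defaults the datatype when it
  // is not passed, which is how the C API backend ended up reporting no
  // datatype for its requested outputs before this fix.
  const std::string& Datatype() const { return datatype_; }

 private:
  BackendKind kind_;
  std::string name_;
  std::string datatype_;
};

class TritonCApiInferRequestedOutput : public InferRequestedOutput {
 public:
  // Mirrors the fixed constructor: the datatype is accepted and forwarded.
  TritonCApiInferRequestedOutput(
      const std::string& name, const std::string& datatype)
      : InferRequestedOutput(BackendKind::TRITON_C_API, name, datatype)
  {
  }
};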
src/client_backend/triton_c_api/triton_loader.cc (10 additions, 19 deletions)
@@ -338,25 +338,16 @@ TritonLoader::StartTriton()
   // Create the allocator that will be used to allocate buffers for
   // the result tensors.
   RETURN_IF_TRITONSERVER_ERROR(
-      GetSingleton()->response_allocator_new_fn_(
-          &allocator_,
-          reinterpret_cast<
-              TRITONSERVER_Error* (*)(TRITONSERVER_ResponseAllocator* allocator,
-                                      const char* tensor_name, size_t byte_size,
-                                      TRITONSERVER_MemoryType memory_type,
-                                      int64_t memory_type_id, void* userp,
-                                      void** buffer, void** buffer_userp,
-                                      TRITONSERVER_MemoryType*
-                                          actual_memory_type,
-                                      int64_t* actual_memory_type_id)>(
-              ResponseAlloc),
-          reinterpret_cast<
-              TRITONSERVER_Error* (*)(TRITONSERVER_ResponseAllocator* allocator,
-                                      void* buffer, void* buffer_userp,
-                                      size_t byte_size,
-                                      TRITONSERVER_MemoryType memory_type,
-                                      int64_t memory_type_id)>(ResponseRelease),
-          nullptr /* start_fn */),
+      GetSingleton()
+          ->response_allocator_new_fn_(
+              &allocator_,
+              reinterpret_cast<
+                  TRITONSERVER_Error* (*)(TRITONSERVER_ResponseAllocator * allocator, const char* tensor_name, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void* userp, void** buffer, void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type, int64_t* actual_memory_type_id)>(
+                  ResponseAlloc),
+              reinterpret_cast<
+                  TRITONSERVER_Error* (*)(TRITONSERVER_ResponseAllocator * allocator, void* buffer, void* buffer_userp, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)>(
+                  ResponseRelease),
+              nullptr /* start_fn */),
       "creating response allocator");

   return Error::Success;
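The triton_loader.cc hunk is a pure reflow of the two reinterpret_casts (each function-pointer type is folded onto a single line); the calls themselves are unchanged. The casts exist because the response allocator is created through a function pointer resolved at runtime, so the local callbacks must be handed over as exactly the pointer type that entry point expects. A condensed, hypothetical illustration of that registration pattern, using stand-in types rather than the TRITONSERVER API:

#include <cstddef>

// Stand-ins for the opaque server types; not the real TRITONSERVER structs.
struct ErrorObj;
struct ResponseAllocatorObj;

// Pointer type the (hypothetical) dynamically loaded entry point expects for
// the allocation callback.
using AllocFn = ErrorObj* (*)(
    ResponseAllocatorObj* allocator, const char* tensor_name, size_t byte_size,
    void** buffer, void** buffer_userp);

// Signature of the entry point itself, kept as a function pointer the way
// TritonLoader keeps response_allocator_new_fn_.
using AllocatorNewFn =
    ErrorObj* (*)(ResponseAllocatorObj** allocator, AllocFn alloc_fn);

// Local callback; its signature matches AllocFn, so the cast below only
// documents the conversion rather than changing the call contract.
static ErrorObj*
ResponseAlloc(
    ResponseAllocatorObj* /*allocator*/, const char* /*tensor_name*/,
    size_t /*byte_size*/, void** buffer, void** buffer_userp)
{
  *buffer = nullptr;
  *buffer_userp = nullptr;
  return nullptr;  // nullptr means success in this sketch
}

static ErrorObj*
CreateAllocator(
    AllocatorNewFn allocator_new_fn, ResponseAllocatorObj** allocator)
{
  return allocator_new_fn(allocator, reinterpret_cast<AllocFn>(ResponseAlloc));
}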
src/profile_data_exporter.cc (1 addition, 0 deletions)
@@ -205,6 +205,7 @@ ProfileDataExporter::AddDataToJSON(
 {
   // TPA-268: support N-dimensional tensor
   size_t data_size;
+  // TODO TPA-283: Add support for N-dimensional string tensors
   if (data_type == "BYTES" || data_type == "JSON") {
     // return string as is instead of array of chars
     data_size = 1;
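The profile_data_exporter.cc change only adds the TPA-283 TODO comment. For context, the surrounding logic treats string-like datatypes (BYTES, JSON) as a single value, so their exported data size is 1 rather than a per-character count; how non-string sizes are derived is not visible in the hunk. A hedged sketch of that shape, with a hypothetical helper name and an assumed element-count fallback:

#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

// Hypothetical helper, not the exporter's actual function.
size_t
ExportedDataSize(
    const std::string& data_type, const std::vector<int64_t>& shape)
{
  // TODO TPA-283 (from the diff): N-dimensional string tensors are not yet
  // supported; a string tensor is treated as one value.
  if (data_type == "BYTES" || data_type == "JSON") {
    // Return the string as-is instead of an array of chars.
    return 1;
  }
  // Assumed fallback: one entry per element of the tensor.
  size_t count = 1;
  for (const int64_t dim : shape) {
    count *= static_cast<size_t>(dim);
  }
  return count;
}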
