Enhancements
pskiran1 committed Jul 18, 2024
1 parent efc3a32 commit c455789
Showing 4 changed files with 77 additions and 55 deletions.
56 changes: 23 additions & 33 deletions src/infer_request.cc
@@ -1016,17 +1016,15 @@ InferenceRequest::Normalize()
       auto& input = pr.second;
       *input.MutableShape() = input.OriginalShape();

-      // For a shape tensor, mark that the input is a shape tensor.
       const inference::ModelInput* input_config;
       RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
       if (input_config->is_shape_tensor()) {
-        input.SetIsShapeTensor(true);
-      }
-
-      // If a tensor uses a non-linear IO format, indicate that the input uses a
-      // non-linear IO format.
-      if (input_config->is_non_linear_format_io()) {
-        input.SetIsNonLinearFormatIo(true);
+        // For a shape tensor, mark that the input is a shape tensor.
+        input.SetIsShapeTensor();
+      } else if (input_config->is_non_linear_format_io()) {
+        // If a tensor uses a non-linear IO format, indicate that the input uses
+        // a non-linear IO format.
+        input.SetIsNonLinearFormatIo();
       }
     }
   } else {
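Note on the hunk above: the two model-config flags previously set two independent booleans on the input; the rewritten control flow makes the classification mutually exclusive, with the shape-tensor flag taking precedence. A minimal, self-contained sketch of the resulting behavior (an illustration, not the Triton sources):

    #include <cassert>

    enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };

    // Stand-in for the two flags read from the model config above.
    TensorType Classify(bool is_shape_tensor, bool is_non_linear_format_io) {
      if (is_shape_tensor) {
        return TensorType::SHAPE_TENSOR;  // checked first, so it wins if both are set
      } else if (is_non_linear_format_io) {
        return TensorType::NON_LINEAR;
      }
      return TensorType::TENSOR;
    }

    int main() {
      // The old code would have set both booleans for a doubly-flagged input;
      // the else-if now resolves that case to SHAPE_TENSOR.
      assert(Classify(true, true) == TensorType::SHAPE_TENSOR);
      assert(Classify(false, true) == TensorType::NON_LINEAR);
      assert(Classify(false, false) == TensorType::TENSOR);
    }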
@@ -1040,18 +1038,16 @@ InferenceRequest::Normalize()
       const inference::ModelInput* input_config;
       RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));

-      // If a tensor uses a non-linear IO format, indicate that the input uses a
-      // non-linear IO format.
-      if (input_config->is_non_linear_format_io()) {
-        input.SetIsNonLinearFormatIo(true);
-      }
-
       // For a shape tensor, keep the tensor's shape as it is and mark
       // that the input is a shape tensor.
       if (input_config->is_shape_tensor()) {
         *input.MutableShape() = input.OriginalShape();
-        input.SetIsShapeTensor(true);
+        input.SetIsShapeTensor();
         continue;
+      } else if (input_config->is_non_linear_format_io()) {
+        // If a tensor uses a non-linear IO format, indicate that the input uses
+        // a non-linear IO format.
+        input.SetIsNonLinearFormatIo();
       }

       if (input.OriginalShape().size() == 0) {
@@ -1202,7 +1198,7 @@ InferenceRequest::Normalize()
         const auto& data_type = input.DType();

         // Non-linear IO format input byte size validation will be handled in the
-        // backend.
+        // TensorRT backend.
         if (!input.IsNonLinearFormatIo()) {
           TRITONSERVER_MemoryType input_memory_type;
           // Because Triton expects STRING type to be in special format
@@ -1215,12 +1211,7 @@ InferenceRequest::Normalize()
           // FIXME: Temporarily skips byte size checks for GPU tensors. See
           // DLIS-6820.
         } else {
-          // Shape tensor with dynamic batching does not introduce a new
-          // dimension to the tensor but adds an additional value to the 1-D
-          // array.
-          const std::vector<int64_t>& input_dims =
-              input.IsShapeTensor() ? input.OriginalShape()
-                                    : input.ShapeWithBatchDim();
+          const std::vector<int64_t>& input_dims = input.ShapeWithBatchDim();
           int64_t expected_byte_size = INT_MAX;
           expected_byte_size =
               triton::common::GetByteSize(data_type, input_dims);
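For the expected-byte-size computation retained above: for a linear-format input it is element count times element size over the batched dims. A rough model of that arithmetic for fixed-size datatypes — an illustrative assumption, not the real triton::common::GetByteSize, which also handles special cases such as STRING:

    #include <cstdint>
    #include <vector>

    int64_t ExpectedByteSize(const std::vector<int64_t>& dims, int64_t element_size) {
      int64_t elements = 1;
      for (int64_t d : dims) {
        elements *= d;  // assumes fully specified dims (no -1) after batching
      }
      return elements * element_size;
    }

    int main() {
      // E.g. an FP32 input batched to [8, 3, 224, 224]:
      // 8 * 3 * 224 * 224 * 4 bytes = 4,816,896 bytes.
      return ExpectedByteSize({8, 3, 224, 224}, sizeof(float)) == 4816896 ? 0 : 1;
    }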
@@ -1523,17 +1514,17 @@ InferenceRequest::ReportStatisticsCacheHit(MetricModelReporter* metric_reporter)
 // Input
 //
 InferenceRequest::Input::Input()
-    : is_shape_tensor_(false), is_non_linear_format_io_(false),
-      data_(new MemoryReference), has_host_policy_specific_data_(false)
+    : tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
+      has_host_policy_specific_data_(false)
 {
 }

 InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const int64_t* shape, const uint64_t dim_count)
     : name_(name), datatype_(datatype),
-      original_shape_(shape, shape + dim_count), is_shape_tensor_(false),
-      is_non_linear_format_io_(false), data_(new MemoryReference),
+      original_shape_(shape, shape + dim_count),
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
       has_host_policy_specific_data_(false)
 {
 }
@@ -1542,8 +1533,8 @@ InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const std::vector<int64_t>& shape)
     : name_(name), datatype_(datatype), original_shape_(shape),
-      is_shape_tensor_(false), is_non_linear_format_io_(false),
-      data_(new MemoryReference), has_host_policy_specific_data_(false)
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
+      has_host_policy_specific_data_(false)
 {
 }

@@ -1558,17 +1549,16 @@ InferenceRequest::Input::SetMetadata(
 }

 Status
-InferenceRequest::Input::SetIsShapeTensor(const bool is_shape_tensor)
+InferenceRequest::Input::SetIsShapeTensor()
 {
-  is_shape_tensor_ = is_shape_tensor;
+  tensor_type_ = TensorType::SHAPE_TENSOR;
   return Status::Success;
 }

 Status
-InferenceRequest::Input::SetIsNonLinearFormatIo(
-    const bool is_non_linear_format_io)
+InferenceRequest::Input::SetIsNonLinearFormatIo()
 {
-  is_non_linear_format_io_ = is_non_linear_format_io;
+  tensor_type_ = TensorType::NON_LINEAR;
   return Status::Success;
 }

28 changes: 21 additions & 7 deletions src/infer_request.h
@@ -82,6 +82,8 @@ class InferenceRequest {
   // Input tensor
   class Input {
    public:
+    enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };
+
     Input();
     Input(
         const std::string& name, const inference::DataType datatype,
@@ -120,7 +122,14 @@ class InferenceRequest {
     // into batch + shape.
     const std::vector<int64_t>& ShapeWithBatchDim() const
     {
-      return shape_with_batch_dim_;
+      if (tensor_type_ == TensorType::SHAPE_TENSOR) {
+        // Shape tensor with dynamic batching does not introduce a new
+        // dimension to the tensor but adds an additional value to the 1-D
+        // array.
+        return original_shape_;
+      } else {
+        return shape_with_batch_dim_;
+      }
     }
     std::vector<int64_t>* MutableShapeWithBatchDim()
     {
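To make the new branch concrete (illustrative shapes, not from any real model): with dynamic batching, a regular input declared as [3, 224, 224] and batched at 4 is reported as [4, 3, 224, 224], whereas a TensorRT shape tensor declared as [2] stays one-dimensional — batching appends a value to the tensor, not a dimension — which is why the accessor now falls back to original_shape_:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      // Regular input: batching prepends a batch dimension.
      std::vector<int64_t> original_shape{3, 224, 224};
      std::vector<int64_t> shape_with_batch_dim{4, 3, 224, 224};
      assert(shape_with_batch_dim.size() == original_shape.size() + 1);

      // Shape tensor: the batched value gains an element, not a dimension,
      // so its reported shape is still the original 1-D shape.
      std::vector<int64_t> shape_tensor_shape{2};
      assert(shape_tensor_shape.size() == 1);
    }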
@@ -134,16 +143,22 @@ class InferenceRequest {
     }

     // Whether or not the input is a TensorRT shape tensor
-    bool IsShapeTensor() const { return is_shape_tensor_; }
+    bool IsShapeTensor() const
+    {
+      return tensor_type_ == TensorType::SHAPE_TENSOR;
+    }

     // Specifies whether the input uses a non-linear IO format
-    bool IsNonLinearFormatIo() const { return is_non_linear_format_io_; }
+    bool IsNonLinearFormatIo() const
+    {
+      return tensor_type_ == TensorType::NON_LINEAR;
+    }

     // Set the input to be treated as a shape tensor.
-    Status SetIsShapeTensor(const bool is_shape_tensor);
+    Status SetIsShapeTensor();

     // Set that the input uses a non-linear IO format
-    Status SetIsNonLinearFormatIo(const bool is_non_linear_format_io_);
+    Status SetIsNonLinearFormatIo();

     // The data for this input.
     const std::shared_ptr<Memory>& Data() const { return data_; }
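One consequence of collapsing the two booleans into a single tensor_type_ field, visible in the accessors above: the two predicates can never both be true, and the setters overwrite each other. A reduced stand-in (not the real InferenceRequest::Input) demonstrating the last-write-wins behavior:

    #include <cassert>

    class Input {
     public:
      enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };

      void SetIsShapeTensor() { tensor_type_ = TensorType::SHAPE_TENSOR; }
      void SetIsNonLinearFormatIo() { tensor_type_ = TensorType::NON_LINEAR; }
      bool IsShapeTensor() const { return tensor_type_ == TensorType::SHAPE_TENSOR; }
      bool IsNonLinearFormatIo() const { return tensor_type_ == TensorType::NON_LINEAR; }

     private:
      TensorType tensor_type_ = TensorType::TENSOR;
    };

    int main() {
      Input input;
      input.SetIsShapeTensor();
      input.SetIsNonLinearFormatIo();
      // With one enum field the second call overwrites the first, something
      // two independent booleans would not have done.
      assert(!input.IsShapeTensor() && input.IsNonLinearFormatIo());
    }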
@@ -246,8 +261,7 @@ class InferenceRequest {
     std::vector<int64_t> original_shape_;
     std::vector<int64_t> shape_;
     std::vector<int64_t> shape_with_batch_dim_;
-    bool is_shape_tensor_;
-    bool is_non_linear_format_io_;
+    TensorType tensor_type_;
     std::shared_ptr<Memory> data_;

     bool has_host_policy_specific_data_;
37 changes: 22 additions & 15 deletions src/model_config_utils.cc
@@ -1712,6 +1712,26 @@ ValidateInstanceGroup(
   return Status::Success;
 }

+Status
+ValidateNonLinearFormatIO(
+    const inference::ModelInput& io, const std::string& platform, bool is_input)
+{
+  if ((platform != kTensorRTPlanPlatform) && io.is_non_linear_format_io()) {
+    return Status(
+        Status::Code::INVALID_ARG,
+        "Non-linear IO format is only supported for the TensorRT platform");
+  }
+
+  if (io.is_non_linear_format_io() && (io.dims_size() != 3)) {
+    std::string io_type = is_input ? "input" : "output";
+    return Status(
+        Status::Code::INVALID_ARG,
+        "Non-linear IO format " + io_type + " requires 3 dims");
+  }
+
+  return Status::Success;
+}
+
 Status
 ValidateModelInput(
     const inference::ModelInput& io, int32_t max_batch_size,
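A hedged usage sketch of the new helper. It assumes a Triton core build; the protobuf setters (set_is_non_linear_format_io, add_dims) follow from the fields the validator reads, while the include paths, platform strings, and triton::core namespace are assumptions here, not verified against this revision:

    #include "model_config.pb.h"
    #include "model_config_utils.h"

    void CheckNonLinearConfig() {
      inference::ModelInput io;
      io.set_is_non_linear_format_io(true);
      io.add_dims(3);
      io.add_dims(224);
      io.add_dims(224);

      // OK: TensorRT platform and exactly 3 dims.
      auto ok = triton::core::ValidateNonLinearFormatIO(
          io, "tensorrt_plan", true /* is_input */);

      // INVALID_ARG: the flag is rejected on a non-TensorRT platform.
      auto err = triton::core::ValidateNonLinearFormatIO(
          io, "onnxruntime_onnx", true /* is_input */);
    }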
@@ -1732,16 +1752,7 @@ ValidateModelInput(
         "shape tensors are only supported for TensorRT platform");
   }

-  if ((platform != kTensorRTPlanPlatform) && io.is_non_linear_format_io()) {
-    return Status(
-        Status::Code::INVALID_ARG,
-        "Non-linear IO format is only supported for the TensorRT platform");
-  }
-
-  if (io.is_non_linear_format_io() && (io.dims_size() != 3)) {
-    return Status(
-        Status::Code::INVALID_ARG, "Non-linear IO format input require 3 dims");
-  }
+  RETURN_IF_ERROR(ValidateNonLinearFormatIO(io, platform, true /* is_input*/));

   return Status::Success;
 }
@@ -1779,11 +1790,7 @@ ValidateModelOutput(
         "shape tensors are only supported for TensorRT platform");
   }

-  if ((platform != kTensorRTPlanPlatform) && io.is_non_linear_format_io()) {
-    return Status(
-        Status::Code::INVALID_ARG,
-        "Non-linear IO format is only supported for the TensorRT platform");
-  }
+  RETURN_IF_ERROR(ValidateNonLinearFormatIO(io, platform, false /* is_input*/));

   return Status::Success;
 }
11 changes: 11 additions & 0 deletions src/model_config_utils.h
@@ -172,6 +172,17 @@ Status ValidateInstanceGroup(
 /// is not valid.
 Status ValidateModelIOConfig(const inference::ModelConfig& config);

+/// Validate that non-linear format inputs or outputs are specified correctly
+/// in a model configuration.
+/// \param io The model input or output.
+/// \param platform The platform name.
+/// \param is_input Specifies whether it is an input or an output.
+/// \return The error status. A non-OK status indicates the configuration
+/// is not valid.
+Status ValidateNonLinearFormatIO(
+    const inference::ModelInput& io, const std::string& platform,
+    bool is_input);
+
 /// Validate that input is specified correctly in a model
 /// configuration.
 /// \param io The model input.
