From c4557893092ee45f61ad5197a697ad5e7238e0ac Mon Sep 17 00:00:00 2001
From: Sai Kiran Polisetty
Date: Thu, 18 Jul 2024 17:26:37 +0530
Subject: [PATCH] Consolidate shape tensor and non-linear IO format flags into
 a TensorType enum

Replace the independent is_shape_tensor_ and is_non_linear_format_io_
booleans on InferenceRequest::Input with a single TensorType enum, fold the
shape tensor dynamic batching special case into ShapeWithBatchDim(), and
factor the duplicated non-linear IO format checks out of ValidateModelInput()
and ValidateModelOutput() into a shared ValidateNonLinearFormatIO() helper.
---
 src/infer_request.cc      | 56 ++++++++++++++++-----------------------
 src/infer_request.h       | 28 +++++++++++++++-----
 src/model_config_utils.cc | 38 ++++++++++++++++------------
 src/model_config_utils.h  | 11 ++++++++
 4 files changed, 78 insertions(+), 55 deletions(-)
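
Note, placed after the diffstat where git am ignores free-form text: the two
retired booleans could encode four states, including the contradictory one
where an input is both a shape tensor and a non-linear format tensor; the
enum admits exactly three. Below is a minimal standalone sketch of that
idea. The class is an illustrative stand-in mirroring the patched
InferenceRequest::Input API, not the actual Triton source.

  #include <cassert>

  // Mirrors InferenceRequest::Input::TensorType from the patch.
  enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };

  class Input {
   public:
    // A plain tensor by default; each setter moves to exactly one state.
    void SetIsShapeTensor() { tensor_type_ = TensorType::SHAPE_TENSOR; }
    void SetIsNonLinearFormatIo() { tensor_type_ = TensorType::NON_LINEAR; }

    bool IsShapeTensor() const
    {
      return tensor_type_ == TensorType::SHAPE_TENSOR;
    }
    bool IsNonLinearFormatIo() const
    {
      return tensor_type_ == TensorType::NON_LINEAR;
    }

   private:
    TensorType tensor_type_ = TensorType::TENSOR;
  };

  int main()
  {
    Input input;
    input.SetIsShapeTensor();
    input.SetIsNonLinearFormatIo();
    // With two booleans both flags could end up true at once; with the
    // enum the last setter wins and the states stay mutually exclusive.
    assert(!input.IsShapeTensor() && input.IsNonLinearFormatIo());
    return 0;
  }
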
diff --git a/src/infer_request.cc b/src/infer_request.cc
index 61f3624ca..67860dd10 100644
--- a/src/infer_request.cc
+++ b/src/infer_request.cc
@@ -1016,17 +1016,15 @@ InferenceRequest::Normalize()
       auto& input = pr.second;
       *input.MutableShape() = input.OriginalShape();
 
-      // For a shape tensor, mark that the input is a shape tensor.
       const inference::ModelInput* input_config;
       RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
       if (input_config->is_shape_tensor()) {
-        input.SetIsShapeTensor(true);
-      }
-
-      // If a tensor uses a non-linear IO format, indicate that the input uses a
-      // non-linear IO format.
-      if (input_config->is_non_linear_format_io()) {
-        input.SetIsNonLinearFormatIo(true);
+        // For a shape tensor, mark that the input is a shape tensor.
+        input.SetIsShapeTensor();
+      } else if (input_config->is_non_linear_format_io()) {
+        // If a tensor uses a non-linear IO format, indicate that the input uses
+        // a non-linear IO format.
+        input.SetIsNonLinearFormatIo();
       }
     }
   } else {
@@ -1040,18 +1038,16 @@ InferenceRequest::Normalize()
       const inference::ModelInput* input_config;
       RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
 
-      // If a tensor uses a non-linear IO format, indicate that the input uses a
-      // non-linear IO format.
-      if (input_config->is_non_linear_format_io()) {
-        input.SetIsNonLinearFormatIo(true);
-      }
-
       // For a shape tensor, keep the tensor's shape as it is and mark
       // that the input is a shape tensor.
       if (input_config->is_shape_tensor()) {
         *input.MutableShape() = input.OriginalShape();
-        input.SetIsShapeTensor(true);
+        input.SetIsShapeTensor();
         continue;
+      } else if (input_config->is_non_linear_format_io()) {
+        // If a tensor uses a non-linear IO format, indicate that the input uses
+        // a non-linear IO format.
+        input.SetIsNonLinearFormatIo();
       }
 
       if (input.OriginalShape().size() == 0) {
@@ -1202,7 +1198,7 @@ InferenceRequest::Normalize()
     const auto& data_type = input.DType();
 
     // Non-linear IO format input byte size validation will be handled in the
-    // backend.
+    // TensorRT backend.
     if (!input.IsNonLinearFormatIo()) {
       TRITONSERVER_MemoryType input_memory_type;
       // Because Triton expects STRING type to be in special format
@@ -1215,12 +1211,7 @@ InferenceRequest::Normalize()
         // FIXME: Temporarily skips byte size checks for GPU tensors. See
         // DLIS-6820.
       } else {
-        // Shape tensor with dynamic batching does not introduce a new
-        // dimension to the tensor but adds an additional value to the 1-D
-        // array.
-        const std::vector<int64_t>& input_dims =
-            input.IsShapeTensor() ? input.OriginalShape()
-                                  : input.ShapeWithBatchDim();
+        const std::vector<int64_t>& input_dims = input.ShapeWithBatchDim();
         int64_t expected_byte_size = INT_MAX;
         expected_byte_size =
             triton::common::GetByteSize(data_type, input_dims);
@@ -1523,8 +1514,8 @@ InferenceRequest::ReportStatisticsCacheHit(MetricModelReporter* metric_reporter)
 // Input
 //
 InferenceRequest::Input::Input()
-    : is_shape_tensor_(false), is_non_linear_format_io_(false),
-      data_(new MemoryReference), has_host_policy_specific_data_(false)
+    : tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
+      has_host_policy_specific_data_(false)
 {
 }
 
@@ -1532,8 +1523,8 @@ InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const int64_t* shape, const uint64_t dim_count)
     : name_(name), datatype_(datatype),
-      original_shape_(shape, shape + dim_count), is_shape_tensor_(false),
-      is_non_linear_format_io_(false), data_(new MemoryReference),
+      original_shape_(shape, shape + dim_count),
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
       has_host_policy_specific_data_(false)
 {
 }
@@ -1542,8 +1533,8 @@ InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const std::vector<int64_t>& shape)
     : name_(name), datatype_(datatype), original_shape_(shape),
-      is_shape_tensor_(false), is_non_linear_format_io_(false),
-      data_(new MemoryReference), has_host_policy_specific_data_(false)
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
+      has_host_policy_specific_data_(false)
 {
 }
 
@@ -1558,17 +1549,16 @@ InferenceRequest::Input::SetMetadata(
 }
 
 Status
-InferenceRequest::Input::SetIsShapeTensor(const bool is_shape_tensor)
+InferenceRequest::Input::SetIsShapeTensor()
 {
-  is_shape_tensor_ = is_shape_tensor;
+  tensor_type_ = TensorType::SHAPE_TENSOR;
   return Status::Success;
 }
 
 Status
-InferenceRequest::Input::SetIsNonLinearFormatIo(
-    const bool is_non_linear_format_io)
+InferenceRequest::Input::SetIsNonLinearFormatIo()
 {
-  is_non_linear_format_io_ = is_non_linear_format_io;
+  tensor_type_ = TensorType::NON_LINEAR;
   return Status::Success;
 }
 
diff --git a/src/infer_request.h b/src/infer_request.h
index 74f65f4c0..0d7283682 100644
--- a/src/infer_request.h
+++ b/src/infer_request.h
@@ -82,6 +82,8 @@ class InferenceRequest {
   // Input tensor
   class Input {
    public:
+    enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };
+
     Input();
     Input(
         const std::string& name, const inference::DataType datatype,
@@ -120,7 +122,14 @@ class InferenceRequest {
     // into batch + shape.
     const std::vector<int64_t>& ShapeWithBatchDim() const
     {
-      return shape_with_batch_dim_;
+      if (tensor_type_ == TensorType::SHAPE_TENSOR) {
+        // Shape tensor with dynamic batching does not introduce a new
+        // dimension to the tensor but adds an additional value to the 1-D
+        // array.
+        return original_shape_;
+      } else {
+        return shape_with_batch_dim_;
+      }
     }
     std::vector<int64_t>* MutableShapeWithBatchDim()
     {
@@ -134,16 +143,22 @@ class InferenceRequest {
     }
 
     // Whether or not the input is a tensorrt shape tensor
-    bool IsShapeTensor() const { return is_shape_tensor_; }
+    bool IsShapeTensor() const
+    {
+      return tensor_type_ == TensorType::SHAPE_TENSOR;
+    }
 
     // Specifies whether the input uses a non-linear IO format
-    bool IsNonLinearFormatIo() const { return is_non_linear_format_io_; }
+    bool IsNonLinearFormatIo() const
+    {
+      return tensor_type_ == TensorType::NON_LINEAR;
+    }
 
     // Set the input to be treated as a shape tensor.
-    Status SetIsShapeTensor(const bool is_shape_tensor);
+    Status SetIsShapeTensor();
 
     // Set the input uses a non-linear IO format
-    Status SetIsNonLinearFormatIo(const bool is_non_linear_format_io_);
+    Status SetIsNonLinearFormatIo();
 
     // The data for this input.
     const std::shared_ptr<Memory>& Data() const { return data_; }
@@ -246,8 +261,7 @@ class InferenceRequest {
     std::vector<int64_t> original_shape_;
     std::vector<int64_t> shape_;
     std::vector<int64_t> shape_with_batch_dim_;
-    bool is_shape_tensor_;
-    bool is_non_linear_format_io_;
+    TensorType tensor_type_;
     std::shared_ptr<Memory> data_;
 
     bool has_host_policy_specific_data_;
diff --git a/src/model_config_utils.cc b/src/model_config_utils.cc
index a1ab9cbe6..737b1eb6d 100644
--- a/src/model_config_utils.cc
+++ b/src/model_config_utils.cc
@@ -1712,6 +1712,27 @@ ValidateInstanceGroup(
   return Status::Success;
 }
 
+template <typename ModelIO>
+Status
+ValidateNonLinearFormatIO(
+    const ModelIO& io, const std::string& platform, bool is_input)
+{
+  if ((platform != kTensorRTPlanPlatform) && io.is_non_linear_format_io()) {
+    return Status(
+        Status::Code::INVALID_ARG,
+        "Non-linear IO format is only supported for the TensorRT platform");
+  }
+
+  if (io.is_non_linear_format_io() && (io.dims_size() != 3)) {
+    std::string io_type = is_input ? "input" : "output";
+    return Status(
+        Status::Code::INVALID_ARG,
+        "Non-linear IO format " + io_type + " requires 3 dims");
+  }
+
+  return Status::Success;
+}
+
 Status
 ValidateModelInput(
     const inference::ModelInput& io, int32_t max_batch_size,
@@ -1732,16 +1753,7 @@ ValidateModelInput(
         "shape tensors are only supported for TensorRT platform");
   }
 
-  if ((platform != kTensorRTPlanPlatform) && io.is_non_linear_format_io()) {
-    return Status(
-        Status::Code::INVALID_ARG,
-        "Non-linear IO format is only supported for the TensorRT platform");
-  }
-
-  if (io.is_non_linear_format_io() && (io.dims_size() != 3)) {
-    return Status(
-        Status::Code::INVALID_ARG, "Non-linear IO format input require 3 dims");
-  }
+  RETURN_IF_ERROR(ValidateNonLinearFormatIO(io, platform, true /* is_input */));
 
   return Status::Success;
 }
@@ -1779,11 +1791,7 @@ ValidateModelOutput(
         "shape tensors are only supported for TensorRT platform");
   }
 
-  if ((platform != kTensorRTPlanPlatform) && io.is_non_linear_format_io()) {
-    return Status(
-        Status::Code::INVALID_ARG,
-        "Non-linear IO format is only supported for the TensorRT platform");
-  }
+  RETURN_IF_ERROR(ValidateNonLinearFormatIO(io, platform, false /* is_input */));
 
   return Status::Success;
 }
diff --git a/src/model_config_utils.h b/src/model_config_utils.h
index 8bd9af600..60d6b6ac0 100644
--- a/src/model_config_utils.h
+++ b/src/model_config_utils.h
@@ -172,6 +172,17 @@ Status ValidateInstanceGroup(
 /// is not valid.
 Status ValidateModelIOConfig(const inference::ModelConfig& config);
 
+/// Validate that non-linear format inputs or outputs are specified correctly
+/// in a model configuration.
+/// \param io The model input or output.
+/// \param platform The platform name.
+/// \param is_input Specifies whether the tensor is an input or an output.
+/// \return The error status. A non-OK status indicates the configuration
+/// is not valid.
+template <typename ModelIO>
+Status ValidateNonLinearFormatIO(
+    const ModelIO& io, const std::string& platform, bool is_input);
+
 /// Validate that input is specified correctly in a model
 /// configuration.
 /// \param io The model input.
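
Note, appended after the patch: ValidateNonLinearFormatIO is called with an
inference::ModelInput from ValidateModelInput and an inference::ModelOutput
from ValidateModelOutput. Those are distinct generated protobuf types with
no common base class; they share only the accessor names
is_non_linear_format_io() and dims_size(), which is why the helper is a
function template rather than a function taking one concrete message type.
A compilable sketch of the same shape, using hypothetical stand-in structs
in place of the generated protobuf classes and a plain std::string in place
of triton::core::Status:

  #include <iostream>
  #include <string>

  // Hypothetical stand-ins for the generated protobuf messages.
  struct MockModelInput {
    bool is_non_linear_format_io() const { return true; }
    int dims_size() const { return 3; }
  };
  struct MockModelOutput {
    bool is_non_linear_format_io() const { return true; }
    int dims_size() const { return 2; }
  };

  constexpr const char* kTensorRTPlanPlatform = "tensorrt_plan";

  // Same control flow as the helper in the patch, with an error message
  // standing in for triton::core::Status.
  template <typename ModelIO>
  std::string
  ValidateNonLinearFormatIO(
      const ModelIO& io, const std::string& platform, bool is_input)
  {
    if ((platform != kTensorRTPlanPlatform) && io.is_non_linear_format_io()) {
      return "Non-linear IO format is only supported for the TensorRT platform";
    }
    if (io.is_non_linear_format_io() && (io.dims_size() != 3)) {
      std::string io_type = is_input ? "input" : "output";
      return "Non-linear IO format " + io_type + " requires 3 dims";
    }
    return "OK";
  }

  int main()
  {
    // One implicit instantiation per IO type, mirroring the two call sites
    // in ValidateModelInput and ValidateModelOutput.
    std::cout << ValidateNonLinearFormatIO(
                     MockModelInput{}, "tensorrt_plan", true /* is_input */)
              << "\n";  // prints: OK
    std::cout << ValidateNonLinearFormatIO(
                     MockModelOutput{}, "tensorrt_plan", false /* is_input */)
              << "\n";  // prints: Non-linear IO format output requires 3 dims
    return 0;
  }

A non-template helper taking inference::ModelInput would fail to compile at
the ValidateModelOutput call site, since ModelOutput is not convertible to
ModelInput.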