diff --git a/python/test/test_api.py b/python/test/test_api.py
index 93124c470..c15847aab 100644
--- a/python/test/test_api.py
+++ b/python/test/test_api.py
@@ -70,7 +70,7 @@
     exit_on_error=True,
     strict_model_config=False,
     model_control_mode=tritonserver.ModelControlMode.EXPLICIT,
-    exit_timeout=10,
+    exit_timeout=30,
 )
@@ -357,6 +357,11 @@ def test_stop(self):
             {
                 "backend": "python",
                 "parameters": {"decoupled": {"string_value": "False"}},
+                # Keep instance count low for fast startup/cleanup.
+                # Alternatively, KIND_CPU can be used here; gpus/count are kept explicit.
+                "instance_group": [
+                    {"kind": "KIND_GPU", "gpus": [0], "count": 1}
+                ],
             }
         )
     },
diff --git a/src/infer_request.cc b/src/infer_request.cc
index 3d93de31d..823d5fd29 100644
--- a/src/infer_request.cc
+++ b/src/infer_request.cc
@@ -1016,6 +1016,17 @@ InferenceRequest::Normalize()
     for (auto& pr : original_inputs_) {
       auto& input = pr.second;
       *input.MutableShape() = input.OriginalShape();
+
+      const inference::ModelInput* input_config;
+      RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
+      if (input_config->is_shape_tensor()) {
+        // For a shape tensor, mark that the input is a shape tensor.
+        input.SetIsShapeTensor();
+      } else if (input_config->is_non_linear_format_io()) {
+        // If a tensor uses a non-linear IO format, indicate that the input
+        // uses a non-linear IO format.
+        input.SetIsNonLinearFormatIo();
+      }
     }
   } else {
     // Model does support Triton-style batching so each input tensor
@@ -1025,15 +1036,19 @@ InferenceRequest::Normalize()
     batch_size_ = 0;
     for (auto& pr : original_inputs_) {
       auto& input = pr.second;
+      const inference::ModelInput* input_config;
+      RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
       // For a shape tensor, keep the tensor's shape as it is and mark
       // that the input is a shape tensor.
-      const inference::ModelInput* input_config;
-      RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
       if (input_config->is_shape_tensor()) {
         *input.MutableShape() = input.OriginalShape();
-        input.SetIsShapeTensor(true);
+        input.SetIsShapeTensor();
         continue;
+      } else if (input_config->is_non_linear_format_io()) {
+        // If a tensor uses a non-linear IO format, indicate that the input
+        // uses a non-linear IO format.
+        input.SetIsNonLinearFormatIo();
       }
 
       if (input.OriginalShape().size() == 0) {
@@ -1183,15 +1198,9 @@ InferenceRequest::Normalize()
     {
       const auto& data_type = input.DType();
 
-      // FIXME: Skip byte size validation for TensorRT backend because it breaks
-      // shape-size assumption. See DLIS-6805 for proper fix for TRT backend
-      // reformat_free tensors.
-      bool skip_byte_size_check = false;
-      constexpr char trt_prefix[] = "tensorrt_";
-      const std::string& platform = model_raw_->Config().platform();
-      skip_byte_size_check |= (platform.rfind(trt_prefix) == 0);
-
-      if (!skip_byte_size_check) {
+      // Non-linear IO format input byte size validation will be handled in the
+      // TensorRT backend.
+      if (!input.IsNonLinearFormatIo()) {
         TRITONSERVER_MemoryType input_memory_type;
         // Because Triton expects STRING type to be in special format
         // (prepend 4 bytes to specify string length), so need to add all the
@@ -1201,10 +1210,13 @@
              input_name, input, model_name, &input_memory_type));
          // FIXME: Temporarily skips byte size checks for GPU tensors. See
          // DLIS-6820.
-          skip_byte_size_check |=
-              (input_memory_type == TRITONSERVER_MEMORY_GPU);
         } else {
-          const auto& input_dims = input.ShapeWithBatchDim();
+          // Shape tensor with dynamic batching does not introduce a new
+          // dimension to the tensor but adds an additional value to the 1-D
+          // array.
+          const std::vector<int64_t>& input_dims =
+              input.IsShapeTensor() ? input.OriginalShape()
+                                    : input.ShapeWithBatchDim();
           int64_t expected_byte_size = INT_MAX;
           expected_byte_size =
               triton::common::GetByteSize(data_type, input_dims);
@@ -1524,7 +1536,7 @@ InferenceRequest::ReportStatisticsCacheHit(MetricModelReporter* metric_reporter)
 // Input
 //
 InferenceRequest::Input::Input()
-    : is_shape_tensor_(false), data_(new MemoryReference),
+    : tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
       has_host_policy_specific_data_(false)
 {
 }
@@ -1533,8 +1545,9 @@ InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const int64_t* shape, const uint64_t dim_count)
     : name_(name), datatype_(datatype),
-      original_shape_(shape, shape + dim_count), is_shape_tensor_(false),
-      data_(new MemoryReference), has_host_policy_specific_data_(false)
+      original_shape_(shape, shape + dim_count),
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
+      has_host_policy_specific_data_(false)
 {
 }
 
@@ -1542,7 +1555,7 @@ InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const std::vector<int64_t>& shape)
     : name_(name), datatype_(datatype), original_shape_(shape),
-      is_shape_tensor_(false), data_(new MemoryReference),
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
       has_host_policy_specific_data_(false)
 {
 }
@@ -1558,9 +1571,16 @@ InferenceRequest::Input::SetMetadata(
 }
 
 Status
-InferenceRequest::Input::SetIsShapeTensor(const bool is_shape_tensor)
+InferenceRequest::Input::SetIsShapeTensor()
+{
+  tensor_type_ = TensorType::SHAPE_TENSOR;
+  return Status::Success;
+}
+
+Status
+InferenceRequest::Input::SetIsNonLinearFormatIo()
 {
-  is_shape_tensor_ = is_shape_tensor;
+  tensor_type_ = TensorType::NON_LINEAR;
   return Status::Success;
 }
 
diff --git a/src/infer_request.h b/src/infer_request.h
index a38b141af..38c89ed63 100644
--- a/src/infer_request.h
+++ b/src/infer_request.h
@@ -82,6 +82,8 @@ class InferenceRequest {
   // Input tensor
   class Input {
    public:
+    enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };
+
     Input();
     Input(
         const std::string& name, const inference::DataType datatype,
@@ -134,10 +136,22 @@ class InferenceRequest {
     }
 
     // Whether or not the input is a tensorrt shape tensor
-    bool IsShapeTensor() const { return is_shape_tensor_; }
+    bool IsShapeTensor() const
+    {
+      return tensor_type_ == TensorType::SHAPE_TENSOR;
+    }
+
+    // Specifies whether the input uses a non-linear IO format
+    bool IsNonLinearFormatIo() const
+    {
+      return tensor_type_ == TensorType::NON_LINEAR;
+    }
 
     // Set the input to be treated as a shape tensor.
-    Status SetIsShapeTensor(const bool is_shape_tensor);
+    Status SetIsShapeTensor();
+
+    // Set the input to use a non-linear IO format.
+    Status SetIsNonLinearFormatIo();
 
     // The data for this input.
     const std::shared_ptr<Memory>& Data() const { return data_; }
@@ -240,7 +254,7 @@ class InferenceRequest {
     std::vector<int64_t> original_shape_;
     std::vector<int64_t> shape_;
     std::vector<int64_t> shape_with_batch_dim_;
-    bool is_shape_tensor_;
+    TensorType tensor_type_;
     std::shared_ptr<Memory> data_;
 
     bool has_host_policy_specific_data_;
diff --git a/src/model_config_utils.cc b/src/model_config_utils.cc
index 68859abdf..cc31f666c 100644
--- a/src/model_config_utils.cc
+++ b/src/model_config_utils.cc
@@ -418,6 +418,34 @@ ValidateIOShape(
   return Status::Success;
 }
 
+/// Validate that non-linear format inputs or outputs are specified correctly
+/// in a model configuration.
+template <class ModelIO>
+Status
+ValidateNonLinearFormatIO(
+    const ModelIO& io, const std::string& platform, bool is_input)
+{
+  if (!io.is_non_linear_format_io()) {
+    // Nothing to validate as the tensor does not use a non-linear IO format.
+    return Status::Success;
+  }
+
+  if (platform != kTensorRTPlanPlatform) {
+    return Status(
+        Status::Code::INVALID_ARG,
+        "Non-linear IO format is only supported for the TensorRT platform");
+  }
+
+  if (io.dims_size() != 3) {
+    std::string io_type = is_input ? "input" : "output";
+    return Status(
+        Status::Code::INVALID_ARG,
+        "Non-linear IO format " + io_type + " requires 3 dims");
+  }
+
+  return Status::Success;
+}
+
 }  // namespace
@@ -1732,6 +1760,8 @@ ValidateModelInput(
         Status::Code::INVALID_ARG,
         "shape tensors are only supported for TensorRT platform");
   }
 
+  RETURN_IF_ERROR(ValidateNonLinearFormatIO(io, platform, true /* is_input*/));
+
   return Status::Success;
 }
@@ -1768,6 +1798,8 @@ ValidateModelOutput(
         Status::Code::INVALID_ARG,
         "shape tensors are only supported for TensorRT platform");
   }
 
+  RETURN_IF_ERROR(ValidateNonLinearFormatIO(io, platform, false /* is_input*/));
+
   return Status::Success;
 }
diff --git a/src/test/response_cache_test.cc b/src/test/response_cache_test.cc
index dad7d0faf..8ffb85bd6 100644
--- a/src/test/response_cache_test.cc
+++ b/src/test/response_cache_test.cc
@@ -70,8 +70,9 @@ InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const int64_t* shape, const uint64_t dim_count)
     : name_(name), datatype_(datatype),
-      original_shape_(shape, shape + dim_count), is_shape_tensor_(false),
-      data_(new MemoryReference), has_host_policy_specific_data_(false)
+      original_shape_(shape, shape + dim_count),
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
+      has_host_policy_specific_data_(false)
 {
 }
 
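Note for reviewers: the sketch below restates the two rules the new ValidateNonLinearFormatIO helper enforces, namely that a tensor marked is_non_linear_format_io is only accepted on the tensorrt_plan platform and must have exactly 3 dims (presumably because TensorRT's vectorized reformat-free layouts are defined over CHW-shaped tensors). It is a standalone illustration under stated assumptions, not patch code: IOSpec is a hypothetical stand-in for the inference::ModelInput / inference::ModelOutput protobuf messages, and CheckNonLinearFormatIO returns a plain string instead of Triton's Status so it compiles outside the Triton tree.

// Standalone illustration only -- IOSpec is a hypothetical stand-in for the
// ModelIO protobuf messages that the real validator receives.
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

constexpr char kTensorRTPlanPlatform[] = "tensorrt_plan";

struct IOSpec {
  bool is_non_linear_format_io;
  std::vector<int64_t> dims;
};

// Mirrors ValidateNonLinearFormatIO: empty string on success, otherwise the
// rejection reason.
std::string
CheckNonLinearFormatIO(
    const IOSpec& io, const std::string& platform, bool is_input)
{
  if (!io.is_non_linear_format_io) {
    // Nothing to check for tensors that use the default linear format.
    return {};
  }
  if (platform != kTensorRTPlanPlatform) {
    return "Non-linear IO format is only supported for the TensorRT platform";
  }
  if (io.dims.size() != 3) {
    return std::string("Non-linear IO format ") +
           (is_input ? "input" : "output") + " requires 3 dims";
  }
  return {};
}

int main()
{
  // A 3-D (CHW) input on a TensorRT plan model is accepted: prints an
  // empty line.
  std::cout << CheckNonLinearFormatIO(
                   {true, {3, 224, 224}}, "tensorrt_plan", true)
            << "\n";
  // The same tensor on any other platform is rejected.
  std::cout << CheckNonLinearFormatIO(
                   {true, {3, 224, 224}}, "onnxruntime_onnx", true)
            << "\n";
  // A 1-D non-linear output is rejected: exactly 3 dims are required.
  std::cout << CheckNonLinearFormatIO({true, {224}}, "tensorrt_plan", false)
            << "\n";
  return 0;
}

Expected output is an empty line for the accepted case followed by the two rejection messages, mirroring the INVALID_ARG errors returned by the real helper above.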