
Commit 557af46
…into yinggeh-DLIS-6657-client-input-byte-size-check
yinggeh committed Jul 27, 2024
2 parents 73d374e + 9ed1544 commit 557af46
Showing 5 changed files with 99 additions and 27 deletions.
python/test/test_api.py: 7 changes (6 additions, 1 deletion)
@@ -70,7 +70,7 @@
exit_on_error=True,
strict_model_config=False,
model_control_mode=tritonserver.ModelControlMode.EXPLICIT,
exit_timeout=10,
exit_timeout=30,
)


@@ -357,6 +357,11 @@ def test_stop(self):
{
"backend": "python",
"parameters": {"decoupled": {"string_value": "False"}},
# Keep instance count low for fast startup/cleanup.
# Alternatively can use KIND_CPU here, but keeping gpus/count explicit.
"instance_group": [
{"kind": "KIND_GPU", "gpus": [0], "count": 1}
],
}
)
},
src/infer_request.cc: 62 changes (41 additions, 21 deletions)
@@ -1016,6 +1016,17 @@ InferenceRequest::Normalize()
for (auto& pr : original_inputs_) {
auto& input = pr.second;
*input.MutableShape() = input.OriginalShape();

const inference::ModelInput* input_config;
RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
if (input_config->is_shape_tensor()) {
// For a shape tensor, mark that the input is a shape tensor.
input.SetIsShapeTensor();
} else if (input_config->is_non_linear_format_io()) {
// If a tensor uses a non-linear IO format, indicate that the input uses
// a non-linear IO format.
input.SetIsNonLinearFormatIo();
}
}
} else {
// Model does support Triton-style batching so each input tensor
@@ -1025,15 +1036,19 @@ InferenceRequest::Normalize()
batch_size_ = 0;
for (auto& pr : original_inputs_) {
auto& input = pr.second;
const inference::ModelInput* input_config;
RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));

// For a shape tensor, keep the tensor's shape as it is and mark
// that the input is a shape tensor.
const inference::ModelInput* input_config;
RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
if (input_config->is_shape_tensor()) {
*input.MutableShape() = input.OriginalShape();
input.SetIsShapeTensor(true);
input.SetIsShapeTensor();
continue;
} else if (input_config->is_non_linear_format_io()) {
// If a tensor uses a non-linear IO format, indicate that the input uses
// a non-linear IO format.
input.SetIsNonLinearFormatIo();
}

if (input.OriginalShape().size() == 0) {
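Both hunks above apply the same classification: each input is tagged once, from its model-config entry, with is_shape_tensor taking precedence over is_non_linear_format_io. A condensed, self-contained sketch of that dispatch (the struct below stands in for the generated inference::ModelInput proto; the names mirror the diff):

```cpp
#include <iostream>

// Stand-in for the two inference::ModelInput fields consulted above;
// the real type is generated from model_config.proto.
struct ModelInputConfig {
  bool is_shape_tensor = false;
  bool is_non_linear_format_io = false;
};

// Mirrors InferenceRequest::Input::TensorType from this commit.
enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };

// Shape tensors win, then non-linear IO format, otherwise an
// ordinary linear-layout tensor (the default).
TensorType Classify(const ModelInputConfig& cfg) {
  if (cfg.is_shape_tensor) return TensorType::SHAPE_TENSOR;
  if (cfg.is_non_linear_format_io) return TensorType::NON_LINEAR;
  return TensorType::TENSOR;
}

int main() {
  ModelInputConfig cfg;
  cfg.is_non_linear_format_io = true;
  std::cout << (Classify(cfg) == TensorType::NON_LINEAR) << "\n";  // 1
}
```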
@@ -1183,15 +1198,9 @@ InferenceRequest::Normalize()
{
const auto& data_type = input.DType();

// FIXME: Skip byte size validation for TensorRT backend because it breaks
// shape-size assumption. See DLIS-6805 for proper fix for TRT backend
// reformat_free tensors.
bool skip_byte_size_check = false;
constexpr char trt_prefix[] = "tensorrt_";
const std::string& platform = model_raw_->Config().platform();
skip_byte_size_check |= (platform.rfind(trt_prefix) == 0);

if (!skip_byte_size_check) {
// Non-linear IO format input byte size validation will be handled in the
// TensorRT backend.
if (!input.IsNonLinearFormatIo()) {
TRITONSERVER_MemoryType input_memory_type;
// Because Triton expects STRING type to be in special format
// (prepend 4 bytes to specify string length), so need to add all the
Expand All @@ -1201,10 +1210,13 @@ InferenceRequest::Normalize()
input_name, input, model_name, &input_memory_type));
// FIXME: Temporarily skips byte size checks for GPU tensors. See
// DLIS-6820.
skip_byte_size_check |=
(input_memory_type == TRITONSERVER_MEMORY_GPU);
} else {
const auto& input_dims = input.ShapeWithBatchDim();
// Shape tensor with dynamic batching does not introduce a new
// dimension to the tensor but adds an additional value to the 1-D
// array.
const std::vector<int64_t>& input_dims =
input.IsShapeTensor() ? input.OriginalShape()
: input.ShapeWithBatchDim();
int64_t expected_byte_size = INT_MAX;
expected_byte_size =
triton::common::GetByteSize(data_type, input_dims);
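The shape-tensor branch is worth a worked example. For an ordinary input, batching prepends a dimension, so an input of shape [3] at batch size 4 is validated as [4, 3]; for a TensorRT shape tensor, batching folds into the existing 1-D value array instead of adding a dimension, so the shape as received from the client (OriginalShape()) is the correct basis for the size check. A self-contained sketch of the arithmetic, with GetByteSize as a stand-in for triton::common::GetByteSize and assumed to reduce to element size times element count for fixed-size types:

```cpp
#include <cstdint>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

// Stand-in for triton::common::GetByteSize for a fixed-size datatype:
// element size times the product of the dims.
int64_t GetByteSize(int64_t elem_size, const std::vector<int64_t>& dims) {
  return std::accumulate(
      dims.begin(), dims.end(), elem_size, std::multiplies<int64_t>());
}

int main() {
  const int64_t kInt32 = 4;  // bytes per INT32 element

  // Ordinary input: shape [3] under batch size 4 becomes [4, 3],
  // so ShapeWithBatchDim() drives the expected byte size.
  std::cout << GetByteSize(kInt32, {4, 3}) << "\n";  // 48

  // Shape tensor: batching extends the 1-D value array rather than
  // adding a dimension, so OriginalShape() is used as received.
  std::cout << GetByteSize(kInt32, {3}) << "\n";  // 12
}
```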
@@ -1524,7 +1536,7 @@ InferenceRequest::ReportStatisticsCacheHit(MetricModelReporter* metric_reporter)
// Input
//
InferenceRequest::Input::Input()
: is_shape_tensor_(false), data_(new MemoryReference),
: tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
has_host_policy_specific_data_(false)
{
}
@@ -1533,16 +1545,17 @@ InferenceRequest::Input::Input(
const std::string& name, const inference::DataType datatype,
const int64_t* shape, const uint64_t dim_count)
: name_(name), datatype_(datatype),
original_shape_(shape, shape + dim_count), is_shape_tensor_(false),
data_(new MemoryReference), has_host_policy_specific_data_(false)
original_shape_(shape, shape + dim_count),
tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
has_host_policy_specific_data_(false)
{
}

InferenceRequest::Input::Input(
const std::string& name, const inference::DataType datatype,
const std::vector<int64_t>& shape)
: name_(name), datatype_(datatype), original_shape_(shape),
is_shape_tensor_(false), data_(new MemoryReference),
tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
has_host_policy_specific_data_(false)
{
}
@@ -1558,9 +1571,16 @@ InferenceRequest::Input::SetMetadata(
}

Status
InferenceRequest::Input::SetIsShapeTensor(const bool is_shape_tensor)
InferenceRequest::Input::SetIsShapeTensor()
{
tensor_type_ = TensorType::SHAPE_TENSOR;
return Status::Success;
}

Status
InferenceRequest::Input::SetIsNonLinearFormatIo()
{
is_shape_tensor_ = is_shape_tensor;
tensor_type_ = TensorType::NON_LINEAR;
return Status::Success;
}

src/infer_request.h: 20 changes (17 additions, 3 deletions)
@@ -82,6 +82,8 @@ class InferenceRequest {
// Input tensor
class Input {
public:
enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };

Input();
Input(
const std::string& name, const inference::DataType datatype,
@@ -134,10 +136,22 @@
}

// Whether or not the input is a tensorrt shape tensor
bool IsShapeTensor() const { return is_shape_tensor_; }
bool IsShapeTensor() const
{
return tensor_type_ == TensorType::SHAPE_TENSOR;
}

// Specifies whether the input uses a non-linear IO format
bool IsNonLinearFormatIo() const
{
return tensor_type_ == TensorType::NON_LINEAR;
}

// Set the input to be treated as a shape tensor.
Status SetIsShapeTensor(const bool is_shape_tensor);
Status SetIsShapeTensor();

// Mark that the input uses a non-linear IO format
Status SetIsNonLinearFormatIo();

// The data for this input.
const std::shared_ptr<Memory>& Data() const { return data_; }
Expand Down Expand Up @@ -240,7 +254,7 @@ class InferenceRequest {
std::vector<int64_t> original_shape_;
std::vector<int64_t> shape_;
std::vector<int64_t> shape_with_batch_dim_;
bool is_shape_tensor_;
TensorType tensor_type_;
std::shared_ptr<Memory> data_;

bool has_host_policy_specific_data_;
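A minimal model of the reworked accessors, reduced to just the tensor-type state (the real Input also carries name, datatype, shapes, and data, as above). Replacing the old is_shape_tensor_ boolean with one enum makes the three states mutually exclusive by construction:

```cpp
#include <cassert>

// Reduced model of InferenceRequest::Input's tensor-type state.
class Input {
 public:
  enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };

  bool IsShapeTensor() const {
    return tensor_type_ == TensorType::SHAPE_TENSOR;
  }
  bool IsNonLinearFormatIo() const {
    return tensor_type_ == TensorType::NON_LINEAR;
  }

  void SetIsShapeTensor() { tensor_type_ = TensorType::SHAPE_TENSOR; }
  void SetIsNonLinearFormatIo() { tensor_type_ = TensorType::NON_LINEAR; }

 private:
  TensorType tensor_type_ = TensorType::TENSOR;  // default, as in the ctors
};

int main() {
  Input in;
  assert(!in.IsShapeTensor() && !in.IsNonLinearFormatIo());
  in.SetIsNonLinearFormatIo();
  // With two independent booleans, "shape tensor AND non-linear" was
  // representable; a single enum rules that state out.
  assert(in.IsNonLinearFormatIo() && !in.IsShapeTensor());
}
```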
src/model_config_utils.cc: 32 changes (32 additions, 0 deletions)
@@ -418,6 +418,34 @@ ValidateIOShape(
return Status::Success;
}

/// Validate that Non-linear format inputs or outputs are specified correctly
/// in a model configuration.
template <class ModelIO>
Status
ValidateNonLinearFormatIO(
const ModelIO& io, const std::string& platform, bool is_input)
{
if (!io.is_non_linear_format_io()) {
// Nothing to validate as the tensor is not non-linear format.
return Status::Success;
}

if (platform != kTensorRTPlanPlatform) {
return Status(
Status::Code::INVALID_ARG,
"Non-linear IO format is only supported for the TensorRT platform");
}

if (io.dims_size() != 3) {
std::string io_type = is_input ? "input" : "output";
return Status(
Status::Code::INVALID_ARG,
"Non-linear IO format " + io_type + " requires 3 dims");
}

return Status::Success;
}

} // namespace

Status
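A self-contained restatement of the new validator for illustration, with Status reduced to a string, the config proto reduced to the two fields the function reads, and kTensorRTPlanPlatform assumed to equal "tensorrt_plan":

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct ModelIO {
  bool is_non_linear_format_io = false;
  std::vector<int64_t> dims;
};

constexpr char kTensorRTPlanPlatform[] = "tensorrt_plan";  // assumed value

// Same two rules as ValidateNonLinearFormatIO above.
std::string Validate(const ModelIO& io, const std::string& platform,
                     bool is_input) {
  if (!io.is_non_linear_format_io) return "OK";
  if (platform != kTensorRTPlanPlatform)
    return "INVALID_ARG: non-linear IO format is TensorRT-only";
  if (io.dims.size() != 3)
    return std::string("INVALID_ARG: non-linear IO format ") +
           (is_input ? "input" : "output") + " requires 3 dims";
  return "OK";
}

int main() {
  ModelIO io{true, {3, 224}};
  std::cout << Validate(io, "onnxruntime_onnx", true) << "\n";     // platform
  std::cout << Validate(io, kTensorRTPlanPlatform, true) << "\n";  // dims
  io.dims.push_back(224);                                          // {3,224,224}
  std::cout << Validate(io, kTensorRTPlanPlatform, true) << "\n";  // OK
}
```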
@@ -1732,6 +1760,8 @@ ValidateModelInput(
"shape tensors are only supported for TensorRT platform");
}

RETURN_IF_ERROR(ValidateNonLinearFormatIO(io, platform, true /* is_input*/));

return Status::Success;
}

@@ -1768,6 +1798,8 @@ ValidateModelOutput(
"shape tensors are only supported for TensorRT platform");
}

RETURN_IF_ERROR(ValidateNonLinearFormatIO(io, platform, false /* is_input*/));

return Status::Success;
}

src/test/response_cache_test.cc: 5 changes (3 additions, 2 deletions)
@@ -70,8 +70,9 @@ InferenceRequest::Input::Input(
const std::string& name, const inference::DataType datatype,
const int64_t* shape, const uint64_t dim_count)
: name_(name), datatype_(datatype),
original_shape_(shape, shape + dim_count), is_shape_tensor_(false),
data_(new MemoryReference), has_host_policy_specific_data_(false)
original_shape_(shape, shape + dim_count),
tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
has_host_policy_specific_data_(false)
{
}

