Enhancements
pskiran1 committed Jul 18, 2024
1 parent efc3a32 commit c455789
Showing 4 changed files with 77 additions and 55 deletions.
56 changes: 23 additions & 33 deletions src/infer_request.cc
@@ -1016,17 +1016,15 @@ InferenceRequest::Normalize()
       auto& input = pr.second;
       *input.MutableShape() = input.OriginalShape();

-      // For a shape tensor, mark that the input is a shape tensor.
       const inference::ModelInput* input_config;
       RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
       if (input_config->is_shape_tensor()) {
-        input.SetIsShapeTensor(true);
-      }
-
-      // If a tensor uses a non-linear IO format, indicate that the input uses a
-      // non-linear IO format.
-      if (input_config->is_non_linear_format_io()) {
-        input.SetIsNonLinearFormatIo(true);
+        // For a shape tensor, mark that the input is a shape tensor.
+        input.SetIsShapeTensor();
+      } else if (input_config->is_non_linear_format_io()) {
+        // If a tensor uses a non-linear IO format, indicate that the input uses
+        // a non-linear IO format.
+        input.SetIsNonLinearFormatIo();
       }
     }
   } else {
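Note on the hunk above: the two model-config flags previously set two independent booleans on the input; the rewritten control flow makes the classification mutually exclusive, with the shape-tensor flag taking precedence. A minimal, self-contained sketch of the resulting behavior (an illustration, not the Triton sources):

    #include <cassert>

    enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };

    // Stand-in for the two flags read from the model config above.
    TensorType Classify(bool is_shape_tensor, bool is_non_linear_format_io) {
      if (is_shape_tensor) {
        return TensorType::SHAPE_TENSOR;  // checked first, so it wins if both are set
      } else if (is_non_linear_format_io) {
        return TensorType::NON_LINEAR;
      }
      return TensorType::TENSOR;
    }

    int main() {
      // The old code would have set both booleans for a doubly-flagged input;
      // the else-if now resolves that case to SHAPE_TENSOR.
      assert(Classify(true, true) == TensorType::SHAPE_TENSOR);
      assert(Classify(false, true) == TensorType::NON_LINEAR);
      assert(Classify(false, false) == TensorType::TENSOR);
    }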
@@ -1040,18 +1038,16 @@ InferenceRequest::Normalize()
       const inference::ModelInput* input_config;
       RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));

-      // If a tensor uses a non-linear IO format, indicate that the input uses a
-      // non-linear IO format.
-      if (input_config->is_non_linear_format_io()) {
-        input.SetIsNonLinearFormatIo(true);
-      }
-
       // For a shape tensor, keep the tensor's shape as it is and mark
       // that the input is a shape tensor.
       if (input_config->is_shape_tensor()) {
         *input.MutableShape() = input.OriginalShape();
-        input.SetIsShapeTensor(true);
+        input.SetIsShapeTensor();
         continue;
+      } else if (input_config->is_non_linear_format_io()) {
+        // If a tensor uses a non-linear IO format, indicate that the input uses
+        // a non-linear IO format.
+        input.SetIsNonLinearFormatIo();
       }

       if (input.OriginalShape().size() == 0) {
@@ -1202,7 +1198,7 @@ InferenceRequest::Normalize()
         const auto& data_type = input.DType();

         // Non-linear IO format input byte size validation will be handled in the
-        // backend.
+        // TensorRT backend.
         if (!input.IsNonLinearFormatIo()) {
           TRITONSERVER_MemoryType input_memory_type;
           // Because Triton expects STRING type to be in special format
@@ -1215,12 +1211,7 @@ InferenceRequest::Normalize()
           // FIXME: Temporarily skips byte size checks for GPU tensors. See
           // DLIS-6820.
         } else {
-          // Shape tensor with dynamic batching does not introduce a new
-          // dimension to the tensor but adds an additional value to the 1-D
-          // array.
-          const std::vector<int64_t>& input_dims =
-              input.IsShapeTensor() ? input.OriginalShape()
-                                    : input.ShapeWithBatchDim();
+          const std::vector<int64_t>& input_dims = input.ShapeWithBatchDim();
           int64_t expected_byte_size = INT_MAX;
           expected_byte_size =
               triton::common::GetByteSize(data_type, input_dims);
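For the expected-byte-size computation retained above: for a linear-format input it is element count times element size over the batched dims. A rough model of that arithmetic for fixed-size datatypes — an illustrative assumption, not the real triton::common::GetByteSize, which also handles special cases such as STRING:

    #include <cstdint>
    #include <vector>

    int64_t ExpectedByteSize(const std::vector<int64_t>& dims, int64_t element_size) {
      int64_t elements = 1;
      for (int64_t d : dims) {
        elements *= d;  // assumes fully specified dims (no -1) after batching
      }
      return elements * element_size;
    }

    int main() {
      // E.g. an FP32 input batched to [8, 3, 224, 224]:
      // 8 * 3 * 224 * 224 * 4 bytes = 4,816,896 bytes.
      return ExpectedByteSize({8, 3, 224, 224}, sizeof(float)) == 4816896 ? 0 : 1;
    }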
@@ -1523,17 +1514,17 @@ InferenceRequest::ReportStatisticsCacheHit(MetricModelReporter* metric_reporter)
 // Input
 //
 InferenceRequest::Input::Input()
-    : is_shape_tensor_(false), is_non_linear_format_io_(false),
-      data_(new MemoryReference), has_host_policy_specific_data_(false)
+    : tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
+      has_host_policy_specific_data_(false)
 {
 }

 InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const int64_t* shape, const uint64_t dim_count)
     : name_(name), datatype_(datatype),
-      original_shape_(shape, shape + dim_count), is_shape_tensor_(false),
-      is_non_linear_format_io_(false), data_(new MemoryReference),
+      original_shape_(shape, shape + dim_count),
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
       has_host_policy_specific_data_(false)
 {
 }
@@ -1542,8 +1533,8 @@ InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const std::vector<int64_t>& shape)
     : name_(name), datatype_(datatype), original_shape_(shape),
-      is_shape_tensor_(false), is_non_linear_format_io_(false),
-      data_(new MemoryReference), has_host_policy_specific_data_(false)
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
+      has_host_policy_specific_data_(false)
 {
 }

@@ -1558,17 +1549,16 @@ InferenceRequest::Input::SetMetadata(
 }

 Status
-InferenceRequest::Input::SetIsShapeTensor(const bool is_shape_tensor)
+InferenceRequest::Input::SetIsShapeTensor()
 {
-  is_shape_tensor_ = is_shape_tensor;
+  tensor_type_ = TensorType::SHAPE_TENSOR;
   return Status::Success;
 }

 Status
-InferenceRequest::Input::SetIsNonLinearFormatIo(
-    const bool is_non_linear_format_io)
+InferenceRequest::Input::SetIsNonLinearFormatIo()
 {
-  is_non_linear_format_io_ = is_non_linear_format_io;
+  tensor_type_ = TensorType::NON_LINEAR;
   return Status::Success;
 }

28 changes: 21 additions & 7 deletions src/infer_request.h
@@ -82,6 +82,8 @@ class InferenceRequest {
   // Input tensor
   class Input {
    public:
+    enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };
+
     Input();
     Input(
         const std::string& name, const inference::DataType datatype,
@@ -120,7 +122,14 @@ class InferenceRequest {
     // into batch + shape.
     const std::vector<int64_t>& ShapeWithBatchDim() const
     {
-      return shape_with_batch_dim_;
+      if (tensor_type_ == TensorType::SHAPE_TENSOR) {
+        // Shape tensor with dynamic batching does not introduce a new
+        // dimension to the tensor but adds an additional value to the 1-D
+        // array.
+        return original_shape_;
+      } else {
+        return shape_with_batch_dim_;
+      }
     }
     std::vector<int64_t>* MutableShapeWithBatchDim()
     {
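To make the new branch concrete (illustrative shapes, not from any real model): with dynamic batching, a regular input declared as [3, 224, 224] and batched at 4 is reported as [4, 3, 224, 224], whereas a TensorRT shape tensor declared as [2] stays one-dimensional — batching appends a value to the tensor, not a dimension — which is why the accessor now falls back to original_shape_:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      // Regular input: batching prepends a batch dimension.
      std::vector<int64_t> original_shape{3, 224, 224};
      std::vector<int64_t> shape_with_batch_dim{4, 3, 224, 224};
      assert(shape_with_batch_dim.size() == original_shape.size() + 1);

      // Shape tensor: the batched value gains an element, not a dimension,
      // so its reported shape is still the original 1-D shape.
      std::vector<int64_t> shape_tensor_shape{2};
      assert(shape_tensor_shape.size() == 1);
    }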
@@ -134,16 +143,22 @@ class InferenceRequest {
     }

     // Whether or not the input is a TensorRT shape tensor
-    bool IsShapeTensor() const { return is_shape_tensor_; }
+    bool IsShapeTensor() const
+    {
+      return tensor_type_ == TensorType::SHAPE_TENSOR;
+    }

     // Specifies whether the input uses a non-linear IO format
-    bool IsNonLinearFormatIo() const { return is_non_linear_format_io_; }
+    bool IsNonLinearFormatIo() const
+    {
+      return tensor_type_ == TensorType::NON_LINEAR;
+    }

     // Set the input to be treated as a shape tensor.
-    Status SetIsShapeTensor(const bool is_shape_tensor);
+    Status SetIsShapeTensor();

     // Set that the input uses a non-linear IO format
-    Status SetIsNonLinearFormatIo(const bool is_non_linear_format_io_);
+    Status SetIsNonLinearFormatIo();

     // The data for this input.
     const std::shared_ptr<Memory>& Data() const { return data_; }
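One consequence of collapsing the two booleans into a single tensor_type_ field, visible in the accessors above: the two predicates can never both be true, and the setters overwrite each other. A reduced stand-in (not the real InferenceRequest::Input) demonstrating the last-write-wins behavior:

    #include <cassert>

    class Input {
     public:
      enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };

      void SetIsShapeTensor() { tensor_type_ = TensorType::SHAPE_TENSOR; }
      void SetIsNonLinearFormatIo() { tensor_type_ = TensorType::NON_LINEAR; }
      bool IsShapeTensor() const { return tensor_type_ == TensorType::SHAPE_TENSOR; }
      bool IsNonLinearFormatIo() const { return tensor_type_ == TensorType::NON_LINEAR; }

     private:
      TensorType tensor_type_ = TensorType::TENSOR;
    };

    int main() {
      Input input;
      input.SetIsShapeTensor();
      input.SetIsNonLinearFormatIo();
      // With one enum field the second call overwrites the first, something
      // two independent booleans would not have done.
      assert(!input.IsShapeTensor() && input.IsNonLinearFormatIo());
    }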
@@ -246,8 +261,7 @@ class InferenceRequest {
     std::vector<int64_t> original_shape_;
     std::vector<int64_t> shape_;
     std::vector<int64_t> shape_with_batch_dim_;
-    bool is_shape_tensor_;
-    bool is_non_linear_format_io_;
+    TensorType tensor_type_;
     std::shared_ptr<Memory> data_;

     bool has_host_policy_specific_data_;
37 changes: 22 additions & 15 deletions src/model_config_utils.cc
@@ -1712,6 +1712,26 @@ ValidateInstanceGroup(
   return Status::Success;
 }

+Status
+ValidateNonLinearFormatIO(
+    const inference::ModelInput& io, const std::string& platform, bool is_input)
+{
+  if ((platform != kTensorRTPlanPlatform) && io.is_non_linear_format_io()) {
+    return Status(
+        Status::Code::INVALID_ARG,
+        "Non-linear IO format is only supported for the TensorRT platform");
+  }
+
+  if (io.is_non_linear_format_io() && (io.dims_size() != 3)) {
+    std::string io_type = is_input ? "input" : "output";
+    return Status(
+        Status::Code::INVALID_ARG,
+        "Non-linear IO format " + io_type + " requires 3 dims");
+  }
+
+  return Status::Success;
+}
+
 Status
 ValidateModelInput(
     const inference::ModelInput& io, int32_t max_batch_size,
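A hedged usage sketch of the new helper. It assumes a Triton core build; the protobuf setters (set_is_non_linear_format_io, add_dims) follow from the fields the validator reads, while the include paths, platform strings, and triton::core namespace are assumptions here, not verified against this revision:

    #include "model_config.pb.h"
    #include "model_config_utils.h"

    void CheckNonLinearConfig() {
      inference::ModelInput io;
      io.set_is_non_linear_format_io(true);
      io.add_dims(3);
      io.add_dims(224);
      io.add_dims(224);

      // OK: TensorRT platform and exactly 3 dims.
      auto ok = triton::core::ValidateNonLinearFormatIO(
          io, "tensorrt_plan", true /* is_input */);

      // INVALID_ARG: the flag is rejected on a non-TensorRT platform.
      auto err = triton::core::ValidateNonLinearFormatIO(
          io, "onnxruntime_onnx", true /* is_input */);
    }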
@@ -1732,16 +1752,7 @@ ValidateModelInput(
         "shape tensors are only supported for TensorRT platform");
   }

-  if ((platform != kTensorRTPlanPlatform) && io.is_non_linear_format_io()) {
-    return Status(
-        Status::Code::INVALID_ARG,
-        "Non-linear IO format is only supported for the TensorRT platform");
-  }
-
-  if (io.is_non_linear_format_io() && (io.dims_size() != 3)) {
-    return Status(
-        Status::Code::INVALID_ARG, "Non-linear IO format input require 3 dims");
-  }
+  RETURN_IF_ERROR(ValidateNonLinearFormatIO(io, platform, true /* is_input*/));

   return Status::Success;
 }
@@ -1779,11 +1790,7 @@ ValidateModelOutput(
         "shape tensors are only supported for TensorRT platform");
   }

-  if ((platform != kTensorRTPlanPlatform) && io.is_non_linear_format_io()) {
-    return Status(
-        Status::Code::INVALID_ARG,
-        "Non-linear IO format is only supported for the TensorRT platform");
-  }
+  RETURN_IF_ERROR(ValidateNonLinearFormatIO(io, platform, false /* is_input*/));

   return Status::Success;
 }
11 changes: 11 additions & 0 deletions src/model_config_utils.h
@@ -172,6 +172,17 @@ Status ValidateInstanceGroup(
 /// is not valid.
 Status ValidateModelIOConfig(const inference::ModelConfig& config);

+/// Validate that non-linear format inputs or outputs are specified correctly
+/// in a model configuration.
+/// \param io The model input or output.
+/// \param platform The platform name.
+/// \param is_input Specifies whether it is an input or an output.
+/// \return The error status. A non-OK status indicates the configuration
+/// is not valid.
+Status ValidateNonLinearFormatIO(
+    const inference::ModelInput& io, const std::string& platform,
+    bool is_input);
+
 /// Validate that input is specified correctly in a model
 /// configuration.
 /// \param io The model input.
