diff --git a/src/constants.h b/src/constants.h index 8415f8ee9..119d1e9d2 100644 --- a/src/constants.h +++ b/src/constants.h @@ -1,4 +1,4 @@ -// Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -92,7 +92,6 @@ constexpr uint64_t NANOS_PER_SECOND = 1000000000; constexpr uint64_t NANOS_PER_MILLIS = 1000000; constexpr int MAX_GRPC_MESSAGE_SIZE = INT32_MAX; constexpr uint64_t SEQUENCE_IDLE_DEFAULT_MICROSECONDS = 1000 * 1000; -constexpr size_t STRING_CORRELATION_ID_MAX_LENGTH_BYTES = 128; constexpr size_t CUDA_IPC_STRUCT_SIZE = 64; #ifdef TRITON_ENABLE_METRICS diff --git a/src/infer_request.cc b/src/infer_request.cc index 0d0c80a0d..83b3bb872 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -1,4 +1,4 @@ -// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions
@@ -1228,11 +1228,16 @@ InferenceRequest::Normalize()
       }
     }
   }
+
+  if (model_config.has_sequence_batching()) {
+    RETURN_IF_ERROR(ValidateCorrelationId());
+  }
+
   return Status::Success;
 }
 
 Status
-InferenceRequest::ValidateRequestInputs()
+InferenceRequest::ValidateRequestInputs() const
 {
   const inference::ModelConfig& model_config = model_raw_->Config();
   if ((original_inputs_.size() > (size_t)model_config.input_size()) ||
@@ -1404,6 +1409,62 @@ InferenceRequest::ValidateBytesInputs(
   return Status::Success;
 }
 
+// Verifies that the request's correlation ID type is compatible with the
+// data-type of the model's CONTROL_SEQUENCE_CORRID sequence-batching
+// control. Propagates a failed control lookup, returns INVALID_ARG on a
+// type mismatch, and succeeds otherwise (including when the lookup yields
+// no control tensor name).
+Status
+InferenceRequest::ValidateCorrelationId() const
+{
+  const inference::ModelConfig& config = model_raw_->Config();
+
+  std::string control_name;
+  inference::DataType expected_dtype;
+  RETURN_IF_ERROR(GetTypedSequenceControlProperties(
+      config.sequence_batching(), config.name(),
+      inference::ModelSequenceBatching::Control::CONTROL_SEQUENCE_CORRID,
+      false /* required */, &control_name, &expected_dtype));
+
+  // No CORRID control tensor name means there is nothing to compare against.
+  if (control_name.empty()) {
+    return Status::Success;
+  }
+
+  using IdType = InferenceRequest::SequenceId::DataType;
+  const auto id_type = CorrelationId().Type();
+
+  // A STRING ID is only accepted by a TYPE_STRING control; a UINT64 ID is
+  // accepted by UINT64, INT64, UINT32 and INT32 controls.
+  bool mismatch = false;
+  inference::DataType request_dtype = inference::DataType::TYPE_STRING;
+  if (id_type == IdType::STRING) {
+    mismatch = (expected_dtype != inference::DataType::TYPE_STRING);
+  } else if (id_type == IdType::UINT64) {
+    request_dtype = inference::DataType::TYPE_UINT64;
+    const bool integer_control =
+        (expected_dtype == inference::DataType::TYPE_UINT64) ||
+        (expected_dtype == inference::DataType::TYPE_INT64) ||
+        (expected_dtype == inference::DataType::TYPE_UINT32) ||
+        (expected_dtype == inference::DataType::TYPE_INT32);
+    mismatch = !integer_control;
+  }
+
+  if (!mismatch) {
+    return Status::Success;
+  }
+
+  const std::string request_dtype_name =
+      triton::common::DataTypeToProtocolString(request_dtype);
+  const std::string expected_dtype_name =
+      triton::common::DataTypeToProtocolString(expected_dtype);
+  return Status(
+      Status::Code::INVALID_ARG,
+      LogRequest() + "sequence batching control '" + control_name +
+          "' data-type is '" + request_dtype_name + "', but model '" +
+          ModelName() + "' expects '" + expected_dtype_name + "'");
+}
+
 #ifdef TRITON_ENABLE_STATS
 
 void
diff --git a/src/infer_request.h b/src/infer_request.h
index 38c89ed63..e9bfa49bc 100644
--- a/src/infer_request.h
+++ b/src/infer_request.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -771,13 +771,15 @@ class InferenceRequest { Status Normalize(); // Helper for validating Inputs - Status ValidateRequestInputs(); + Status ValidateRequestInputs() const; Status ValidateBytesInputs( const std::string& input_id, const Input& input, const std::string& model_name, TRITONSERVER_MemoryType* buffer_memory_type) const; + Status ValidateCorrelationId() const; + // Helpers for pending request metrics void IncrementPendingRequestCount(); void DecrementPendingRequestCount(); diff --git a/src/sequence_batch_scheduler/sequence_batch_scheduler.cc b/src/sequence_batch_scheduler/sequence_batch_scheduler.cc index 74314e7ab..45e9c037c 100644 --- a/src/sequence_batch_scheduler/sequence_batch_scheduler.cc +++ b/src/sequence_batch_scheduler/sequence_batch_scheduler.cc @@ -1,4 +1,4 @@ -// Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -1343,9 +1343,9 @@ SequenceBatch::SetControlTensors( auto& seq_corr_id = seq_slot_corrid_override_; size_t size_p = triton::common::GetDataTypeByteSize(seq_corr_id->DType()); if (seq_corr_id->DType() == inference::DataType::TYPE_STRING) { - // 4 bytes for length of string plus pre-defined max string correlation id - // length in bytes - size_p = 4 + triton::core::STRING_CORRELATION_ID_MAX_LENGTH_BYTES; + // 4 bytes for length of string plus string correlation id length in + // bytes. + size_p = 4 + corrid.StringValue().length(); } TRITONSERVER_MemoryType memory_type;