Skip to content

Commit

Permalink
feat: Per-model customization of histogram metric buckets (#405)
Browse files Browse the repository at this point in the history
  • Loading branch information
yinggeh authored Nov 6, 2024
1 parent 3ad7cad commit 1d18c2b
Show file tree
Hide file tree
Showing 10 changed files with 113 additions and 25 deletions.
3 changes: 2 additions & 1 deletion src/backend_model_instance.cc
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,8 @@ TritonModelInstance::TritonModelInstance(
model_->Server()->ResponseCacheEnabled();
MetricModelReporter::Create(
model_->ModelId(), model_->Version(), id, response_cache_enabled,
model_->IsDecoupled(), model_->Config().metric_tags(), &reporter_);
model_->IsDecoupled(), model_->Config().metric_tags(),
model_->Config().model_metrics(), &reporter_);
}
#endif // TRITON_ENABLE_METRICS
}
Expand Down
1 change: 1 addition & 0 deletions src/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ constexpr char kInitialStateFolder[] = "initial_state";
// Metric names
constexpr char kPendingRequestMetric[] = "inf_pending_request_count";
constexpr char kModelLoadTimeMetric[] = "model_load_time";
constexpr char kFirstResponseHistogram[] = "first_response_histogram";

constexpr uint64_t NANOS_PER_SECOND = 1000000000;
constexpr uint64_t NANOS_PER_MILLIS = 1000000;
Expand Down
2 changes: 1 addition & 1 deletion src/ensemble_scheduler/ensemble_scheduler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1477,7 +1477,7 @@ EnsembleScheduler::EnsembleScheduler(
MetricModelReporter::Create(
model_id, 1 /* model_version */, METRIC_REPORTER_ID_CPU,
false /* response_cache_enabled */, is_decoupled, config.metric_tags(),
&metric_reporter_);
config.model_metrics(), &metric_reporter_);
}
#endif // TRITON_ENABLE_METRICS

Expand Down
2 changes: 1 addition & 1 deletion src/infer_response.cc
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ InferenceResponse::UpdateResponseMetrics() const
.count();
if (auto reporter = model_->MetricReporter()) {
reporter->ObserveHistogram(
"first_response_histogram",
kFirstResponseHistogram,
(now_ns - infer_start_ns_) / NANOS_PER_MILLIS);
}
}
Expand Down
39 changes: 32 additions & 7 deletions src/metric_model_reporter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ namespace triton { namespace core {
//
void
MetricReporterConfig::ParseConfig(
bool response_cache_enabled, bool is_decoupled)
bool response_cache_enabled, bool is_decoupled,
const inference::ModelMetrics& model_metrics)
{
// Global config only for now in config map
auto metrics_config_map = Metrics::ConfigMap();
Expand Down Expand Up @@ -74,6 +75,26 @@ MetricReporterConfig::ParseConfig(
// Set flag to signal to stats aggregator if caching is enabled or not
cache_enabled_ = response_cache_enabled;
is_decoupled_ = is_decoupled;

// Override default histogram options if set in model_metrics.
for (const auto& metric_control : model_metrics.metric_control()) {
const std::string& family_name =
metric_control.metric_identifier().family();

// If family name exists, override with new options.
if (metric_map_.find(family_name) != metric_map_.end()) {
// Copy protobuf RepeatedField to std::vector
const auto& buckets_proto = metric_control.histogram_options().buckets();
const prometheus::Histogram::BucketBoundaries buckets(
buckets_proto.begin(), buckets_proto.end());
histogram_options_[metric_map_.at(family_name)] = buckets;
} else {
// metric_control config may be extended to support backend metrics.
LOG_WARNING << "Metric family '" << family_name
<< "' in 'metric_identifier' is not a customizable metric in "
"Triton core.";
}
}
}

prometheus::Summary::Quantiles
Expand Down Expand Up @@ -120,6 +141,7 @@ MetricModelReporter::Create(
const ModelIdentifier& model_id, const int64_t model_version,
const int device, bool response_cache_enabled, bool is_decoupled,
const triton::common::MetricTagsMap& model_tags,
const inference::ModelMetrics& model_metrics,
std::shared_ptr<MetricModelReporter>* metric_model_reporter)
{
static std::mutex mtx;
Expand Down Expand Up @@ -148,21 +170,22 @@ MetricModelReporter::Create(

metric_model_reporter->reset(new MetricModelReporter(
model_id, model_version, device, response_cache_enabled, is_decoupled,
model_tags));
model_tags, model_metrics));
reporter_map.insert({hash_labels, *metric_model_reporter});
return Status::Success;
}

MetricModelReporter::MetricModelReporter(
const ModelIdentifier& model_id, const int64_t model_version,
const int device, bool response_cache_enabled, bool is_decoupled,
const triton::common::MetricTagsMap& model_tags)
const triton::common::MetricTagsMap& model_tags,
const inference::ModelMetrics& model_metrics)
{
std::map<std::string, std::string> labels;
GetMetricLabels(&labels, model_id, model_version, device, model_tags);

// Parse metrics config to control metric setup and behavior
config_.ParseConfig(response_cache_enabled, is_decoupled);
config_.ParseConfig(response_cache_enabled, is_decoupled, model_metrics);

// Initialize families and metrics
InitializeCounters(labels);
Expand Down Expand Up @@ -282,10 +305,11 @@ void
MetricModelReporter::InitializeHistograms(
const std::map<std::string, std::string>& labels)
{
// Update MetricReporterConfig::metric_map_ for new histograms.
// Only create response metrics if decoupled model to reduce metric output
if (config_.latency_histograms_enabled_) {
if (config_.is_decoupled_) {
histogram_families_["first_response_histogram"] =
histogram_families_[kFirstResponseHistogram] =
&Metrics::FamilyFirstResponseDuration();
}
}
Expand All @@ -294,8 +318,9 @@ MetricModelReporter::InitializeHistograms(
const auto& name = iter.first;
auto family_ptr = iter.second;
if (family_ptr) {
histograms_[name] = CreateMetric<prometheus::Histogram>(
*family_ptr, labels, config_.buckets_);
const auto& buckets = config_.histogram_options_[name];
histograms_[name] =
CreateMetric<prometheus::Histogram>(*family_ptr, labels, buckets);
}
}
}
Expand Down
25 changes: 20 additions & 5 deletions src/metric_model_reporter.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ struct ModelIdentifier;
struct MetricReporterConfig {
#ifdef TRITON_ENABLE_METRICS
// Parses Metrics::ConfigMap and sets fields if specified
void ParseConfig(bool response_cache_enabled, bool is_decoupled);
void ParseConfig(
bool response_cache_enabled, bool is_decoupled,
const inference::ModelMetrics& model_metrics);
// Parses pairs of quantiles "quantile1:error1, quantile2:error2, ..."
// and overwrites quantiles_ field if successful.
prometheus::Summary::Quantiles ParseQuantiles(std::string options);
Expand All @@ -57,10 +59,12 @@ struct MetricReporterConfig {
bool latency_histograms_enabled_ = false;
// Create and use Summaries for per-model latency related metrics
bool latency_summaries_enabled_ = false;
// Buckets used for any histogram metrics. Each value represents
// a bucket boundary. For example, {100, 500, 2000, 5000} are latencies
// Default bucket boundaries used for each histogram metric. Each value
// represents a boundary. For example, {100, 500, 2000, 5000} are latencies
// in milliseconds in first_response_histogram.
prometheus::Histogram::BucketBoundaries buckets_ = {100, 500, 2000, 5000};
std::unordered_map<std::string, prometheus::Histogram::BucketBoundaries>
histogram_options_ = {{kFirstResponseHistogram, {100, 500, 2000, 5000}}};

// Quantiles used for any summary metrics. Each pair of values represents
// { quantile, error }. For example, {0.90, 0.01} means to compute the
// 90th percentile with 1% error on either side, so the approximate 90th
Expand All @@ -73,6 +77,14 @@ struct MetricReporterConfig {
bool cache_enabled_ = false;

bool is_decoupled_ = false;

private:
// Maps the metric family full name to its internal lookup key. This field is
// required because users are expected to write the "ModelMetrics"
// configuration with the full name displayed in metrics reporting, while a
// different name is used internally. All new histograms must update the map.
const std::unordered_map<std::string, std::string> metric_map_ = {
{"nv_inference_first_response_histogram_ms", kFirstResponseHistogram}};
#endif // TRITON_ENABLE_METRICS
};

Expand All @@ -86,7 +98,9 @@ class MetricModelReporter {
const triton::core::ModelIdentifier& model_id,
const int64_t model_version, const int device,
bool response_cache_enabled, bool is_decoupled,
// FIXME: [DLIS-7497] Merge model_tags with model_metrics
const triton::common::MetricTagsMap& model_tags,
const inference::ModelMetrics& model_metrics,
std::shared_ptr<MetricModelReporter>* metric_model_reporter);

~MetricModelReporter();
Expand All @@ -112,7 +126,8 @@ class MetricModelReporter {
MetricModelReporter(
const ModelIdentifier& model_id, const int64_t model_version,
const int device, bool response_cache_enabled, bool is_decoupled,
const triton::common::MetricTagsMap& model_tags);
const triton::common::MetricTagsMap& model_tags,
const inference::ModelMetrics& model_metrics);

static void GetMetricLabels(
std::map<std::string, std::string>* labels,
Expand Down
14 changes: 8 additions & 6 deletions src/metrics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,6 @@ Metrics::Metrics()
"execution per-model.")
.Register(*registry_)),

inf_first_response_histogram_ms_family_(
prometheus::BuildHistogram()
.Name("nv_inference_first_response_histogram_ms")
.Help("Duration from request to first response in milliseconds")
.Register(*registry_)),

model_load_time_family_(prometheus::BuildGauge()
.Name("nv_model_load_duration_secs")
.Help("Model load time in seconds")
Expand Down Expand Up @@ -155,6 +149,14 @@ Metrics::Metrics()
"microseconds")
.Register(*registry_)),

// Histograms
// New histograms must be added to MetricReporterConfig.metric_map_
inf_first_response_histogram_ms_family_(
prometheus::BuildHistogram()
.Name("nv_inference_first_response_histogram_ms")
.Help("Duration from request to first response in milliseconds")
.Register(*registry_)),

// Summaries
inf_request_summary_us_family_(
prometheus::BuildSummary()
Expand Down
6 changes: 4 additions & 2 deletions src/metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,6 @@ class Metrics {
prometheus::Family<prometheus::Counter>&
inf_compute_output_duration_us_family_;
prometheus::Family<prometheus::Gauge>& inf_pending_request_count_family_;
prometheus::Family<prometheus::Histogram>&
inf_first_response_histogram_ms_family_;
prometheus::Family<prometheus::Gauge>& model_load_time_family_;

prometheus::Family<prometheus::Gauge>& pinned_memory_pool_total_family_;
Expand All @@ -330,6 +328,10 @@ class Metrics {
prometheus::Family<prometheus::Counter>& cache_num_misses_model_family_;
prometheus::Family<prometheus::Counter>& cache_miss_duration_us_model_family_;

// Histograms
prometheus::Family<prometheus::Histogram>&
inf_first_response_histogram_ms_family_;

// Summaries
prometheus::Family<prometheus::Summary>& inf_request_summary_us_family_;
prometheus::Family<prometheus::Summary>& inf_queue_summary_us_family_;
Expand Down
3 changes: 2 additions & 1 deletion src/model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ Model::Init(const bool is_config_provided)
#ifdef TRITON_ENABLE_METRICS
MetricModelReporter::Create(
ModelId(), Version(), METRIC_REPORTER_ID_UTILITY, ResponseCacheEnabled(),
IsDecoupled(), Config().metric_tags(), &reporter_);
IsDecoupled(), Config().metric_tags(), Config().model_metrics(),
&reporter_);
#endif // TRITON_ENABLE_METRICS

return Status::Success;
Expand Down
43 changes: 42 additions & 1 deletion src/model_config_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,39 @@ ValidateNonLinearFormatIO(
return Status::Success;
}

// Helper function to validate that model_metrics contains all required data.
//
// Every 'metric_control' entry must provide:
//   - a 'metric_identifier' whose 'family' is non-empty,
//   - 'histogram_options' with at least one bucket boundary, and
//   - bucket boundaries that never decrease from one entry to the next.
//
// Returns Status::Success when every entry passes; otherwise returns an
// INVALID_ARG status describing the first violation encountered.
Status
ValidateModelMetrics(const inference::ModelMetrics& model_metrics)
{
  for (const auto& metric_control : model_metrics.metric_control()) {
    if (!metric_control.has_metric_identifier()) {
      return Status(
          Status::Code::INVALID_ARG,
          "metric control must specify 'metric_identifier'");
    }

    if (metric_control.metric_identifier().family().empty()) {
      return Status(
          Status::Code::INVALID_ARG,
          "metric identifier must specify non-empty 'family'");
    }

    if (!metric_control.has_histogram_options()) {
      return Status(
          Status::Code::INVALID_ARG,
          "metric control must specify 'histogram_options'");
    }

    const auto& histogram_options = metric_control.histogram_options();
    const int bucket_count = histogram_options.buckets_size();
    if (bucket_count == 0) {
      return Status(
          Status::Code::INVALID_ARG,
          "histogram options must specify non-empty 'buckets'");
    }

    // Bucket boundaries are cumulative upper bounds, so a boundary smaller
    // than its predecessor cannot describe a valid histogram. Reject it at
    // config-validation time instead of letting it reach metric creation.
    for (int i = 1; i < bucket_count; ++i) {
      if (histogram_options.buckets(i) < histogram_options.buckets(i - 1)) {
        return Status(
            Status::Code::INVALID_ARG,
            "histogram options must specify 'buckets' in ascending order");
      }
    }
  }

  return Status::Success;
}

} // namespace

Status
Expand Down Expand Up @@ -1591,7 +1624,7 @@ ValidateModelConfig(
}
}

// If ensemble scheduling is specified, validate it. Otherwise,
// If ensemble scheduling is specified, validate it. Otherwise,
// must validate platform and instance_group
if (config.has_ensemble_scheduling()) {
#ifdef TRITON_ENABLE_ENSEMBLE
Expand Down Expand Up @@ -1620,6 +1653,14 @@ ValidateModelConfig(
" cache.");
}

// If model_metrics is specified, validate it.
if (config.has_model_metrics()) {
#ifdef TRITON_ENABLE_METRICS
RETURN_IF_ERROR(ValidateModelMetrics(config.model_metrics()));
#else
return Status(Status::Code::INVALID_ARG, "metrics not supported");
#endif // TRITON_ENABLE_METRICS
}
return Status::Success;
}

Expand Down

0 comments on commit 1d18c2b

Please sign in to comment.