Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add histogram metric type #386

Merged
merged 10 commits into from
Aug 16, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 71 additions & 8 deletions include/triton/core/tritonserver.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ struct TRITONSERVER_Server;
struct TRITONSERVER_ServerOptions;
struct TRITONSERVER_Metric;
struct TRITONSERVER_MetricFamily;
struct TRITONSERVER_MetricArgs;

///
/// TRITONSERVER API Version
Expand Down Expand Up @@ -91,7 +92,7 @@ struct TRITONSERVER_MetricFamily;
/// }
///
#define TRITONSERVER_API_VERSION_MAJOR 1
#define TRITONSERVER_API_VERSION_MINOR 33
#define TRITONSERVER_API_VERSION_MINOR 34

/// Get the TRITONBACKEND API version supported by the Triton shared
/// library. This value can be compared against the
Expand Down Expand Up @@ -2615,7 +2616,8 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_ServerInferAsync(
///
typedef enum TRITONSERVER_metrickind_enum {
TRITONSERVER_METRIC_KIND_COUNTER,
TRITONSERVER_METRIC_KIND_GAUGE
TRITONSERVER_METRIC_KIND_GAUGE,
TRITONSERVER_METRIC_KIND_HISTOGRAM
} TRITONSERVER_MetricKind;

/// Create a new metric family object. The caller takes ownership of the
Expand Down Expand Up @@ -2644,6 +2646,42 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricFamilyNew(
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
TRITONSERVER_MetricFamilyDelete(struct TRITONSERVER_MetricFamily* family);

/// Get the TRITONSERVER_MetricKind of the metric family.
///
/// \param family The metric family object to query.
/// \param kind Returns the TRITONSERVER_MetricKind of the metric family.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
TRITONSERVER_GetMetricFamilyKind(
Tabrizian marked this conversation as resolved.
Show resolved Hide resolved
struct TRITONSERVER_MetricFamily* family, TRITONSERVER_MetricKind* kind);

/// Create a new metric args object. The caller takes ownership of the
/// TRITONSERVER_MetricArgs object and must call TRITONSERVER_MetricArgsDelete
/// to release the object.
///
/// \param args Returns the new metric args object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricArgsNew(
struct TRITONSERVER_MetricArgs** args);

/// Configure a metric args object for a Prometheus histogram metric by
/// setting the histogram's bucket boundaries.
yinggeh marked this conversation as resolved.
Show resolved Hide resolved
///
/// \param args The metric args object to set.
/// \param buckets The array of bucket boundaries.
/// \param buckets_count The number of bucket boundaries.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
TRITONSERVER_MetricArgsSetHistogram(
struct TRITONSERVER_MetricArgs* args, const double* buckets,
const uint64_t buckets_count);

/// Delete a metric args object.
///
/// \param args The metric args object.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricArgsDelete(
struct TRITONSERVER_MetricArgs* args);

/// Create a new metric object. The caller takes ownership of the
/// TRITONSERVER_Metric object and must call
/// TRITONSERVER_MetricDelete to release the object. The caller is also
Expand All @@ -2655,12 +2693,35 @@ TRITONSERVER_MetricFamilyDelete(struct TRITONSERVER_MetricFamily* family);
/// \param family The metric family to add this new metric to.
/// \param labels The array of labels to associate with this new metric.
/// \param label_count The number of labels.
/// bucket boundaries. For histogram only.
yinggeh marked this conversation as resolved.
Show resolved Hide resolved
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricNew(
struct TRITONSERVER_Metric** metric,
struct TRITONSERVER_MetricFamily* family,
const struct TRITONSERVER_Parameter** labels, const uint64_t label_count);

/// Create a new metric object. The caller takes ownership of the
/// TRITONSERVER_Metric object and must call
/// TRITONSERVER_MetricDelete to release the object. The caller is also
/// responsible for ownership of the labels passed in.
/// Each label can be deleted immediately after creating the metric with
/// TRITONSERVER_ParameterDelete if not re-using the labels.
/// Metric args can be deleted immediately after creating the metric with
/// TRITONSERVER_MetricArgsDelete if not re-using the metric args.
///
/// \param metric Returns the new metric object.
/// \param family The metric family to add this new metric to.
/// \param labels The array of labels to associate with this new metric.
/// \param label_count The number of labels.
/// \param args Metric args that store additional arguments to construct
/// particular metric types, e.g. histogram.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricNewWithArgs(
struct TRITONSERVER_Metric** metric,
struct TRITONSERVER_MetricFamily* family,
const struct TRITONSERVER_Parameter** labels, const uint64_t label_count,
const struct TRITONSERVER_MetricArgs* args);

/// Delete a metric object.
/// All TRITONSERVER_Metric* objects should be deleted BEFORE their
/// corresponding TRITONSERVER_MetricFamily* objects have been deleted.
Expand All @@ -2672,9 +2733,10 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricDelete(
struct TRITONSERVER_Metric* metric);

/// Get the current value of a metric object.
/// Supports metrics of kind TRITONSERVER_METRIC_KIND_COUNTER
/// and TRITONSERVER_METRIC_KIND_GAUGE, and returns
/// TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind.
/// Supports metrics of kind TRITONSERVER_METRIC_KIND_COUNTER and
/// TRITONSERVER_METRIC_KIND_GAUGE, and returns
/// TRITONSERVER_ERROR_UNSUPPORTED for any other TRITONSERVER_MetricKind,
/// including TRITONSERVER_METRIC_KIND_HISTOGRAM.
///
/// \param metric The metric object to query.
/// \param value Returns the current value of the metric object.
Expand All @@ -2695,8 +2757,9 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricValue(
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricIncrement(
struct TRITONSERVER_Metric* metric, double value);

/// Set the current value of metric to value.
/// Supports metrics of kind TRITONSERVER_METRIC_KIND_GAUGE and returns
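/// Set the current value of metric to value. For a metric of kind
/// TRITONSERVER_METRIC_KIND_HISTOGRAM, value is instead recorded as a new
/// observation of the histogram rather than overwriting a stored value.
/// Supports metrics of kind TRITONSERVER_METRIC_KIND_GAUGE and
/// TRITONSERVER_METRIC_KIND_HISTOGRAM. Returns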
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to explain what it does when TRITONSERVER_METRIC_KIND_HISTOGRAM is histogram (i.e. increment the counter for the bucket that value matches)?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does "observe" mean? Can we add more details?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's why I still think we need a new C API TRITONSERVER_MetricObserve.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/// TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind.
///
/// \param metric The metric object to update.
Expand All @@ -2705,7 +2768,7 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricIncrement(
TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_MetricSet(
struct TRITONSERVER_Metric* metric, double value);

/// Get the TRITONSERVER_MetricKind of metric and its corresponding family.
/// Get the TRITONSERVER_MetricKind of metric, which is the kind of its
/// corresponding metric family.
///
/// \param metric The metric object to query.
/// \param kind Returns the TRITONSERVER_MetricKind of metric.
Expand Down
65 changes: 61 additions & 4 deletions src/metric_family.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights
// reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -54,6 +55,12 @@ MetricFamily::MetricFamily(
.Help(description)
.Register(*registry));
break;
case TRITONSERVER_METRIC_KIND_HISTOGRAM:
family_ = reinterpret_cast<void*>(&prometheus::BuildHistogram()
.Name(name)
.Help(description)
.Register(*registry));
break;
default:
throw std::invalid_argument(
"Unsupported kind passed to MetricFamily constructor.");
Expand All @@ -63,24 +70,50 @@ MetricFamily::MetricFamily(
}

void*
MetricFamily::Add(std::map<std::string, std::string> label_map, Metric* metric)
MetricFamily::Add(
std::map<std::string, std::string> label_map, Metric* metric,
const TritonServerMetricArgs* args)
{
void* prom_metric = nullptr;
switch (kind_) {
case TRITONSERVER_METRIC_KIND_COUNTER: {
if (args != nullptr) {
throw std::invalid_argument(
"Unexpected args found in counter Metric constructor.");
}
auto counter_family_ptr =
reinterpret_cast<prometheus::Family<prometheus::Counter>*>(family_);
auto counter_ptr = &counter_family_ptr->Add(label_map);
prom_metric = reinterpret_cast<void*>(counter_ptr);
break;
}
case TRITONSERVER_METRIC_KIND_GAUGE: {
if (args != nullptr) {
throw std::invalid_argument(
"Unexpected args found in gauge Metric constructor.");
}
auto gauge_family_ptr =
reinterpret_cast<prometheus::Family<prometheus::Gauge>*>(family_);
auto gauge_ptr = &gauge_family_ptr->Add(label_map);
prom_metric = reinterpret_cast<void*>(gauge_ptr);
break;
}
case TRITONSERVER_METRIC_KIND_HISTOGRAM: {
if (args == nullptr) {
throw std::invalid_argument(
"Bucket boundaries not found in Metric args.");
}
if (args->kind() != TRITONSERVER_METRIC_KIND_HISTOGRAM) {
throw std::invalid_argument(
"Incorrect Metric args kind in histogram Metric constructor.");
}
auto histogram_family_ptr =
reinterpret_cast<prometheus::Family<prometheus::Histogram>*>(family_);
auto histogram_ptr =
&histogram_family_ptr->Add(label_map, args->buckets());
prom_metric = reinterpret_cast<void*>(histogram_ptr);
break;
}
default:
throw std::invalid_argument(
"Unsupported family kind passed to Metric constructor.");
Expand Down Expand Up @@ -134,6 +167,14 @@ MetricFamily::Remove(void* prom_metric, Metric* metric)
gauge_family_ptr->Remove(gauge_ptr);
break;
}
case TRITONSERVER_METRIC_KIND_HISTOGRAM: {
auto histogram_family_ptr =
reinterpret_cast<prometheus::Family<prometheus::Histogram>*>(family_);
auto histogram_ptr =
reinterpret_cast<prometheus::Histogram*>(prom_metric);
histogram_family_ptr->Remove(histogram_ptr);
break;
}
default:
// Invalid kind should be caught in constructor
LOG_ERROR << "Unsupported kind in Metric destructor.";
Expand Down Expand Up @@ -169,7 +210,8 @@ MetricFamily::~MetricFamily()
//
Metric::Metric(
TRITONSERVER_MetricFamily* family,
std::vector<const InferenceParameter*> labels)
std::vector<const InferenceParameter*> labels,
const TritonServerMetricArgs* args)
{
family_ = reinterpret_cast<MetricFamily*>(family);
kind_ = family_->Kind();
Expand All @@ -188,7 +230,7 @@ Metric::Metric(
std::string(reinterpret_cast<const char*>(param->ValuePointer()));
}

metric_ = family_->Add(label_map, this);
metric_ = family_->Add(label_map, this, args);
}

Metric::~Metric()
Expand Down Expand Up @@ -235,6 +277,11 @@ Metric::Value(double* value)
*value = gauge_ptr->Value();
break;
}
case TRITONSERVER_METRIC_KIND_HISTOGRAM: {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_UNSUPPORTED,
"TRITONSERVER_METRIC_KIND_HISTOGRAM does not support Value");
}
default:
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_UNSUPPORTED,
Expand Down Expand Up @@ -279,6 +326,11 @@ Metric::Increment(double value)
}
break;
}
case TRITONSERVER_METRIC_KIND_HISTOGRAM: {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_UNSUPPORTED,
"TRITONSERVER_METRIC_KIND_HISTOGRAM does not support Increment");
}
default:
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_UNSUPPORTED,
Expand Down Expand Up @@ -308,6 +360,11 @@ Metric::Set(double value)
gauge_ptr->Set(value);
break;
}
case TRITONSERVER_METRIC_KIND_HISTOGRAM: {
auto histogram_ptr = reinterpret_cast<prometheus::Histogram*>(metric_);
histogram_ptr->Observe(value);
break;
}
default:
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_UNSUPPORTED,
Expand Down
36 changes: 33 additions & 3 deletions src/metric_family.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights
// reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -27,6 +28,7 @@

#ifdef TRITON_ENABLE_METRICS

#include <cstring>
#include <mutex>
#include <set>
#include <unordered_map>
Expand All @@ -37,6 +39,30 @@

namespace triton { namespace core {

//
// TritonServerMetricArgs
//
// Implementation for TRITONSERVER_MetricArgs.
//
class TritonServerMetricArgs {
 public:
  TritonServerMetricArgs() = default;

  // Mark these args as histogram args and store a private copy of the
  // bucket boundaries.
  //
  // \param buckets Pointer to 'bucket_count' bucket boundaries. The values
  // are copied, so this object does not keep a reference to the array.
  // \param bucket_count Number of entries readable through 'buckets'.
  // \return Always nullptr.
  void* SetHistogramArgs(const double* buckets, uint64_t bucket_count)
  {
    kind_ = TRITONSERVER_METRIC_KIND_HISTOGRAM;
    if ((buckets == nullptr) || (bucket_count == 0)) {
      // Nothing readable was supplied: store an empty boundary list instead
      // of dereferencing or copying from a null pointer.
      buckets_.clear();
    } else {
      // Range-assign replaces the previous resize + memcpy pair; it yields
      // the same contents without a raw byte copy.
      buckets_.assign(buckets, buckets + bucket_count);
    }
    return nullptr;
  }

  // Kind of metric these args were configured for. Until a setter such as
  // SetHistogramArgs() has been called this is the default kind below.
  TRITONSERVER_MetricKind kind() const { return kind_; }

  // Bucket boundaries stored by SetHistogramArgs(); empty if never set.
  const std::vector<double>& buckets() const { return buckets_; }

 private:
  // Explicit default so that kind() never reads an indeterminate value when
  // the args object is used before any setter has been called. Defaulting
  // to a non-histogram kind means the histogram kind check on args->kind()
  // in MetricFamily::Add rejects such args deterministically.
  TRITONSERVER_MetricKind kind_{TRITONSERVER_METRIC_KIND_COUNTER};
  std::vector<double> buckets_;
};

//
// Implementation for TRITONSERVER_MetricFamily.
//
Expand All @@ -50,7 +76,9 @@ class MetricFamily {
void* Family() const { return family_; }
TRITONSERVER_MetricKind Kind() const { return kind_; }

void* Add(std::map<std::string, std::string> label_map, Metric* metric);
void* Add(
std::map<std::string, std::string> label_map, Metric* metric,
const TritonServerMetricArgs* args);
void Remove(void* prom_metric, Metric* metric);

int NumMetrics()
Expand Down Expand Up @@ -86,7 +114,8 @@ class Metric {
public:
Metric(
TRITONSERVER_MetricFamily* family,
std::vector<const InferenceParameter*> labels);
std::vector<const InferenceParameter*> labels,
const TritonServerMetricArgs* args);
~Metric();

MetricFamily* Family() const { return family_; }
Expand All @@ -95,6 +124,7 @@ class Metric {
TRITONSERVER_Error* Value(double* value);
TRITONSERVER_Error* Increment(double value);
TRITONSERVER_Error* Set(double value);
TRITONSERVER_Error* Observe(double value);

// If a MetricFamily is deleted before its dependent Metric, we want to
// invalidate the references so we don't access invalid memory.
Expand Down
3 changes: 2 additions & 1 deletion src/metrics.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -35,6 +35,7 @@

#include "prometheus/counter.h"
#include "prometheus/gauge.h"
#include "prometheus/histogram.h"
#include "prometheus/registry.h"
#include "prometheus/serializer.h"
#include "prometheus/summary.h"
Expand Down
Loading
Loading