feat: Add new histogram metric type (#374) (#376)
Co-authored-by: Yingge He <[email protected]>
mc-nv and yinggeh authored Aug 19, 2024
1 parent 1393d6e commit bbb523d
Showing 9 changed files with 169 additions and 29 deletions.
6 changes: 3 additions & 3 deletions README.md
@@ -1656,12 +1656,12 @@ import triton_python_backend_utils as pb_utils
class TritonPythonModel:
def initialize(self, args):
# Create a MetricFamily object to report the latency of the model
# execution. The 'kind' parameter must be either 'COUNTER' or
# 'GAUGE'.
# execution. The 'kind' parameter must be either 'COUNTER',
# 'GAUGE' or 'HISTOGRAM'.
self.metric_family = pb_utils.MetricFamily(
name="preprocess_latency_ns",
description="Cumulative time spent pre-processing requests",
kind=pb_utils.MetricFamily.COUNTER # or pb_utils.MetricFamily.GAUGE
kind=pb_utils.MetricFamily.COUNTER
)

# Create a Metric object under the MetricFamily object. The 'labels'
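
Note on usage (not part of the commit): the README snippet above keeps COUNTER as its example, but with this change a family can also be created with the HISTOGRAM kind, in which case bucket boundaries are supplied when the metric is created and values are recorded with an observe call. The sketch below is illustrative only; it assumes the stub exposes the new C++ arguments as pb_utils.MetricFamily.HISTOGRAM, a buckets keyword on Metric(), and an observe() method, since the Python binding file is not shown in this excerpt of the diff.

import triton_python_backend_utils as pb_utils

class TritonPythonModel:
    def initialize(self, args):
        # Assumed Python-level surface for MetricFamily::CreateMetric(labels, buckets)
        # and Metric::SendObserveRequest(value) added in this commit.
        self.latency_family = pb_utils.MetricFamily(
            name="preprocess_latency_histogram_ns",
            description="Distribution of time spent pre-processing requests",
            kind=pb_utils.MetricFamily.HISTOGRAM,
        )
        # Bucket boundaries must be monotonically increasing; they are required
        # for HISTOGRAM families and rejected for COUNTER/GAUGE families.
        self.latency_metric = self.latency_family.Metric(
            labels={"model": "preprocess", "version": "1"},
            buckets=[1e5, 1e6, 1e7, 1e8],
        )

    def execute(self, requests):
        responses = []
        for request in requests:
            # ... pre-process the request and measure how long it took ...
            observed_latency_ns = 2.5e6  # placeholder measurement
            self.latency_metric.observe(observed_latency_ns)
            responses.append(pb_utils.InferenceResponse(output_tensors=[]))
        return responses
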
3 changes: 2 additions & 1 deletion src/ipc_message.h
@@ -1,4 +1,4 @@
// Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -63,6 +63,7 @@ typedef enum PYTHONSTUB_commandtype_enum {
PYTHONSTUB_MetricRequestValue,
PYTHONSTUB_MetricRequestIncrement,
PYTHONSTUB_MetricRequestSet,
PYTHONSTUB_MetricRequestObserve,
PYTHONSTUB_LoadModelRequest,
PYTHONSTUB_UnloadModelRequest,
PYTHONSTUB_ModelReadinessRequest,
106 changes: 97 additions & 9 deletions src/metric.cc
@@ -1,4 +1,4 @@
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -32,9 +32,12 @@

namespace triton { namespace backend { namespace python {

Metric::Metric(const std::string& labels, void* metric_family_address)
: labels_(labels), operation_value_(0), metric_address_(nullptr),
metric_family_address_(metric_family_address), is_cleared_(false)
Metric::Metric(
const std::string& labels, std::optional<const std::vector<double>> buckets,
void* metric_family_address)
: labels_(labels), buckets_(buckets), operation_value_(0),
metric_address_(nullptr), metric_family_address_(metric_family_address),
is_cleared_(false)
{
#ifdef TRITON_PB_STUB
SendCreateMetricRequest();
@@ -62,6 +65,20 @@ Metric::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
custom_metric_shm_ptr_->metric_family_address = metric_family_address_;
custom_metric_shm_ptr_->metric_address = metric_address_;

// Histogram specific case
if (buckets_.has_value()) {
auto buckets_size = buckets_.value().size() * sizeof(double);
std::unique_ptr<PbMemory> buckets_shm = PbMemory::Create(
shm_pool, TRITONSERVER_MemoryType::TRITONSERVER_MEMORY_CPU, 0,
buckets_size, reinterpret_cast<char*>(buckets_.value().data()),
false /* copy_gpu */);
custom_metric_shm_ptr_->buckets_shm_handle = buckets_shm->ShmHandle();
buckets_shm_ = std::move(buckets_shm);
} else {
custom_metric_shm_ptr_->buckets_shm_handle = 0;
buckets_shm_ = nullptr;
}

// Save the references to shared memory.
custom_metric_shm_ = std::move(custom_metric_shm);
labels_shm_ = std::move(labels_shm);
@@ -80,17 +97,40 @@ Metric::LoadFromSharedMemory(
std::unique_ptr<PbString> labels_shm = PbString::LoadFromSharedMemory(
shm_pool, custom_metric_shm_ptr->labels_shm_handle);

return std::unique_ptr<Metric>(new Metric(custom_metric_shm, labels_shm));
std::unique_ptr<PbMemory> buckets_shm = nullptr;
if (custom_metric_shm_ptr->buckets_shm_handle != 0) {
buckets_shm = PbMemory::LoadFromSharedMemory(
shm_pool, custom_metric_shm_ptr->buckets_shm_handle,
false /* open_cuda_handle */);
}

return std::unique_ptr<Metric>(
new Metric(custom_metric_shm, labels_shm, buckets_shm));
}

Metric::Metric(
AllocatedSharedMemory<MetricShm>& custom_metric_shm,
std::unique_ptr<PbString>& labels_shm)
std::unique_ptr<PbString>& labels_shm,
std::unique_ptr<PbMemory>& buckets_shm)
: custom_metric_shm_(std::move(custom_metric_shm)),
labels_shm_(std::move(labels_shm))
labels_shm_(std::move(labels_shm)), buckets_shm_(std::move(buckets_shm))
{
custom_metric_shm_ptr_ = custom_metric_shm_.data_.get();

// FIXME: This constructor is called during each
// set/increment/observe/get_value call. It only needs the pointers.
labels_ = labels_shm_->String();
if (buckets_shm_ != nullptr) { // Histogram
size_t bucket_size = buckets_shm_->ByteSize() / sizeof(double);
std::vector<double> buckets;
buckets.reserve(bucket_size);
for (size_t i = 0; i < bucket_size; ++i) {
buckets.emplace_back(
reinterpret_cast<double*>(buckets_shm_->DataPtr())[i]);
}
buckets_ = std::move(buckets);
}

operation_value_ = custom_metric_shm_ptr_->operation_value;
metric_family_address_ = custom_metric_shm_ptr_->metric_family_address;
metric_address_ = custom_metric_shm_ptr_->metric_address;
@@ -161,6 +201,24 @@ Metric::SendSetValueRequest(const double& value)
}
}

void
Metric::SendObserveRequest(const double& value)
{
try {
CheckIfCleared();
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
operation_value_ = value;
SaveToSharedMemory(stub->ShmPool());
AllocatedSharedMemory<CustomMetricsMessage> custom_metrics_shm;
stub->SendMessage<CustomMetricsMessage>(
custom_metrics_shm, PYTHONSTUB_MetricRequestObserve, shm_handle_);
}
catch (const PythonBackendException& pb_exception) {
throw PythonBackendException(
"Failed to observe metric value: " + std::string(pb_exception.what()));
}
}

double
Metric::SendGetValueRequest()
{
@@ -222,14 +280,35 @@ Metric::InitializeTritonMetric()
{
std::vector<const TRITONSERVER_Parameter*> labels_params;
ParseLabels(labels_params, labels_);
TRITONSERVER_MetricKind kind;
THROW_IF_TRITON_ERROR(TRITONSERVER_GetMetricFamilyKind(
reinterpret_cast<TRITONSERVER_MetricFamily*>(metric_family_address_),
&kind));
TRITONSERVER_MetricArgs* args = nullptr;
switch (kind) {
case TRITONSERVER_METRIC_KIND_COUNTER:
case TRITONSERVER_METRIC_KIND_GAUGE:
break;
case TRITONSERVER_METRIC_KIND_HISTOGRAM: {
const std::vector<double>& buckets = buckets_.value();
THROW_IF_TRITON_ERROR(TRITONSERVER_MetricArgsNew(&args));
THROW_IF_TRITON_ERROR(TRITONSERVER_MetricArgsSetHistogram(
args, buckets.data(), buckets.size()));
break;
}
default:
break;
}

TRITONSERVER_Metric* triton_metric = nullptr;
THROW_IF_TRITON_ERROR(TRITONSERVER_MetricNew(
THROW_IF_TRITON_ERROR(TRITONSERVER_MetricNewWithArgs(
&triton_metric,
reinterpret_cast<TRITONSERVER_MetricFamily*>(metric_family_address_),
labels_params.data(), labels_params.size()));
labels_params.data(), labels_params.size(), args));
for (const auto label : labels_params) {
TRITONSERVER_ParameterDelete(const_cast<TRITONSERVER_Parameter*>(label));
}
THROW_IF_TRITON_ERROR(TRITONSERVER_MetricArgsDelete(args));
return reinterpret_cast<void*>(triton_metric);
}

@@ -262,6 +341,8 @@ Metric::HandleMetricOperation(
Increment(operation_value_);
} else if (command_type == PYTHONSTUB_MetricRequestSet) {
SetValue(operation_value_);
} else if (command_type == PYTHONSTUB_MetricRequestObserve) {
Observe(operation_value_);
} else {
throw PythonBackendException("Unknown metric operation");
}
@@ -281,6 +362,13 @@ Metric::SetValue(const double& value)
THROW_IF_TRITON_ERROR(TRITONSERVER_MetricSet(triton_metric, value));
}

void
Metric::Observe(const double& value)
{
auto triton_metric = reinterpret_cast<TRITONSERVER_Metric*>(metric_address_);
THROW_IF_TRITON_ERROR(TRITONSERVER_MetricObserve(triton_metric, value));
}

double
Metric::GetValue()
{
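
Aside (not part of the commit): Observe() hands the value to TRITONSERVER_MetricObserve, which records it against the bucket boundaries fixed at metric creation. Purely as a conceptual aid, the sketch below shows how Prometheus-style histograms typically map one observation onto monotonically increasing upper bounds plus an implicit +Inf bucket; it is not Triton's implementation.

import bisect

def observe(value, boundaries, bucket_counts, state):
    # Find the first upper bound that is >= value ("le" semantics); an index
    # equal to len(boundaries) means the implicit +Inf bucket.
    idx = bisect.bisect_left(boundaries, value)
    bucket_counts[idx] += 1
    state["sum"] += value
    state["count"] += 1

boundaries = [0.1, 1.0, 2.5, 10.0]           # monotonically increasing
bucket_counts = [0] * (len(boundaries) + 1)  # extra slot for +Inf
state = {"sum": 0.0, "count": 0}

for v in [0.05, 0.4, 3.2, 42.0]:
    observe(v, boundaries, bucket_counts, state)

print(bucket_counts, state)  # [1, 1, 0, 1, 1], sum ~45.65 over 4 observations
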
26 changes: 23 additions & 3 deletions src/metric.h
@@ -1,4 +1,4 @@
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -26,9 +26,11 @@

#pragma once

#include <optional>
#include <string>

#include "ipc_message.h"
#include "pb_memory.h"
#include "pb_string.h"
#include "pb_utils.h"

@@ -47,6 +49,8 @@ namespace triton { namespace backend { namespace python {
struct MetricShm {
// The shared memory handle of the labels in PbString format.
bi::managed_external_buffer::handle_t labels_shm_handle;
// The shared memory handle of the buckets in PbMemory format.
bi::managed_external_buffer::handle_t buckets_shm_handle;
// The value used for incrementing or setting the metric.
double operation_value;
// The address of the TRITONSERVER_Metric object.
@@ -58,7 +62,10 @@ struct MetricShm {

class Metric {
public:
Metric(const std::string& labels, void* metric_family_address);
Metric(
const std::string& labels,
std::optional<const std::vector<double>> buckets,
void* metric_family_address);

~Metric();

@@ -97,6 +104,10 @@ class Metric {
/// \param value The value to set the metric to.
void SendSetValueRequest(const double& value);

/// Send the request to the parent process to record an observation of the
/// given value for the metric.
/// \param value The value to observe.
void SendObserveRequest(const double& value);

/// Send the request to the parent process to get the value of the metric.
/// \return Returns the value of the metric.
double SendGetValueRequest();
@@ -132,6 +143,10 @@ class Metric {
/// \param value The value to set the metric to.
void SetValue(const double& value);

/// Use Triton C API to record an observation of the given value for the
/// metric.
/// \param value The value to observe.
void Observe(const double& value);

/// Use Triton C API to get the value of the metric.
double GetValue();

@@ -146,10 +161,14 @@ class Metric {
// The private constructor for creating a Metric object from shared memory.
Metric(
AllocatedSharedMemory<MetricShm>& custom_metric_shm,
std::unique_ptr<PbString>& labels_shm);
std::unique_ptr<PbString>& labels_shm,
std::unique_ptr<PbMemory>& buckets);

// The labels of the metric, which is the identifier of the metric.
std::string labels_;
// Monotonically increasing values representing bucket boundaries for creating
// a histogram metric.
std::optional<std::vector<double>> buckets_;
// The value used for incrementing or setting the metric.
double operation_value_;
// The address of the TRITONSERVER_Metric object.
@@ -168,6 +187,7 @@ class Metric {
MetricShm* custom_metric_shm_ptr_;
bi::managed_external_buffer::handle_t shm_handle_;
std::unique_ptr<PbString> labels_shm_;
std::unique_ptr<PbMemory> buckets_shm_;
};

}}}; // namespace triton::backend::python
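
Aside (not part of the commit): because the stub and the parent server are separate processes, the bucket boundaries declared here have to cross shared memory. metric.cc writes them as a PbMemory block of buckets.size() * sizeof(double) bytes and rebuilds the vector from ByteSize() / sizeof(double) entries. The standard-library sketch below only illustrates that flat array-of-doubles layout; it is not the backend's shared-memory code.

import struct

buckets = [0.005, 0.05, 0.5, 5.0]

# Serialize: N doubles -> N * 8 bytes, mirroring buckets.size() * sizeof(double).
payload = struct.pack(f"{len(buckets)}d", *buckets)
assert len(payload) == len(buckets) * 8

# Deserialize: byte size / 8 recovers the bucket count, mirroring
# buckets_shm_->ByteSize() / sizeof(double).
recovered = list(struct.unpack(f"{len(payload) // 8}d", payload))
assert recovered == buckets
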
32 changes: 27 additions & 5 deletions src/metric_family.cc
@@ -1,4 +1,4 @@
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -166,19 +166,39 @@ MetricFamily::SendCreateMetricFamilyRequest()
}

std::shared_ptr<Metric>
MetricFamily::CreateMetric(const py::object& labels)
MetricFamily::CreateMetric(const py::object& labels, const py::object& buckets)
{
if (!labels.is_none()) {
if (!py::isinstance<py::dict>(labels)) {
throw PythonBackendException(
"Failed to create metric. Labels must be a "
"dictionary.");
"Failed to create metric. Labels must be a dictionary.");
}
}

py::module json = py::module_::import("json");
std::string labels_str = std::string(py::str(json.attr("dumps")(labels)));
auto metric = std::make_shared<Metric>(labels_str, metric_family_address_);

std::optional<std::vector<double>> buckets_vec;
if (!buckets.is_none()) {
if (!py::isinstance<py::list>(buckets)) {
throw PythonBackendException(
"Failed to create metric. Buckets must be a list.");
}
if (kind_ == kCounter || kind_ == kGauge) {
throw PythonBackendException(
"Failed to create metric. Unexpected buckets found.");
}
buckets_vec = buckets.cast<std::vector<double>>();
} else {
if (kind_ == kHistogram) {
throw PythonBackendException(
"Failed to create metric. Missing required buckets.");
}
buckets_vec = std::nullopt;
}

auto metric =
std::make_shared<Metric>(labels_str, buckets_vec, metric_family_address_);
{
std::lock_guard<std::mutex> lock(metric_map_mu_);
metric_map_.insert({metric->MetricAddress(), metric});
Expand All @@ -205,6 +225,8 @@ MetricFamily::ToTritonServerMetricKind(const MetricKind& kind)
return TRITONSERVER_METRIC_KIND_COUNTER;
case kGauge:
return TRITONSERVER_METRIC_KIND_GAUGE;
case kHistogram:
return TRITONSERVER_METRIC_KIND_HISTOGRAM;
default:
throw PythonBackendException("Unknown metric kind");
}
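
Aside (not part of the commit): CreateMetric now ties the buckets argument to the family kind: it must be a Python list, it is required for HISTOGRAM, and it is rejected for COUNTER and GAUGE. Assuming the same Python-level names as in the earlier sketch (pb_utils.MetricFamily and Metric(labels=..., buckets=...)), the accepted and rejected combinations look roughly like this:

import triton_python_backend_utils as pb_utils

counter_family = pb_utils.MetricFamily(
    name="requests_total", description="Requests seen",
    kind=pb_utils.MetricFamily.COUNTER)
histogram_family = pb_utils.MetricFamily(
    name="request_latency_ns", description="Request latency distribution",
    kind=pb_utils.MetricFamily.HISTOGRAM)

counter_family.Metric(labels={"model": "m"})                          # OK
histogram_family.Metric(labels={"model": "m"},
                        buckets=[0.1, 1.0, 10.0])                     # OK

# Each of the following is rejected by CreateMetric with the errors shown in the diff:
# counter_family.Metric(labels={"model": "m"}, buckets=[1, 2, 3])     # "Unexpected buckets found."
# histogram_family.Metric(labels={"model": "m"})                      # "Missing required buckets."
# histogram_family.Metric(labels={"model": "m"}, buckets=(1.0, 2.0))  # "Buckets must be a list."
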
11 changes: 7 additions & 4 deletions src/metric_family.h
@@ -1,4 +1,4 @@
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -97,8 +97,11 @@ class MetricFamily {

/// Create a metric from the metric family and store it in the metric map.
/// \param labels The labels of the metric.
/// \param buckets Monotonically increasing values representing bucket
/// boundaries for creating a histogram metric.
/// \return Returns the shared pointer to the created metric.
std::shared_ptr<Metric> CreateMetric(const py::object& labels);
std::shared_ptr<Metric> CreateMetric(
const py::object& labels, const py::object& buckets);
#else
/// Initialize the TRITONSERVER_MetricFamily object.
/// \return Returns the address of the TRITONSERVER_MetricFamily object.
@@ -128,8 +131,8 @@ class MetricFamily {
std::string name_;
// The description of the metric family.
std::string description_;
// The metric kind of the metric family. Currently only supports GAUGE and
// COUNTER.
// The metric kind of the metric family. Currently only supports GAUGE,
// COUNTER and HISTOGRAM.
MetricKind kind_;
// The address of the TRITONSERVER_MetricFamily object.
void* metric_family_address_;
