Skip to content

Commit

Permalink
consolidate per shard RocksDB properties (#15998)
Browse files Browse the repository at this point in the history
Splitting them only serves to avoid a warning and doesn't reduce the number of
metrics.

This change enlarges the warning threshold instead.
  • Loading branch information
msmouse authored Mar 1, 2025
1 parent 1f9607d commit 81007c2
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 75 deletions.
12 changes: 6 additions & 6 deletions crates/aptos-faucet/metrics-server/src/gather_metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,18 @@ pub static NUM_METRICS: Lazy<IntCounterVec> = Lazy::new(|| {
pub fn gather_metrics() -> Vec<prometheus::proto::MetricFamily> {
let metric_families = aptos_metrics_core::gather();
let mut total: u64 = 0;
let mut families_over_1000: u64 = 0;
let mut families_over_2000: u64 = 0;

// Take metrics of metric gathering so we know possible overhead of this process
for metric_family in &metric_families {
let family_count = metric_family.get_metric().len();
if family_count > 1000 {
families_over_1000 = families_over_1000.saturating_add(1);
if family_count > 2000 {
families_over_2000 = families_over_2000.saturating_add(1);
let name = metric_family.get_name();
warn!(
count = family_count,
metric_family = name,
"Metric Family '{}' over 1000 dimensions '{}'",
"Metric Family '{}' over 2000 dimensions '{}'",
name,
family_count
);
Expand All @@ -37,8 +37,8 @@ pub fn gather_metrics() -> Vec<prometheus::proto::MetricFamily> {
// These metrics will be reported on the next pull, rather than create a new family
NUM_METRICS.with_label_values(&["total"]).inc_by(total);
NUM_METRICS
.with_label_values(&["families_over_1000"])
.inc_by(families_over_1000);
.with_label_values(&["families_over_2000"])
.inc_by(families_over_2000);

metric_families
}
12 changes: 6 additions & 6 deletions crates/aptos-inspection-service/src/server/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,18 @@ pub fn get_encoded_metrics(encoder: impl Encoder) -> Vec<u8> {
fn get_metric_families() -> Vec<MetricFamily> {
let metric_families = aptos_metrics_core::gather();
let mut total: u64 = 0;
let mut families_over_1000: u64 = 0;
let mut families_over_2000: u64 = 0;

// Take metrics of metric gathering so we know possible overhead of this process
for metric_family in &metric_families {
let family_count = metric_family.get_metric().len();
if family_count > 1000 {
families_over_1000 = families_over_1000.saturating_add(1);
if family_count > 2000 {
families_over_2000 = families_over_2000.saturating_add(1);
let name = metric_family.get_name();
warn!(
count = family_count,
metric_family = name,
"Metric Family '{}' over 1000 dimensions '{}'",
"Metric Family '{}' over 2000 dimensions '{}'",
name,
family_count
);
Expand All @@ -72,8 +72,8 @@ fn get_metric_families() -> Vec<MetricFamily> {
// These metrics will be reported on the next pull, rather than create a new family
NUM_METRICS.with_label_values(&["total"]).inc_by(total);
NUM_METRICS
.with_label_values(&["families_over_1000"])
.inc_by(families_over_1000);
.with_label_values(&["families_over_2000"])
.inc_by(families_over_2000);

metric_families
}
Expand Down
48 changes: 11 additions & 37 deletions storage/aptosdb/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,45 +156,19 @@ pub static ROCKSDB_PROPERTIES: Lazy<IntGaugeVec> = Lazy::new(|| {
.unwrap()
});

pub(crate) static STATE_KV_DB_PROPERTIES_METRIC_VECTOR: Lazy<Vec<IntGaugeVec>> = Lazy::new(|| {
(0..16)
.map(|shard_id| {
register_int_gauge_vec!(
// metric name
&format!("aptos_state_kv_db_properties_{}", shard_id),
// metric description
&format!(
"StateKvDb rocksdb integer properties for shard {}",
shard_id
),
// metric labels (dimensions)
&["cf_name", "property_name"]
)
.unwrap()
})
.collect()
/// Gauge vector holding integer RocksDB properties for sharded databases.
///
/// A single metric family, `aptos_rocksdb_shard_properties`, covers every
/// shard; individual series are distinguished by the `shard_id`, `cf_name`
/// and `property_name` labels rather than by per-shard metric names.
///
/// The gauge vector is registered lazily on first access; the `unwrap()`
/// panics if that registration returns an error.
pub static ROCKSDB_SHARD_PROPERTIES: Lazy<IntGaugeVec> = Lazy::new(|| {
register_int_gauge_vec!(
// metric name
"aptos_rocksdb_shard_properties",
// metric description
"sharded rocksdb integer properties",
// metric labels (dimensions): shard, column family, property
&["shard_id", "cf_name", "property_name",]
)
.unwrap()
});

pub(crate) static STATE_MERKLE_DB_PROPERTIES_METRIC_VECTOR: Lazy<Vec<IntGaugeVec>> =
Lazy::new(|| {
(0..16)
.map(|shard_id| {
register_int_gauge_vec!(
// metric name
&format!("aptos_state_merkle_db_properties_{}", shard_id),
// metric description
&format!(
"StateMerkleDb rocksdb integer properties for shard {}",
shard_id
),
// metric labels (dimensions)
&["cf_name", "property_name"]
)
.unwrap()
})
.collect()
});

// Async committer gauges:
pub(crate) static LATEST_SNAPSHOT_VERSION: Lazy<IntGauge> = Lazy::new(|| {
register_int_gauge!(
Expand Down
41 changes: 15 additions & 26 deletions storage/aptosdb/src/rocksdb_property_reporter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,14 @@ use crate::{
write_set_db_column_families,
},
ledger_db::LedgerDb,
metrics::{
OTHER_TIMERS_SECONDS, ROCKSDB_PROPERTIES, STATE_KV_DB_PROPERTIES_METRIC_VECTOR,
STATE_MERKLE_DB_PROPERTIES_METRIC_VECTOR,
},
metrics::{OTHER_TIMERS_SECONDS, ROCKSDB_PROPERTIES, ROCKSDB_SHARD_PROPERTIES},
state_kv_db::StateKvDb,
state_merkle_db::StateMerkleDb,
};
use anyhow::Result;
use aptos_infallible::Mutex;
use aptos_logger::prelude::*;
use aptos_metrics_core::IntGaugeVec;
use aptos_schemadb::DB;
use aptos_schemadb::{ColumnFamilyName, DB};
use aptos_storage_interface::state_store::NUM_STATE_SHARDS;
use once_cell::sync::Lazy;
use std::{
Expand Down Expand Up @@ -84,16 +80,19 @@ fn set_property(cf_name: &str, db: &DB) -> Result<()> {
Ok(())
}

fn set_shard_property(
cf_name: &str,
db: &DB,
db_shard_id: usize,
metrics: &Lazy<Vec<IntGaugeVec>>,
) -> Result<()> {
// Decimal string for each shard index, looked up by shard id and passed as
// the `shard_id` label value when reporting per-shard properties.
// The array length is tied to NUM_STATE_SHARDS, so the list below must
// contain exactly that many entries or the constant will not compile.
// NOTE(review): presumably a static table is used so that no string has to
// be formatted on every property update — confirm.
const SHARD_NAME_BY_ID: [&str; NUM_STATE_SHARDS] = [
"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
];

fn set_shard_property(cf_name: ColumnFamilyName, db: &DB, shard: usize) -> Result<()> {
if !skip_reporting_cf(cf_name) {
for (rockdb_property_name, aptos_rocksdb_property_name) in &*ROCKSDB_PROPERTY_MAP {
metrics[db_shard_id]
.with_label_values(&[cf_name, aptos_rocksdb_property_name])
ROCKSDB_SHARD_PROPERTIES
.with_label_values(&[
SHARD_NAME_BY_ID[shard],
cf_name,
aptos_rocksdb_property_name,
])
.set(db.get_property(cf_name, rockdb_property_name)? as i64);
}
}
Expand Down Expand Up @@ -144,12 +143,7 @@ fn update_rocksdb_properties(
for cf in state_kv_db_new_key_column_families() {
set_property(cf, state_kv_db.metadata_db())?;
for shard in 0..NUM_STATE_SHARDS {
set_shard_property(
cf,
state_kv_db.db_shard(shard as u8),
shard,
&STATE_KV_DB_PROPERTIES_METRIC_VECTOR,
)?;
set_shard_property(cf, state_kv_db.db_shard(shard as u8), shard)?;
}
}
}
Expand All @@ -163,12 +157,7 @@ fn update_rocksdb_properties(
set_property(cf_name, state_merkle_db.metadata_db())?;
if state_merkle_db.sharding_enabled() {
for shard in 0..NUM_STATE_SHARDS {
set_shard_property(
cf_name,
state_merkle_db.db_shard(shard as u8),
shard,
&STATE_MERKLE_DB_PROPERTIES_METRIC_VECTOR,
)?;
set_shard_property(cf_name, state_merkle_db.db_shard(shard as u8), shard)?;
}
}
}
Expand Down

0 comments on commit 81007c2

Please sign in to comment.