From bb9b06ffb0734cf85cc222dd7e50cfbca9662a44 Mon Sep 17 00:00:00 2001 From: Peng Junzhi <78788603+Pengzna@users.noreply.github.com> Date: Mon, 21 Oct 2024 12:28:32 +0800 Subject: [PATCH] Metric: Discard Error Metric When Taking Snapshot (#13823) (cherry picked from commit 84fe96b31d4b0f0ea4dbdd46e95aa710aa17de53) --- .../iotdb/metrics/core/type/IoTDBTimer.java | 29 ++++++++++++++----- .../prometheus/PrometheusReporter.java | 6 ++++ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/iotdb-core/metrics/core/src/main/java/org/apache/iotdb/metrics/core/type/IoTDBTimer.java b/iotdb-core/metrics/core/src/main/java/org/apache/iotdb/metrics/core/type/IoTDBTimer.java index 8fef7f2485c4..19def110f4b4 100644 --- a/iotdb-core/metrics/core/src/main/java/org/apache/iotdb/metrics/core/type/IoTDBTimer.java +++ b/iotdb-core/metrics/core/src/main/java/org/apache/iotdb/metrics/core/type/IoTDBTimer.java @@ -23,10 +23,15 @@ import org.apache.iotdb.metrics.type.Timer; import org.apache.iotdb.metrics.utils.AbstractMetricMBean; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Optional; import java.util.concurrent.TimeUnit; public class IoTDBTimer extends AbstractMetricMBean implements Timer, IoTDBTimerMBean { + private static final Logger LOGGER = LoggerFactory.getLogger(IoTDBTimer.class); io.micrometer.core.instrument.Timer timer; public IoTDBTimer(io.micrometer.core.instrument.Timer timer) { @@ -40,37 +45,47 @@ public void update(long duration, TimeUnit unit) { @Override public HistogramSnapshot takeSnapshot() { - return new IoTDBTimerHistogramSnapshot(timer); + try { + return new IoTDBTimerHistogramSnapshot(timer); + } catch (ArrayIndexOutOfBoundsException e) { + LOGGER.warn( + "Detected an error while taking snapshot, may cause a miss during this recording.", e); + return null; + } } @Override public double getSum() { - return this.takeSnapshot().getSum(); + return 
Optional.ofNullable(takeSnapshot()).map(HistogramSnapshot::getSum).orElse(0.0); } @Override public double getMax() { - return this.takeSnapshot().getMax(); + return Optional.ofNullable(takeSnapshot()).map(HistogramSnapshot::getMax).orElse(0.0); } @Override public double getMean() { - return this.takeSnapshot().getMean(); + return Optional.ofNullable(takeSnapshot()).map(HistogramSnapshot::getMean).orElse(0.0); } @Override public int getSize() { - return this.takeSnapshot().size(); + return Optional.ofNullable(takeSnapshot()).map(HistogramSnapshot::size).orElse(0); } @Override public double get50thPercentile() { - return this.takeSnapshot().getValue(0.5); + return Optional.ofNullable(takeSnapshot()) + .map(histogramSnapshot -> histogramSnapshot.getValue(0.5)) + .orElse(0.0); } @Override public double get99thPercentile() { - return this.takeSnapshot().getValue(0.99); + return Optional.ofNullable(takeSnapshot()) + .map(histogramSnapshot -> histogramSnapshot.getValue(0.99)) + .orElse(0.0); } @Override diff --git a/iotdb-core/metrics/interface/src/main/java/org/apache/iotdb/metrics/reporter/prometheus/PrometheusReporter.java b/iotdb-core/metrics/interface/src/main/java/org/apache/iotdb/metrics/reporter/prometheus/PrometheusReporter.java index 07f5dab4e1c6..08f1e7becd77 100644 --- a/iotdb-core/metrics/interface/src/main/java/org/apache/iotdb/metrics/reporter/prometheus/PrometheusReporter.java +++ b/iotdb-core/metrics/interface/src/main/java/org/apache/iotdb/metrics/reporter/prometheus/PrometheusReporter.java @@ -50,6 +50,7 @@ import java.time.Duration; import java.util.HashMap; import java.util.Map; +import java.util.Objects; public class PrometheusReporter implements Reporter { private static final Logger LOGGER = LoggerFactory.getLogger(PrometheusReporter.class); @@ -144,6 +145,11 @@ private String scrape() { } else if (metric instanceof Timer) { Timer timer = (Timer) metric; HistogramSnapshot snapshot = timer.takeSnapshot(); + if (Objects.isNull(snapshot)) { + 
LOGGER.warn( + "Detected an error when taking metric timer snapshot, will discard this metric"); + continue; + } name += "_seconds"; writeSnapshotAndCount( name,