From 3b16f6621ea7bb3f18d69b2f039f6e2aed5ed87d Mon Sep 17 00:00:00 2001 From: Gianbelinche <39842759+gianbelinche@users.noreply.github.com> Date: Mon, 16 Sep 2024 17:41:21 -0300 Subject: [PATCH] Dashboard metrics (#268) * Add observability dashboard to init * Add blob retrieval metric server * Add metrics to docker * Format code * Change yq and sponge for node modules * Change blob size to 2MB * Format code --- .dockerignore | 1 + Cargo.lock | 123 +++++++++- Cargo.toml | 1 + EigenDA.json | 333 +++++++++++++++++++++++++++ core/node/da_clients/src/eigen_da.rs | 2 +- docker-compose.yml | 13 +- docker/retrieve-blobs/Dockerfile | 35 +++ etc/env/configs/dev_validium.toml | 2 +- infrastructure/zk/src/init.ts | 45 +++- retrieve-blobs/.gitignore | 1 + retrieve-blobs/Cargo.toml | 12 + retrieve-blobs/src/main.rs | 114 +++++++++ 12 files changed, 675 insertions(+), 7 deletions(-) create mode 100644 EigenDA.json create mode 100644 docker/retrieve-blobs/Dockerfile create mode 100644 retrieve-blobs/.gitignore create mode 100644 retrieve-blobs/Cargo.toml create mode 100644 retrieve-blobs/src/main.rs diff --git a/.dockerignore b/.dockerignore index 39efdabca19..aa2384a4af8 100644 --- a/.dockerignore +++ b/.dockerignore @@ -17,6 +17,7 @@ keys/setup !db/ !backups/ !core/ +!retrieve-blobs/ !prover/ !yarn.lock !package.json diff --git a/Cargo.lock b/Cargo.lock index 22217d0ea31..3402f61f396 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2290,6 +2290,12 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "fallible-iterator" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" + [[package]] name = "fastrand" version = "2.0.1" @@ -4881,6 +4887,24 @@ dependencies = [ "indexmap 2.1.0", ] +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher 0.3.11", +] + [[package]] name = "pin-project" version = "1.1.3" @@ -5027,6 +5051,35 @@ dependencies = [ "universal-hash", ] +[[package]] +name = "postgres-protocol" +version = "0.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acda0ebdebc28befa84bee35e651e4c5f09073d668c7aed4cf7e23c3cda84b23" +dependencies = [ + "base64 0.22.1", + "byteorder", + "bytes", + "fallible-iterator", + "hmac 0.12.1", + "md-5", + "memchr", + "rand 0.8.5", + "sha2 0.10.8", + "stringprep", +] + +[[package]] +name = "postgres-types" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f66ea23a2d0e5734297357705193335e0a957696f34bed2f2faefacb2fec336f" +dependencies = [ + "bytes", + "fallible-iterator", + "postgres-protocol", +] + [[package]] name = "powerfmt" version = "0.2.0" @@ -5158,6 +5211,21 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prometheus" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d33c28a30771f7f96db69893f78b857f7450d7e0237e9c8fc6427a81bae7ed1" +dependencies = [ + "cfg-if", + "fnv", + "lazy_static", + "memchr", + "parking_lot", + "protobuf", + "thiserror", +] + [[package]] name = "prometheus-client" version = "0.22.2" @@ -5274,6 +5342,12 @@ dependencies = [ "prost 0.12.1", ] +[[package]] +name = "protobuf" +version = "2.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" + [[package]] name = "protox" version = "0.5.1" @@ -5679,6 +5753,18 @@ dependencies = [ "typemap_rev", ] +[[package]] +name = 
"retrieve-blobs" +version = "0.1.0" +dependencies = [ + "hyper 0.14.29", + "lazy_static", + "prometheus", + "reqwest 0.12.5", + "tokio", + "tokio-postgres", +] + [[package]] name = "rfc6979" version = "0.3.1" @@ -6683,6 +6769,12 @@ dependencies = [ "time", ] +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + [[package]] name = "siphasher" version = "1.0.1" @@ -6794,7 +6886,7 @@ dependencies = [ "serde_json", "sha2 0.10.8", "sha3 0.10.8", - "siphasher", + "siphasher 1.0.1", "slab", "smallvec", "soketto 0.7.1", @@ -6833,7 +6925,7 @@ dependencies = [ "rand_chacha", "serde", "serde_json", - "siphasher", + "siphasher 1.0.1", "slab", "smol", "smoldot", @@ -7767,6 +7859,32 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-postgres" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b5d3742945bc7d7f210693b0c58ae542c6fd47b17adbbda0885f3dcb34a6bdb" +dependencies = [ + "async-trait", + "byteorder", + "bytes", + "fallible-iterator", + "futures-channel", + "futures-util", + "log", + "parking_lot", + "percent-encoding", + "phf", + "pin-project-lite", + "postgres-protocol", + "postgres-types", + "rand 0.8.5", + "socket2", + "tokio", + "tokio-util", + "whoami", +] + [[package]] name = "tokio-rustls" version = "0.24.1" @@ -8573,6 +8691,7 @@ checksum = "0fec781d48b41f8163426ed18e8fc2864c12937df9ce54c88ede7bd47270893e" dependencies = [ "redox_syscall", "wasite", + "web-sys", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 400cf9ab799..2c136d62803 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,6 +80,7 @@ members = [ "core/tests/test_account", "core/tests/loadnext", "core/tests/vm-benchmark", + "retrieve-blobs" ] resolver = "2" diff --git a/EigenDA.json b/EigenDA.json new file mode 100644 index 00000000000..cee1ffdcd76 --- /dev/null +++ b/EigenDA.json @@ -0,0 +1,333 @@ +{ 
+ "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + }, + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": false, + "expr": "any", + "hide": false, + "iconColor": "red", + "name": "flux", + "tagKeys": "flux", + "target": { + "refId": "Anno", + "tags": [ + "flux" + ], + "type": "tags" + } + } + ] + }, + "description": "", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": 4, + "links": [ + { + "asDropdown": false, + "icon": "external link", + "includeVars": false, + "keepTime": false, + "tags": [ + "Era-Main" + ], + "targetBlank": false, + "title": "", + "tooltip": "", + "type": "dashboards", + "url": "" + }, + { + "asDropdown": true, + "icon": "external link", + "includeVars": false, + "keepTime": false, + "tags": [ + "Era" + ], + "targetBlank": false, + "title": "Other Era Dashboards", + "tooltip": "", + "type": "dashboards", + "url": "" + } + ], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 11035, + "panels": [], + "title": "EigenDA", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": 
"linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 11034, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "blob_retrievals_total", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Blob Total", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Blob Amount", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 11036, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "blob_avg_size", + "fullMetaSearch": false, + 
"includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Blob Avg Size (Bytes)", + "type": "stat" + } + ], + "refresh": "1m", + "revision": 1, + "schemaVersion": 39, + "tags": [ + "Era-Main" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "PBFA97CFB590B2093" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "^(?!prometheus-1$|prometheus$|vm-mainnet$|vm-gpu-sandbox$|vm-testnets$|vm-infra$|vm-infra-staging$|default$).*", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(server_processed_txs{stage=~\"state_keeper\"}, namespace)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(server_processed_txs{stage=~\"state_keeper\"}, namespace)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "^(?!snapshot_recovery_test$).*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timeRangeUpdatedDuringEditOrView": false, + "timepicker": {}, + "timezone": "", + "title": "EigenDA", + "uid": "0Y8DUff4j", + "version": 1, + "weekStart": "" +} diff --git a/core/node/da_clients/src/eigen_da.rs b/core/node/da_clients/src/eigen_da.rs index 54f17cce87e..65217b0d0c9 100644 --- a/core/node/da_clients/src/eigen_da.rs +++ b/core/node/da_clients/src/eigen_da.rs @@ -14,7 +14,7 @@ pub struct EigenDAClient { } impl EigenDAClient { - pub const BLOB_SIZE_LIMIT_IN_BYTES: usize = 10 * 1024 * 1024; // 10MB + pub const BLOB_SIZE_LIMIT_IN_BYTES: usize = 2 * 1024 * 
1024; // 2MB pub async fn new(config: EigenDAConfig) -> anyhow::Result { Ok(Self { diff --git a/docker-compose.yml b/docker-compose.yml index c5a6f57cbd8..165f3b7c9d8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -61,4 +61,15 @@ services: image: ghcr.io/layr-labs/eigenda-proxy ports: - "4242:4242" - command: ./eigenda-proxy --addr 0.0.0.0 --port 4242 --memstore.enabled + command: ./eigenda-proxy --addr 0.0.0.0 --port 4242 --memstore.enabled --eigenda-max-blob-length "2MiB" + + eigenda-metrics: + depends_on: + - eigenda-proxy + - postgres + build: # This is the project root + context: . + dockerfile: ./docker/retrieve-blobs/Dockerfile + ports: + - "7070:7070" + command: ./retrieve-blobs diff --git a/docker/retrieve-blobs/Dockerfile b/docker/retrieve-blobs/Dockerfile new file mode 100644 index 00000000000..6b307c2ea99 --- /dev/null +++ b/docker/retrieve-blobs/Dockerfile @@ -0,0 +1,35 @@ +# Use the official Rust image to build the application +FROM rust:latest AS builder + +# Set the working directory inside the container +WORKDIR /usr/src/zksync + +# Copy the retrieve-blobs crate (manifest and sources) into the container +COPY ./retrieve-blobs . + +# Fetch the project dependencies (this will cache the downloaded crates) +RUN cargo fetch + +# Build the application in release mode +RUN cargo build --release + +# Use a smaller image for runtime +FROM ubuntu:latest + +# Install necessary dependencies for running the application (like PostgreSQL client) +RUN apt-get update && apt-get install -y \ + libpq-dev \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Set the working directory inside the runtime container +WORKDIR /usr/src/zksync + +# Copy the built application from the builder image +COPY --from=builder /usr/src/zksync/target/release/retrieve-blobs . 
+ +# Expose the port for the Prometheus metrics server +EXPOSE 7070 + +# Set the default Rust log level (RUST_LOG) to info +ENV RUST_LOG=info diff --git a/etc/env/configs/dev_validium.toml b/etc/env/configs/dev_validium.toml index 5ed4ccb38e4..b3d8879cf1c 100644 --- a/etc/env/configs/dev_validium.toml +++ b/etc/env/configs/dev_validium.toml @@ -6,7 +6,7 @@ pubdata_overhead_part=0.0 batch_overhead_l1_gas=1000000 # This value should be higher for Validium, but now it is not compatible with the current blobs model. # For this config to be set to its proper value we need to fully integrate Validium + Blobs. -max_pubdata_per_batch=100000 +max_pubdata_per_batch=2097152 fee_model_version="V2" l1_batch_commit_data_generator_mode="Validium" diff --git a/infrastructure/zk/src/init.ts b/infrastructure/zk/src/init.ts index 6dbad67b489..484966399a1 100644 --- a/infrastructure/zk/src/init.ts +++ b/infrastructure/zk/src/init.ts @@ -15,9 +15,16 @@ import * as run from './run'; import * as server from './server'; import { createVolumes, up } from './up'; +const fs = require('fs'); +const yaml = require('yaml'); + // Checks if all required tools are installed with the correct versions -const checkEnv = async (): Promise => { +const checkEnv = async (runObservability: boolean): Promise => { const tools = ['node', 'yarn', 'docker', 'cargo']; + if (runObservability) { + tools.push('yq'); + } + for (const tool of tools) { await utils.exec(`which ${tool}`); } @@ -37,6 +44,36 @@ const submoduleUpdate = async (): Promise => { await utils.exec('git submodule update'); }; +// clone dockprom and zksync-era dashboards +const setupObservability = async (): Promise => { + // clone dockprom, era-observability repos and export era dashboards to dockprom + await utils.spawn( + `rm -rf ./target/dockprom && git clone https://github.com/stefanprodan/dockprom.git ./target/dockprom \ + && rm -rf ./target/era-observability && git clone 
https://github.com/matter-labs/era-observability.git ./target/era-observability \ + && cp ./target/era-observability/dashboards/* ./target/dockprom/grafana/provisioning/dashboards + ` + ); + + const fileContents = fs.readFileSync('./target/dockprom/prometheus/prometheus.yml', 'utf8'); + let config = yaml.parse(fileContents); + config.scrape_configs.push({ + job_name: 'proxy-blob-retriever', + scrape_interval: '5s', + honor_labels: true, + static_configs: [{ targets: ['host.docker.internal:7070'] }] + }); + config.scrape_configs.push({ + job_name: 'zksync', + scrape_interval: '5s', + honor_labels: true, + static_configs: [{ targets: ['host.docker.internal:3312'] }] + }); + const newYaml = yaml.stringify(config); + fs.writeFileSync('./target/dockprom/prometheus/prometheus.yml', newYaml, 'utf8'); + + await utils.spawn('cp EigenDA.json ./target/dockprom/grafana/provisioning/dashboards/EigenDA.json'); +}; + // Sets up docker environment and compiles contracts type InitSetupOptions = { skipEnvSetup: boolean; @@ -50,6 +87,10 @@ const initSetup = async ({ runObservability, deploymentMode }: InitSetupOptions): Promise => { + if (runObservability) { + await announced('Pulling observability repos', setupObservability()); + } + await announced( `Initializing in ${deploymentMode == contract.DeploymentMode.Validium ? 
'Validium mode' : 'Roll-up mode'}` ); @@ -58,7 +99,7 @@ const initSetup = async ({ } if (!process.env.CI && !skipEnvSetup) { await announced('Pulling images', docker.pull()); - await announced('Checking environment', checkEnv()); + await announced('Checking environment', checkEnv(runObservability)); await announced('Checking git hooks', env.gitHooks()); await announced('Create volumes', createVolumes()); await announced('Setting up containers', up(runObservability)); diff --git a/retrieve-blobs/.gitignore b/retrieve-blobs/.gitignore new file mode 100644 index 00000000000..ea8c4bf7f35 --- /dev/null +++ b/retrieve-blobs/.gitignore @@ -0,0 +1 @@ +/target diff --git a/retrieve-blobs/Cargo.toml b/retrieve-blobs/Cargo.toml new file mode 100644 index 00000000000..4de0e5d3a28 --- /dev/null +++ b/retrieve-blobs/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "retrieve-blobs" +version = "0.1.0" +edition = "2021" + +[dependencies] +reqwest = "0.12.1" +tokio = { version = "1", features = ["full"] } +tokio-postgres = "0.7" +prometheus = "0.13" +hyper = { version = "0.14", features = ["full", "server"] } +lazy_static = "1.5" diff --git a/retrieve-blobs/src/main.rs b/retrieve-blobs/src/main.rs new file mode 100644 index 00000000000..b45239a1978 --- /dev/null +++ b/retrieve-blobs/src/main.rs @@ -0,0 +1,114 @@ +use std::{collections::HashMap, convert::Infallible, error::Error, time::Duration}; + +use hyper::{ + service::{make_service_fn, service_fn}, + Body, Request, Response, Server, +}; +use prometheus::{register_counter, register_gauge, Counter, Encoder, Gauge, TextEncoder}; +use tokio::time::sleep; +use tokio_postgres::NoTls; + +// Define Prometheus metrics +lazy_static::lazy_static! 
{ + static ref BLOB_RETRIEVALS: Counter = register_counter!( + "blob_retrievals_total", + "Total number of blobs successfully retrieved" + ).unwrap(); + + static ref BLOB_AVG_SIZE: Gauge = register_gauge!( + "blob_avg_size", + "Average size of blobs in bytes" + ).unwrap(); +} + +#[tokio::main] +async fn main() { + // Start the metrics HTTP server + tokio::spawn(start_metrics_server()); + + let mut blobs = HashMap::new(); + + loop { + // Perform blob retrievals + match get_blobs(&mut blobs).await { + Ok(_) => println!("Blob retrieval successful"), + Err(e) => eprintln!("Blob retrieval error: {}", e), + }; + sleep(Duration::from_secs(1)).await; + } +} + +async fn get_blobs(blobs: &mut HashMap) -> Result<(), Box> { + // Connect to the PostgreSQL server + let (client, connection) = tokio_postgres::connect( + "host=postgres user=postgres password=notsecurepassword dbname=zksync_local", + NoTls, + ) + .await?; + + // Spawn a background task to handle the connection + tokio::spawn(async move { + if let Err(e) = connection.await { + eprintln!("Connection error: {}", e); + } + }); + + // Run the SELECT query + let rows = client + .query("SELECT blob_id FROM data_availability", &[]) + .await?; + + for row in rows { + let blob_id: &str = row.get(0); + let blob_id = blob_id.to_string(); + + if !blobs.contains_key(&blob_id) { + let blob = get(blob_id.clone()).await?; + blobs.insert(blob_id.clone(), blob.len()); + + if !blob.is_empty() { + BLOB_RETRIEVALS.inc(); // Increment counter if blob retrieval succeeds + } + } + BLOB_AVG_SIZE.set(blobs.values().sum::() as f64 / blobs.len() as f64); + } + + Ok(()) +} + +async fn get(commitment: String) -> Result, Box> { + let url = format!("http://host.docker.internal:4242/get/0x{commitment}"); + + let client = reqwest::Client::new(); + let response = client.get(url).send().await?; + + if response.status().is_success() { + // Expecting the response body to be binary data + let body = response.bytes().await?; + Ok(body.to_vec()) + } else { 
+ Ok(vec![]) + } +} + +// Start the Prometheus metrics server +async fn start_metrics_server() { + let make_svc = + make_service_fn(|_conn| async { Ok::<_, Infallible>(service_fn(metrics_handler)) }); + + let addr = ([0, 0, 0, 0], 7070).into(); + let server = Server::bind(&addr).serve(make_svc); + + println!("Serving metrics on http://{}", addr); + server.await.unwrap(); +} + +// Handle the /metrics endpoint +async fn metrics_handler(_: Request) -> Result, Infallible> { + let encoder = TextEncoder::new(); + let metric_families = prometheus::gather(); + let mut buffer = Vec::new(); + encoder.encode(&metric_families, &mut buffer).unwrap(); + + Ok(Response::new(Body::from(buffer))) +}