From f17d1a9608889a229727c36998b42e02096e5565 Mon Sep 17 00:00:00 2001 From: NeoPlays <80448387+NeoPlays@users.noreply.github.com> Date: Tue, 3 Sep 2024 10:53:54 +0200 Subject: [PATCH] ADD: Obol Alerts (#2018) * ADD: Alert Rules * CHANGE: name of alert files * ADD: Function for Obol Alerts in Stereum * FIX: Correct alarm filtering logic in alert boxes (#2017) * FIX: update alarm filter in node page * FIX: update alarm filter in control page * ADD: Alert Rules * CHANGE: name of alert files * ADD: Function for Obol Alerts in Stereum * Fix: the control alert with obol alerts * fix: tooltip of yellow obol alert * FIX: now filter algo for the alarm controls * IMPROVE: error handling * FIX: new alarm filter algo in the node alert box * FIX: wire obol warning in the node alert box --------- Co-authored-by: mabasian <54101509+mabasian@users.noreply.github.com> --- .../CharonService/alerting/CharonAlerts.yaml | 727 ++++++++++++++++++ .../{alerts.yaml => HardwareAlerts.yaml} | 0 launcher/public/output.css | 14 - launcher/src/backend/Monitoring.js | 130 +++- launcher/src/backend/SSHService.js | 7 +- .../src/backend/ValidatorAccountManager.js | 2 +- launcher/src/background.js | 4 + .../node-page/components/alert/NodeAlert.vue | 138 +++- .../UI/the-control/ControlAlert.vue | 145 +++- launcher/src/store/ControlService.js | 4 + 10 files changed, 1080 insertions(+), 91 deletions(-) create mode 100644 controls/roles/manage-service/files/grafana/CharonService/alerting/CharonAlerts.yaml rename controls/roles/manage-service/files/grafana/PrometheusNodeExporterService/alerting/{alerts.yaml => HardwareAlerts.yaml} (100%) diff --git a/controls/roles/manage-service/files/grafana/CharonService/alerting/CharonAlerts.yaml b/controls/roles/manage-service/files/grafana/CharonService/alerting/CharonAlerts.yaml new file mode 100644 index 000000000..dd22fb372 --- /dev/null +++ b/controls/roles/manage-service/files/grafana/CharonService/alerting/CharonAlerts.yaml @@ -0,0 +1,727 @@ +apiVersion: 1 +groups: + - orgId: 1 + name: Cluster + folder: Charon + interval: 1m + rules: + - uid: f0e3fcb2-7325-47c0-a63b-c9d5cb31178f + title: ' Cluster BeaconNode Down ' + condition: B + data: + - refId: A + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + editorMode: code + expr: 'max((app_monitoring_readyz)) by (cluster_name, cluster_hash, cluster_peer) == 2' + hide: false + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: true + refId: A + - refId: B + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 3 + type: gt + operator: + type: and + query: + params: + - A + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + reducer: max + refId: B + settings: + mode: dropNN + type: reduce + dashboardUid: d6qujIJVk + panelId: 141 + noDataState: OK + execErrState: OK + for: 10m + annotations: + __dashboardUid__: d6qujIJVk + __panelId__: "141" + summary: Beacon node is down for node {{ $labels.cluster_peer }} in {{ $labels.cluster_name }} + labels: + cluster_hash: '{{ $labels.cluster_hash }}' + cluster_name: '{{ $labels.cluster_name }}' + cluster_peer: '{{ $labels.cluster_peer }}' + severity: notify + isPaused: false + - uid: fdd9f034-22e8-428b-9d7c-52e031bf8cee + title: ' Cluster BeaconNode Syncing ' + condition: B + data: + - refId: A + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: 
PBFA97CFB590B2093 + model: + editorMode: code + expr: 'max((app_monitoring_readyz)) by (cluster_name, cluster_hash, cluster_peer) == 3' + hide: false + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: true + refId: A + - refId: B + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 3 + type: gt + operator: + type: and + query: + params: + - A + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + reducer: max + refId: B + settings: + mode: dropNN + type: reduce + dashboardUid: d6qujIJVk + panelId: 141 + noDataState: OK + execErrState: OK + for: 10m + annotations: + __dashboardUid__: d6qujIJVk + __panelId__: "141" + summary: Beacon node is syncing for node {{ $labels.cluster_peer }} in {{ $labels.cluster_name }} + labels: + cluster_hash: '{{ $labels.cluster_hash }}' + cluster_name: '{{ $labels.cluster_name }}' + cluster_peer: '{{ $labels.cluster_peer }}' + severity: notify + isPaused: false + - uid: d1cf2f49-e3a9-4aaa-98d9-f478b1faa1d2 + title: Cluster Missed Attestations + condition: B + data: + - refId: A + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + editorMode: code + expr: 'max(increase(core_tracker_failed_duties_total[$__rate_interval])) by (cluster_hash, cluster_name) > 0 ' + hide: false + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: true + refId: A + - refId: B + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 1 + type: gt + operator: + type: and + query: + params: + - A + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: "-100" + expression: A + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + reducer: max + refId: B + settings: + mode: dropNN + type: reduce + dashboardUid: d6qujIJVk + panelId: 85 + noDataState: OK + execErrState: OK + for: 10m + annotations: + __dashboardUid__: d6qujIJVk + __panelId__: "85" + summary: Missed attestations in cluster {{ $labels.cluster_name }} + labels: + cluster_hash: '{{ $labels.cluster_hash }}' + cluster_name: '{{ $labels.cluster_name }}' + severity: notify + isPaused: false + - uid: e874c008-2ff0-46ef-a1a1-bf62a60337cf + title: Cluster in Unknown Status + condition: B + data: + - refId: A + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + editorMode: code + expr: 'max((app_monitoring_readyz)) by (cluster_name, cluster_hash, cluster_peer, cluster_network) == 0' + hide: false + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: true + refId: A + - refId: B + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 3 + type: gt + operator: + type: and + query: + params: + - A + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: "-100" + expression: A + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + reducer: count + refId: B + settings: + mode: "" + type: reduce + dashboardUid: d6qujIJVk + panelId: 87 + noDataState: OK + execErrState: OK + for: 10m + annotations: + __dashboardUid__: d6qujIJVk + __panelId__: "87" + summary: Node {{ $labels.cluster_peer }} in cluster {{ $labels.cluster_name }} is in unknown state + labels: + cluster_hash: '{{ $labels.cluster_hash }}' + cluster_name: '{{ $labels.cluster_name }}' + 
isPaused: false + - uid: c23c531e-4386-466a-b9e8-3a3809148cb5 + title: Cluster Insufficient Peers + condition: B + data: + - refId: A + relativeTimeRange: + from: 60 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + editorMode: code + expr: 'max((app_monitoring_readyz)) by (cluster_name,cluster_hash,cluster_peer) == 4' + hide: false + intervalMs: 1000 + legendFormat: '{{cluster_name}} {{cluster_peer}}' + maxDataPoints: 43200 + range: true + refId: A + - refId: B + relativeTimeRange: + from: 60 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 0 + - 0 + type: gt + operator: + type: and + query: + params: + - A + reducer: + params: [] + type: max + type: query + datasource: + name: Expression + type: __expr__ + uid: __expr__ + expression: A + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + reducer: max + refId: B + settings: + mode: dropNN + type: reduce + dashboardUid: d6qujIJVk + panelId: 186 + noDataState: OK + execErrState: OK + for: 10m + annotations: + __dashboardUid__: d6qujIJVk + __panelId__: "186" + summary: Insufficient Peers for node {{ $labels.cluster_peer }} in cluster {{ $labels.cluster_name }} + labels: + cluster_hash: '{{$labels.cluster_hash}}' + cluster_name: '{{$labels.cluster_name}}' + cluster_peer: '{{$labels.cluster_peer}}' + severity: warning + isPaused: false + - uid: a2cf313f-e5dc-4a3c-a3ff-d7e51b55183e + title: Cluster Failure Rate + condition: B + data: + - refId: A + relativeTimeRange: + from: 900 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + editorMode: code + expr: "floor(\n 100 * \n (\n max(increase(core_tracker_success_duties_total[$__range])) by (cluster_hash, cluster_name) / \n max(increase(core_tracker_expect_duties_total[$__range])) by (cluster_hash, cluster_name)\n )\n) < 95" + hide: false + instant: false + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: true + refId: A + - refId: B + relativeTimeRange: + from: 900 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 3 + type: gt + operator: + type: and + query: + params: + - A + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: "-100" + expression: A + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + reducer: max + refId: B + settings: + mode: dropNN + type: reduce + dashboardUid: d6qujIJVk + panelId: 91 + noDataState: OK + execErrState: OK + for: 6h + annotations: + __dashboardUid__: d6qujIJVk + __panelId__: "91" + summary: Cluster {{ $labels.cluster_name }} success rate is less than {{ $values.B }}% + labels: + cluster_hash: '{{ $labels.cluster_hash }}' + cluster_name: '{{ $labels.cluster_name }}' + severity: notify + isPaused: false + - uid: f5b9fb9d-66b8-49b6-813d-cdb2928425f2 + title: ' Cluster VC missing validators ' + condition: B + data: + - refId: A + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + editorMode: code + expr: 'max((app_monitoring_readyz)) by (cluster_name, cluster_hash, cluster_peer) == 6' + hide: false + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: true + refId: A + - refId: B + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 3 + type: gt + operator: + type: and + query: + params: + - A + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + reducer: max + refId: B + settings: + 
mode: dropNN + type: reduce + noDataState: OK + execErrState: OK + for: 10m + annotations: + summary: VC missing some validators for node {{ $labels.cluster_peer }} in cluster {{ $labels.cluster_name }} + labels: + cluster_hash: '{{ $labels.cluster_hash }}' + cluster_name: '{{ $labels.cluster_name }}' + cluster_peer: '{{ $labels.cluster_peer }}' + severity: warning + isPaused: false + - uid: fdhg9rxn9mmf4a + title: ' Cluster BeaconNode has zero peers ' + condition: B + data: + - refId: A + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + editorMode: code + expr: 'max((app_monitoring_readyz)) by (cluster_name, cluster_hash, cluster_peer) == 7' + hide: false + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: true + refId: A + - refId: B + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 3 + type: gt + operator: + type: and + query: + params: + - A + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + reducer: max + refId: B + settings: + mode: dropNN + type: reduce + noDataState: OK + execErrState: OK + for: 10m + annotations: + summary: BeaconNode has zero peers for node {{ $labels.cluster_peer }} in cluster {{ $labels.cluster_name }} + labels: + cluster_hash: '{{ $labels.cluster_hash }}' + cluster_name: '{{ $labels.cluster_name }}' + cluster_peer: '{{ $labels.cluster_peer }}' + service: charon + isPaused: false + - uid: e7e9de6b-f7f4-4a69-85b4-0512f5ef0aec + title: High percentage failed sync message duty + condition: B + data: + - refId: A + relativeTimeRange: + from: 10800 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + editorMode: code + expr: "(\n sum(increase(core_tracker_failed_duties_total[1h])) by (cluster_name,cluster_hash,cluster_peer)\n) \n/ \n(\n sum(increase(core_tracker_failed_duties_total[1h])) by (cluster_name,cluster_hash,cluster_peer) \n + \n sum(increase(core_bcast_broadcast_total[1h])) by (cluster_name,cluster_hash,cluster_peer) \n) > 0.1" + hide: false + instant: false + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: true + refId: A + - refId: B + relativeTimeRange: + from: 10800 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 3 + type: gt + operator: + type: and + query: + params: + - A + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: "-100" + expression: A + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + reducer: max + refId: B + settings: + mode: dropNN + type: reduce + noDataState: OK + execErrState: OK + for: 5m + annotations: + summary: High percentage failed sync message duty for node {{ $labels.cluster_peer }} in cluster {{ $labels.cluster_name }} + labels: + cluster_hash: '{{ $labels.cluster_hash }}' + cluster_name: '{{ $labels.cluster_name }}' + cluster_peer: '{{ $labels.cluster_peer }}' + service: charon + isPaused: false + - uid: f20e8a2f-8533-4524-bd5f-587e167c05ab + title: Cluster - Number of Connected Relays + condition: B + data: + - refId: A + relativeTimeRange: + from: 3600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: 'group (p2p_relay_connections) by (cluster_peer)' + instant: false + interval: 2m + intervalMs: 15000 + legendFormat: __auto + maxDataPoints: 43200 + range: true + refId: A + - refId: B + 
relativeTimeRange: + from: 3600 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 1 + - 0 + type: gt + operator: + type: and + query: + params: + - A + reducer: + params: [] + type: min + type: query + datasource: + name: Expression + type: __expr__ + uid: __expr__ + expression: "" + intervalMs: 1000 + maxDataPoints: 43200 + refId: B + type: classic_conditions + dashboardUid: d6qujIJVk + panelId: 156 + noDataState: OK + execErrState: Error + for: 10m + annotations: + __dashboardUid__: d6qujIJVk + __panelId__: "156" + summary: Cluster has no Connected Relays + isPaused: false + - uid: ee8dd3f8-bf16-4d1e-b644-ba01bb8c273b + title: Peer ping latency (400ms threshold) + condition: C + data: + - refId: A + relativeTimeRange: + from: 3600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: prometheus + editorMode: code + exemplar: true + expr: 'histogram_quantile(0.90, sum(rate(p2p_ping_latency_secs_bucket[2m])) by (le,peer)) ' + interval: 2m + intervalMs: 15000 + legendFormat: '{{peer}}' + maxDataPoints: 43200 + range: true + refId: A + - refId: B + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - B + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: B + type: reduce + - refId: C + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 0.4 + type: gt + operator: + type: and + query: + params: + - C + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: B + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + refId: C + type: threshold + dashboardUid: d6qujIJVk + panelId: 25 + noDataState: OK + execErrState: Error + for: 5m + annotations: + __dashboardUid__: d6qujIJVk + __panelId__: "25" + summary: Peer ping latency is greater than (400ms threshold) + isPaused: false + \ No newline at end of file diff --git a/controls/roles/manage-service/files/grafana/PrometheusNodeExporterService/alerting/alerts.yaml b/controls/roles/manage-service/files/grafana/PrometheusNodeExporterService/alerting/HardwareAlerts.yaml similarity index 100% rename from controls/roles/manage-service/files/grafana/PrometheusNodeExporterService/alerting/alerts.yaml rename to controls/roles/manage-service/files/grafana/PrometheusNodeExporterService/alerting/HardwareAlerts.yaml diff --git a/launcher/public/output.css b/launcher/public/output.css index 913994ac7..6ad9d34d7 100755 --- a/launcher/public/output.css +++ b/launcher/public/output.css @@ -1800,10 +1800,6 @@ video { height: 50px; } -.h-\[53px\]{ - height: 53px; -} - .h-\[554px\]{ height: 554px; } @@ -5017,11 +5013,6 @@ video { --tw-shadow: var(--tw-shadow-colored); } -.shadow-zinc-700{ - --tw-shadow-color: #3f3f46; - --tw-shadow: var(--tw-shadow-colored); -} - .shadow-zinc-800{ --tw-shadow-color: #27272a; --tw-shadow: var(--tw-shadow-colored); @@ -5626,11 +5617,6 @@ html body { background-color: rgb(43 48 52 / var(--tw-bg-opacity)); } -.hover\:bg-\[\#2f5a50\]:hover{ - --tw-bg-opacity: 1; - background-color: rgb(47 90 80 / var(--tw-bg-opacity)); -} - .hover\:bg-\[\#325e5a\]:hover{ --tw-bg-opacity: 1; background-color: rgb(50 94 90 / var(--tw-bg-opacity)); diff --git a/launcher/src/backend/Monitoring.js b/launcher/src/backend/Monitoring.js index 10bb3fb66..527512a74 100755 --- 
a/launcher/src/backend/Monitoring.js +++ b/launcher/src/backend/Monitoring.js @@ -436,11 +436,11 @@ export class Monitoring { var query = rpc_method.trim().indexOf("{") < 0 ? JSON.stringify({ - jsonrpc: "2.0", - method: rpc_method.trim(), - params: rpc_params, - id: 1, - }) + jsonrpc: "2.0", + method: rpc_method.trim(), + params: rpc_params, + id: 1, + }) : rpc_method; // Define default response @@ -2623,8 +2623,8 @@ export class Monitoring { const addr_type = Array.isArray(addr) ? "arr" : typeof addr === "string" && ["public", "local"].includes(addr) - ? "str" - : "invalid"; + ? "str" + : "invalid"; addr = addr_type == "str" ? addr.toLowerCase().trim() : addr; if (addr_type == "invalid") { return { @@ -2712,7 +2712,7 @@ export class Monitoring { for (let i = 0; i < serviceInfos.length; i++) { const hashDependencies = serviceInfos[i].config.dependencies.consensusClients.length || - serviceInfos[i].config.dependencies.executionClients.length + serviceInfos[i].config.dependencies.executionClients.length ? "yes" : "no"; easyInfos.push({ @@ -3249,9 +3249,8 @@ rm -rf diskoutput const parsedJson = JSON.parse(stdoutJson); let message = - `${parsedJson?.message || ""}${parsedJson?.message && parsedJson?.stacktraces ? "\n" : ""}${ - parsedJson?.stacktraces || "" - }`.trim() || output; + `${parsedJson?.message || ""}${parsedJson?.message && parsedJson?.stacktraces ? "\n" : ""}${parsedJson?.stacktraces || "" + }`.trim() || output; return { pubkey: pubkey, @@ -3341,4 +3340,113 @@ rm -rf diskoutput ]; } } + /** + * Will gather metrics from Prometheus and evaluate. + * If thresholds are exceeded, an alert will be generated and added to the retuned array. + * @returns {Object[]} Array of alerts e.g. [{name: "Cluster in Unknown Status", level: "warning"}, {name: "Beacon Node Down", level: "critical"}] + */ + async fetchObolCharonAlerts() { + try { + const serviceInfos = await this.getServiceInfos("CharonService"); + if (serviceInfos.length < 1) { + return []; + } + const queries = { + app_monitoring_readyz: "max((app_monitoring_readyz)) by (cluster_name, cluster_hash, cluster_peer)", + cluster_missed_attestations: "max(increase(core_tracker_failed_duties_total[10m])) by (cluster_hash, cluster_name)", + cluster_failure_rate: "floor(100 * (max(increase(core_tracker_success_duties_total[15m])) by (cluster_hash, cluster_name) / max(increase(core_tracker_expect_duties_total[15m])) by (cluster_hash, cluster_name)))", + percentage_failed_sync_message_duty: "(\n sum(increase(core_tracker_failed_duties_total[1h])) by (cluster_name,cluster_hash,cluster_peer)\n) \n/ \n(\n sum(increase(core_tracker_failed_duties_total[1h])) by (cluster_name,cluster_hash,cluster_peer) \n + \n sum(increase(core_bcast_broadcast_total[1h])) by (cluster_name,cluster_hash,cluster_peer) \n)", + connected_relays: "group (p2p_relay_connections) by (cluster_peer)", + peer_ping_latency: "histogram_quantile(0.90, sum(rate(p2p_ping_latency_secs_bucket[2m])) by (le,peer))", + } + + const queryPromises = Object.entries(queries).map(([key, query]) => { + return this.queryPrometheus(encodeURIComponent(query)).then(result => ({ key, result })); + }); + + const results = await Promise.all(queryPromises); + + let alerts = results.map((metric) => { + if (metric.result.status != "success") { + return; + } + if (metric.key === "peer_ping_latency") { + let value = Math.max(...metric.result.data.result.map((r) => r.value[1])); + return this.parseObolCharonAlerts(metric.key, value); + } + let value = metric.result.data.result[0].value[1]; + return 
this.parseObolCharonAlerts(metric.key, value); + }).filter((alert) => alert); + + return alerts; + + } catch (error) { + log.error("Fetching Obol Charon Alerts Failed:\n" + error); + return []; + } + } + + parseObolCharonAlerts(key, value) { + value = Number(value); // Prometheus returns sample values as strings + // app_monitoring_readyz + if (key === "app_monitoring_readyz") { + switch (value) { + case 0: + return { + name: "Cluster in Unknown Status", + level: "warning" + }; + case 2: + return { + name: "Beacon Node Down", + level: "critical" + }; + case 4: + return { + name: "Cluster Insufficient Peers", + level: "warning" + }; + case 6: + return { + name: "Cluster Missing Validators", + level: "critical" + }; + case 7: + return { + name: "Beacon Node Zero Peers", + level: "critical" + }; + } + } + if (key === "cluster_missed_attestations" && value > 0) { + return { + name: "Cluster Missed Attestations", + level: "critical" + }; + } + if (key === "cluster_failure_rate" && value < 95) { + return { + name: "Cluster Failure Rate", + level: "critical" + }; + } + if (key === "percentage_failed_sync_message_duty" && value > 0.1) { + return { + name: "Failed Sync Msg Duty", + level: "critical" + }; + } + if (key === "connected_relays" && value < 1) { + return { + name: "Num. Connected Relays", + level: "warning" + }; + } + if (key === "peer_ping_latency" && value > 0.4) { + return { + name: "Peer Ping Latency", + level: "warning" + }; + } + } } diff --git a/launcher/src/backend/SSHService.js b/launcher/src/backend/SSHService.js index fa16b6a5b..29211b56d 100755 --- a/launcher/src/backend/SSHService.js +++ b/launcher/src/backend/SSHService.js @@ -571,7 +571,10 @@ export class SSHService { * @param {Client} [conn] * @returns `void` */ - async uploadFileSSH(localPath, remotePath, conn = this.getConnectionFromPool()) { + async uploadFileSSH(localPath, remotePath, conn) { + if (!conn) { + conn = await this.getConnectionFromPool(); + } return new Promise((resolve, reject) => { const readStream = fs.createReadStream(localPath); readStream.on("error", reject); @@ -621,7 +624,7 @@ export class SSHService { if (item.isDirectory()) { await this.uploadDirectorySSH(localFilePath, remoteFilePath, conn); } else { - await this.uploadFileSSH(localFilePath, remoteFilePath); + await this.uploadFileSSH(localFilePath, remoteFilePath, conn); } } return true; diff --git a/launcher/src/backend/ValidatorAccountManager.js b/launcher/src/backend/ValidatorAccountManager.js index b7f5dddb9..438fa035a 100755 --- a/launcher/src/backend/ValidatorAccountManager.js +++ b/launcher/src/backend/ValidatorAccountManager.js @@ -1127,7 +1127,7 @@ export class ValidatorAccountManager { this.nodeConnection.sshService.exec(`rm -rf ${dataDir}`); const result = await this.nodeConnection.sshService.uploadDirectorySSH(path.normalize(localPath), dataDir); if (result) { - log.info("Obol Backup downloaded from: ", localPath); + log.info("Obol Backup uploaded from: ", localPath); } } catch (err) { log.error("Error uploading Obol Backup: ", err); diff --git a/launcher/src/background.js b/launcher/src/background.js index b24ed453d..9e606f633 100755 --- a/launcher/src/background.js +++ b/launcher/src/background.js @@ -741,6 +741,10 @@ ipcMain.handle("readGasConfigFile", async (event, args) => { return await tekuGasLimitConfig.readGasConfigFile(args); }); +ipcMain.handle("fetchObolCharonAlerts", async () => { + return await monitoring.fetchObolCharonAlerts(); +}); + // Scheme must be registered before the app is ready protocol.registerSchemesAsPrivileged([{ scheme: "app", privileges: { secure: true,
standard: true } }]); diff --git a/launcher/src/components/UI/node-page/components/alert/NodeAlert.vue b/launcher/src/components/UI/node-page/components/alert/NodeAlert.vue index ce4ac9b74..03953c454 100755 --- a/launcher/src/components/UI/node-page/components/alert/NodeAlert.vue +++ b/launcher/src/components/UI/node-page/components/alert/NodeAlert.vue @@ -8,31 +8,27 @@
green
green
green
@@ -56,7 +52,7 @@
@@ -71,7 +67,7 @@
@@ -87,7 +83,7 @@ -
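
Note on the readyz codes: both the Grafana rules in CharonAlerts.yaml and Monitoring.parseObolCharonAlerts() key off Charon's app_monitoring_readyz gauge. The sketch below restates that shared mapping as a lookup table purely for illustration; the patch itself uses a switch statement, and code 3 (beacon node syncing) only has a Grafana rule, not an in-app alert.

// Illustrative only: readyz codes as used by CharonAlerts.yaml and
// parseObolCharonAlerts(); this table is not part of the patch.
const READYZ_ALERTS = {
  0: { name: "Cluster in Unknown Status", level: "warning" },
  2: { name: "Beacon Node Down", level: "critical" },
  4: { name: "Cluster Insufficient Peers", level: "warning" },
  6: { name: "Cluster Missing Validators", level: "critical" },
  7: { name: "Beacon Node Zero Peers", level: "critical" },
};

// Prometheus instant queries return each sample as [timestamp, "<value as string>"],
// so the code has to be coerced to a number before the lookup.
function readyzToAlert(sample) {
  return READYZ_ALERTS[Number(sample.value[1])] ?? null;
}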
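
On the renderer side, the patch only shows the new ipcMain.handle("fetchObolCharonAlerts", ...) channel; the ControlService wrapper and the NodeAlert.vue/ControlAlert.vue hunks are truncated above. A minimal sketch of how a component could consume the alerts, assuming a ControlService.fetchObolCharonAlerts() wrapper around that channel and a hypothetical source tag for filtering:

// Hypothetical consumer: only the IPC channel name comes from the patch;
// the wrapper name and the "source" tag are assumptions.
import ControlService from "@/store/ControlService";

export async function loadObolAlerts() {
  // Resolves to the array built by Monitoring.fetchObolCharonAlerts(),
  // e.g. [{ name: "Beacon Node Down", level: "critical" }]
  const obolAlerts = await ControlService.fetchObolCharonAlerts();
  // Tag the alerts so the alert boxes can filter them alongside hardware alerts.
  return (obolAlerts ?? []).map((alert) => ({ ...alert, source: "charon" }));
}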