From 854aee15ad99b41771c91a9caac023b0fdaffee9 Mon Sep 17 00:00:00 2001
From: Erik Sundell
Date: Mon, 8 Apr 2024 17:29:44 +0200
Subject: [PATCH] migrate: a mash of detail fixes

Follow-up fixes to the dashboard migration:

- cluster: list the node utilization panels before the node commit
  panels in the "Node Stats" row.
- jupyterhub: replace the "not migrated config stack=..." FIXME comments
  with an explicit time series stacking mode. Panels that had stack=true
  get 'normal'; notebookImagesUsed had stack=false and gets 'none' so
  that it stays unstacked.
- support: apply prometheus.withLegendFormat(...) to the query object
  returned by prometheus.new(...) instead of placing it inside the
  prometheus.new(...) argument list.
- usage-report, global-usage-stats: replace the commented-out legacy
  thresholds (single green step at value 0) with a fixed green color.

---
 dashboards/cluster.jsonnet                   |  4 ++--
 dashboards/jupyterhub.jsonnet                | 12 +++++------
 dashboards/support.jsonnet                   | 16 +++++++-------
 dashboards/usage-report.jsonnet              | 22 +++++++-------------
 global-dashboards/global-usage-stats.jsonnet |  9 ++------
 5 files changed, 26 insertions(+), 37 deletions(-)

diff --git a/dashboards/cluster.jsonnet b/dashboards/cluster.jsonnet
index 987c3d2..52f26be 100755
--- a/dashboards/cluster.jsonnet
+++ b/dashboards/cluster.jsonnet
@@ -447,10 +447,10 @@ dashboard.new('Cluster Information')
       ]),
       row.new('Node Stats')
       + row.withPanels([
-        nodeCPUCommit,
-        nodeMemoryCommit,
         nodeCPUUtil,
         nodeMemoryUtil,
+        nodeCPUCommit,
+        nodeMemoryCommit,
       ]),
     ],
     panelWidth=12,
diff --git a/dashboards/jupyterhub.jsonnet b/dashboards/jupyterhub.jsonnet
index 2578b3c..36ab0f0 100755
--- a/dashboards/jupyterhub.jsonnet
+++ b/dashboards/jupyterhub.jsonnet
@@ -22,7 +22,7 @@ local currentActiveUsers =
     |||
   )
   + ts.standardOptions.withDecimals(0)
-  // FIXME: not migrated config stack=true,
+  + ts.fieldConfig.defaults.custom.stacking.withMode('normal')
   + ts.queryOptions.withTargets([
     prometheus.new(
       '$PROMETHEUS_DS',
@@ -51,7 +51,7 @@ local dailyActiveUsers =
   )
   // FIXME: not migrated config legend_hideZero=false,
   + ts.standardOptions.withDecimals(0)
-  // FIXME: not migrated config stack=true,
+  + ts.fieldConfig.defaults.custom.stacking.withMode('normal')
   + ts.queryOptions.withTargets([
     prometheus.new(
       '$PROMETHEUS_DS',
@@ -76,7 +76,7 @@ local weeklyActiveUsers =
   )
   // FIXME: not migrated config legend_hideZero=false,
   + ts.standardOptions.withDecimals(0)
-  // FIXME: not migrated config stack=true,
+  + ts.fieldConfig.defaults.custom.stacking.withMode('normal')
   + ts.queryOptions.withTargets([
     prometheus.new(
       '$PROMETHEUS_DS',
@@ -101,7 +101,7 @@ local monthlyActiveUsers =
   )
   // FIXME: not migrated config legend_hideZero=false,
   + ts.standardOptions.withDecimals(0)
-  // FIXME: not migrated config stack=true,
+  + ts.fieldConfig.defaults.custom.stacking.withMode('normal')
   + ts.queryOptions.withTargets([
     prometheus.new(
       '$PROMETHEUS_DS',
@@ -380,7 +380,7 @@ local nonRunningPods =
     |||
   )
   // decimalsY1=0,
-  // FIXME: not migrated config stack=true,
+  + ts.fieldConfig.defaults.custom.stacking.withMode('normal')
   + ts.queryOptions.withTargets([
     prometheus.new(
       '$PROMETHEUS_DS',
@@ -560,7 +560,7 @@ local notebookImagesUsed =
   )
   // FIXME: not migrated config legend_hideZero=false,
   + ts.standardOptions.withDecimals(0)
-  // FIXME: not migrated config stack=false,
+  + ts.fieldConfig.defaults.custom.stacking.withMode('none')
   + ts.queryOptions.withTargets([
     prometheus.new(
       '$PROMETHEUS_DS',
diff --git a/dashboards/support.jsonnet b/dashboards/support.jsonnet
index d4bc766..25c980d 100755
--- a/dashboards/support.jsonnet
+++ b/dashboards/support.jsonnet
@@ -94,8 +94,8 @@ local nfsServerWriteLatency =
       |||
         sum(rate(node_disk_write_time_seconds_total{job="prometheus-nfsd-server"}[5m])) by (device) / sum(rate(node_disk_writes_completed_total{job="prometheus-nfsd-server"}[5m])) by (device)
       |||
-      + prometheus.withLegendFormat('{{ device }}'),
-    ),
+    )
+    + prometheus.withLegendFormat('{{ device }}'),
   ]);
 
 local nfsServerReadLatency =
@@ -107,8 +107,8 @@ local nfsServerReadLatency =
       |||
         sum(rate(node_disk_read_time_seconds_total{job="prometheus-nfsd-server"}[5m])) by (device) / sum(rate(node_disk_reads_completed_total{job="prometheus-nfsd-server"}[5m])) by (device)
       |||
-      + prometheus.withLegendFormat('{{ device }}'),
-    ),
+    )
+    + prometheus.withLegendFormat('{{ device }}'),
   ]);
 
 // Support Metrics
@@ -161,15 +161,15 @@ local prometheusNetwork =
       |||
         sum(rate(container_network_receive_bytes_total{pod=~"support-prometheus-server-.*",namespace="support"}[5m]))
       |||
-      + prometheus.withLegendFormat('receive'),
-    ),
+    )
+    + prometheus.withLegendFormat('receive'),
     prometheus.new(
       '$PROMETHEUS_DS',
       |||
         sum(rate(container_network_send_bytes_total{pod=~"support-prometheus-server-.*",namespace="support"}[5m]))
       |||
-      + prometheus.withLegendFormat('send'),
-    ),
+    )
+    + prometheus.withLegendFormat('send'),
   ]);
 
 dashboard.new('NFS and Support Information')
diff --git a/dashboards/usage-report.jsonnet b/dashboards/usage-report.jsonnet
index 74a936f..9ae0a20 100755
--- a/dashboards/usage-report.jsonnet
+++ b/dashboards/usage-report.jsonnet
@@ -6,24 +6,12 @@ local prometheus = grafonnet.query.prometheus;
 
 local common = import './common.libsonnet';
 
-// FIXME: apply threshold coloring, provided like this historically, for all
-//        four panels in this dashboard
-// FIXME: not migrated config thresholds, should apply to all panels
-//        thresholds=[
-//          {
-//            value: 0,
-//            color: 'green',
-//          },
-//        ]
-// IDEAS:
-//    + barGauge.standardOptions.thresholds.withMode('percentage')
-//    + barGauge.standardOptions.color.withMode('continuous-greens')
-//
-
 local memoryUsageUserPods =
   common.barGaugeOptions
   + barGauge.new('User pod memory usage')
   + barGauge.standardOptions.withUnit('bytes')
+  + barGauge.standardOptions.color.withMode('fixed')
+  + barGauge.standardOptions.color.withFixedColor('green')
   + barGauge.queryOptions.withTargets([
     // Computes sum of pod memory requests, grouped by username, for notebook pods
     prometheus.new(
@@ -53,6 +41,8 @@ local memoryUsageDaskWorkerPods =
   common.barGaugeOptions
   + barGauge.new('Dask-gateway worker pod memory usage')
   + barGauge.standardOptions.withUnit('bytes')
+  + barGauge.standardOptions.color.withMode('fixed')
+  + barGauge.standardOptions.color.withFixedColor('green')
   + barGauge.queryOptions.withTargets([
     // Computes sum of pod memory requests, grouped by username, and dask-gateway cluster
     // for dask-gateway worker pods
@@ -85,6 +75,8 @@ local memoryUsageDaskSchedulerPods =
   common.barGaugeOptions
   + barGauge.new('Dask-gateway scheduler pod memory usage')
   + barGauge.standardOptions.withUnit('bytes')
+  + barGauge.standardOptions.color.withMode('fixed')
+  + barGauge.standardOptions.color.withFixedColor('green')
   + barGauge.queryOptions.withTargets([
     // Computes sum of pod memory requests, grouped by username, and dask-gateway cluster
     // for dask-gateway scheduler pods
@@ -117,6 +109,8 @@ local memoryUsageGPUPods =
   common.barGaugeOptions
   + barGauge.new('GPU pod memory usage')
   + barGauge.standardOptions.withUnit('bytes')
+  + barGauge.standardOptions.color.withMode('fixed')
+  + barGauge.standardOptions.color.withFixedColor('green')
   + barGauge.queryOptions.withTargets([
     // Computes sum of pod memory requests, grouped by username for notebook gpu pods
     prometheus.new(
diff --git a/global-dashboards/global-usage-stats.jsonnet b/global-dashboards/global-usage-stats.jsonnet
index 2dc25ce..36f17b1 100755
--- a/global-dashboards/global-usage-stats.jsonnet
+++ b/global-dashboards/global-usage-stats.jsonnet
@@ -9,13 +9,8 @@ local prometheus = grafonnet.query.prometheus;
 function(datasources)
   local weeklyActiveUsers =
     barGauge.new('Active users (over 7 days)')
-    // FIXME: not migrated config thresholds
-    //thresholds=[
-    //  {
-    //    value: 0,
-    //    color: 'green',
-    //  },
-    //],
+    + barGauge.standardOptions.color.withMode('fixed')
+    + barGauge.standardOptions.color.withFixedColor('green')
     + barGauge.queryOptions.withInterval('7d')
     + barGauge.queryOptions.withTargets([
       prometheus.new(