Skip to content

Commit

Permalink
migrate: a mash of detail fixes
Browse files: browse the repository at this point in the history
  • Loading branch information
consideRatio committed Apr 8, 2024
1 parent 9d96e51 commit 854aee1
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 37 deletions.
4 changes: 2 additions & 2 deletions dashboards/cluster.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -447,10 +447,10 @@ dashboard.new('Cluster Information')
]),
row.new('Node Stats')
+ row.withPanels([
nodeCPUCommit,
nodeMemoryCommit,
nodeCPUUtil,
nodeMemoryUtil,
nodeCPUCommit,
nodeMemoryCommit,
]),
],
panelWidth=12,
Expand Down
12 changes: 6 additions & 6 deletions dashboards/jupyterhub.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ local currentActiveUsers =
|||
)
+ ts.standardOptions.withDecimals(0)
// FIXME: not migrated config stack=true,
+ ts.fieldConfig.defaults.custom.stacking.withMode('normal')
+ ts.queryOptions.withTargets([
prometheus.new(
'$PROMETHEUS_DS',
Expand Down Expand Up @@ -51,7 +51,7 @@ local dailyActiveUsers =
)
// FIXME: not migrated config legend_hideZero=false,
+ ts.standardOptions.withDecimals(0)
// FIXME: not migrated config stack=true,
+ ts.fieldConfig.defaults.custom.stacking.withMode('normal')
+ ts.queryOptions.withTargets([
prometheus.new(
'$PROMETHEUS_DS',
Expand All @@ -76,7 +76,7 @@ local weeklyActiveUsers =
)
// FIXME: not migrated config legend_hideZero=false,
+ ts.standardOptions.withDecimals(0)
// FIXME: not migrated config stack=true,
+ ts.fieldConfig.defaults.custom.stacking.withMode('normal')
+ ts.queryOptions.withTargets([
prometheus.new(
'$PROMETHEUS_DS',
Expand All @@ -101,7 +101,7 @@ local monthlyActiveUsers =
)
// FIXME: not migrated config legend_hideZero=false,
+ ts.standardOptions.withDecimals(0)
// FIXME: not migrated config stack=true,
+ ts.fieldConfig.defaults.custom.stacking.withMode('normal')
+ ts.queryOptions.withTargets([
prometheus.new(
'$PROMETHEUS_DS',
Expand Down Expand Up @@ -380,7 +380,7 @@ local nonRunningPods =
|||
)
// decimalsY1=0,
// FIXME: not migrated config stack=true,
+ ts.fieldConfig.defaults.custom.stacking.withMode('normal')
+ ts.queryOptions.withTargets([
prometheus.new(
'$PROMETHEUS_DS',
Expand Down Expand Up @@ -560,7 +560,7 @@ local notebookImagesUsed =
)
// FIXME: not migrated config legend_hideZero=false,
+ ts.standardOptions.withDecimals(0)
// FIXME: not migrated config stack=false,
+ ts.fieldConfig.defaults.custom.stacking.withMode('normal')
+ ts.queryOptions.withTargets([
prometheus.new(
'$PROMETHEUS_DS',
Expand Down
16 changes: 8 additions & 8 deletions dashboards/support.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ local nfsServerWriteLatency =
|||
sum(rate(node_disk_write_time_seconds_total{job="prometheus-nfsd-server"}[5m])) by (device) / sum(rate(node_disk_writes_completed_total{job="prometheus-nfsd-server"}[5m])) by (device)
|||
+ prometheus.withLegendFormat('{{ device }}'),
),
)
+ prometheus.withLegendFormat('{{ device }}'),
]);

local nfsServerReadLatency =
Expand All @@ -107,8 +107,8 @@ local nfsServerReadLatency =
|||
sum(rate(node_disk_read_time_seconds_total{job="prometheus-nfsd-server"}[5m])) by (device) / sum(rate(node_disk_reads_completed_total{job="prometheus-nfsd-server"}[5m])) by (device)
|||
+ prometheus.withLegendFormat('{{ device }}'),
),
)
+ prometheus.withLegendFormat('{{ device }}'),
]);

// Support Metrics
Expand Down Expand Up @@ -161,15 +161,15 @@ local prometheusNetwork =
|||
sum(rate(container_network_receive_bytes_total{pod=~"support-prometheus-server-.*",namespace="support"}[5m]))
|||
+ prometheus.withLegendFormat('receive'),
),
)
+ prometheus.withLegendFormat('receive'),
prometheus.new(
'$PROMETHEUS_DS',
|||
sum(rate(container_network_send_bytes_total{pod=~"support-prometheus-server-.*",namespace="support"}[5m]))
|||
+ prometheus.withLegendFormat('send'),
),
)
+ prometheus.withLegendFormat('send'),
]);

dashboard.new('NFS and Support Information')
Expand Down
22 changes: 8 additions & 14 deletions dashboards/usage-report.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,12 @@ local prometheus = grafonnet.query.prometheus;

local common = import './common.libsonnet';

// FIXME: apply threshold coloring, provided like this historically, for all
// four panels in this dashboard
// FIXME: not migrated config thresholds, should apply to all panels
// thresholds=[
// {
// value: 0,
// color: 'green',
// },
// ]
// IDEAS:
// + barGauge.standardOptions.thresholds.withMode('percentage')
// + barGauge.standardOptions.color.withMode('continuous-greens')
//

local memoryUsageUserPods =
common.barGaugeOptions
+ barGauge.new('User pod memory usage')
+ barGauge.standardOptions.withUnit('bytes')
+ barGauge.standardOptions.color.withMode('fixed')
+ barGauge.standardOptions.color.withFixedColor('green')
+ barGauge.queryOptions.withTargets([
// Computes sum of pod memory requests, grouped by username, for notebook pods
prometheus.new(
Expand Down Expand Up @@ -53,6 +41,8 @@ local memoryUsageDaskWorkerPods =
common.barGaugeOptions
+ barGauge.new('Dask-gateway worker pod memory usage')
+ barGauge.standardOptions.withUnit('bytes')
+ barGauge.standardOptions.color.withMode('fixed')
+ barGauge.standardOptions.color.withFixedColor('green')
+ barGauge.queryOptions.withTargets([
// Computes sum of pod memory requests, grouped by username, and dask-gateway cluster
// for dask-gateway worker pods
Expand Down Expand Up @@ -85,6 +75,8 @@ local memoryUsageDaskSchedulerPods =
common.barGaugeOptions
+ barGauge.new('Dask-gateway scheduler pod memory usage')
+ barGauge.standardOptions.withUnit('bytes')
+ barGauge.standardOptions.color.withMode('fixed')
+ barGauge.standardOptions.color.withFixedColor('green')
+ barGauge.queryOptions.withTargets([
// Computes sum of pod memory requests, grouped by username, and dask-gateway cluster
// for dask-gateway scheduler pods
Expand Down Expand Up @@ -117,6 +109,8 @@ local memoryUsageGPUPods =
common.barGaugeOptions
+ barGauge.new('GPU pod memory usage')
+ barGauge.standardOptions.withUnit('bytes')
+ barGauge.standardOptions.color.withMode('fixed')
+ barGauge.standardOptions.color.withFixedColor('green')
+ barGauge.queryOptions.withTargets([
// Computes sum of pod memory requests, grouped by username for notebook gpu pods
prometheus.new(
Expand Down
9 changes: 2 additions & 7 deletions global-dashboards/global-usage-stats.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,8 @@ local prometheus = grafonnet.query.prometheus;
function(datasources)
local weeklyActiveUsers =
barGauge.new('Active users (over 7 days)')
// FIXME: not migrated config thresholds
//thresholds=[
// {
// value: 0,
// color: 'green',
// },
//],
+ barGauge.standardOptions.color.withMode('fixed')
+ barGauge.standardOptions.color.withFixedColor('green')
+ barGauge.queryOptions.withInterval('7d')
+ barGauge.queryOptions.withTargets([
prometheus.new(
Expand Down

0 comments on commit 854aee1

Please sign in to comment.