From 4c708bb97308504b507bffa14fb5317fca95575b Mon Sep 17 00:00:00 2001 From: 0yukali0 Date: Sun, 29 Oct 2023 14:31:34 +0800 Subject: [PATCH] [YUNIKORN-1626] Listing Yunikorn metrics revealed in the prometheus --- docs/metrics/queue.md | 6 +- docs/metrics/scheduler.mdx | 191 +++++++++++++++++-------------------- 2 files changed, 92 insertions(+), 105 deletions(-) diff --git a/docs/metrics/queue.md b/docs/metrics/queue.md index 060b4d5fc31..0aa1f7cd36c 100644 --- a/docs/metrics/queue.md +++ b/docs/metrics/queue.md @@ -36,7 +36,7 @@ These application states include `running`, `accepted`, `rejected`, `failed` and **TYPE**: `yunikorn__queue_app` -``` +```json yunikorn_root_default_queue_app{state="accepted"} 3 yunikorn_root_default_queue_app{state="running"} 3 ``` @@ -53,11 +53,11 @@ These resource states include `guaranteed`, `max`, `allocated`, `pending`, `pree **TYPE**: `yunikorn__queue_resource` -``` +```json yunikorn_root_queue_resource{resource="ephemeral-storage",state="max"} 9.41009558e+10 yunikorn_root_queue_resource{resource="hugepages-1Gi",state="max"} 0 yunikorn_root_queue_resource{resource="hugepages-2Mi",state="max"} 0 yunikorn_root_queue_resource{resource="memory",state="max"} 1.6223076352e+10 yunikorn_root_queue_resource{resource="pods",state="max"} 110 yunikorn_root_queue_resource{resource="vcore",state="max"} 8000 -``` \ No newline at end of file +``` diff --git a/docs/metrics/scheduler.mdx b/docs/metrics/scheduler.mdx index 381e0185394..cc186e203b3 100644 --- a/docs/metrics/scheduler.mdx +++ b/docs/metrics/scheduler.mdx @@ -84,7 +84,7 @@ This metric includes latencies, such as `Node sorting`, `Trynode` and `Trypreemp **Subsystem**: `scheduler` -``` +```json yunikorn_scheduler_scheduling_latency_milliseconds_bucket{le="0.0001"} 0 yunikorn_scheduler_scheduling_latency_milliseconds_bucket{le="0.001"} 0 yunikorn_scheduler_scheduling_latency_milliseconds_bucket{le="0.01"} 0 @@ -117,53 +117,47 @@ Latencies including `node sorting`, `application sorting` and `queue sorting`, i ]}> -```json -/* -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="app",le="0.0001"} 5 -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="app",le="0.001"} 6 -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="app",le="0.01"} 6 -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="app",le="0.1"} 6 -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="app",le="1"} 6 -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="app",le="10"} 6 -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="app",le="+Inf"} 6 -yunikorn_scheduler_node_sorting_latency_milliseconds_sum{level="app"} 0.00026345400000000004 -yunikorn_scheduler_node_sorting_latency_milliseconds_count{level="app"} 6 -*/ -``` + ```json + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="app",le="0.0001"} 5 + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="app",le="0.001"} 6 + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="app",le="0.01"} 6 + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="app",le="0.1"} 6 + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="app",le="1"} 6 + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="app",le="10"} 6 + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="app",le="+Inf"} 6 + yunikorn_scheduler_node_sorting_latency_milliseconds_sum{level="app"} 0.00026345400000000004 + yunikorn_scheduler_node_sorting_latency_milliseconds_count{level="app"} 6 + ``` -```json -/* -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="node",le="0.0001"} 3 -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="node",le="0.001"} 3 -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="node",le="0.01"} 3 -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="node",le="0.1"} 3 -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="node",le="1"} 3 -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="node",le="10"} 3 -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="node",le="+Inf"} 3 -yunikorn_scheduler_node_sorting_latency_milliseconds_sum{level="node"} 2.5013999999999998e-05 -yunikorn_scheduler_node_sorting_latency_milliseconds_count{level="node"} 3 -*/ -``` + ```json + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="node",le="0.0001"} 3 + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="node",le="0.001"} 3 + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="node",le="0.01"} 3 + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="node",le="0.1"} 3 + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="node",le="1"} 3 + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="node",le="10"} 3 + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="node",le="+Inf"} 3 + yunikorn_scheduler_node_sorting_latency_milliseconds_sum{level="node"} 2.5013999999999998e-05 + yunikorn_scheduler_node_sorting_latency_milliseconds_count{level="node"} 3 + ``` -```json -/* -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="queue",le="0.0001"} 9 -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="queue",le="0.001"} 9 -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="queue",le="0.01"} 9 -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="queue",le="0.1"} 9 -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="queue",le="1"} 9 -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="queue",le="10"} 9 -yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="queue",le="+Inf"} 9 -yunikorn_scheduler_node_sorting_latency_milliseconds_sum{level="queue"} 4.0093e-05 -yunikorn_scheduler_node_sorting_latency_milliseconds_count{level="queue"} 9 -*/ -``` + ```json + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="queue",le="0.0001"} 9 + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="queue",le="0.001"} 9 + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="queue",le="0.01"} 9 + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="queue",le="0.1"} 9 + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="queue",le="1"} 9 + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="queue",le="10"} 9 + yunikorn_scheduler_node_sorting_latency_milliseconds_bucket{level="queue",le="+Inf"} 9 + yunikorn_scheduler_node_sorting_latency_milliseconds_sum{level="queue"} 4.0093e-05 + yunikorn_scheduler_node_sorting_latency_milliseconds_count{level="queue"} 9 + ``` @@ -179,7 +173,7 @@ Latency of node condition checks for container allocations, such as placement co **Subsystem**: `scheduler` -``` +```json yunikorn_scheduler_trynode_latency_milliseconds_bucket{le="0.0001"} 0 yunikorn_scheduler_trynode_latency_milliseconds_bucket{le="0.001"} 0 yunikorn_scheduler_trynode_latency_milliseconds_bucket{le="0.01"} 0 @@ -201,7 +195,8 @@ Latency of preemption condition checks for container allocations, in millisecond **Namespace**: `yunikorn` **Subsystem**: `scheduler` -``` + +```json yunikorn_scheduler_trypreemption_latency_milliseconds_bucket{le="0.0001"} 0 yunikorn_scheduler_trypreemption_latency_milliseconds_bucket{le="0.001"} 0 yunikorn_scheduler_trypreemption_latency_milliseconds_bucket{le="0.01"} 0 @@ -249,78 +244,70 @@ Total resource usage of node, by resource name. ]}> -```json -yunikorn_scheduler_ephemeral_storage_node_usage_total -/* -yunikorn_scheduler_ephemeral_storage_node_usage_total{range="(10%, 20%]"} 0 -yunikorn_scheduler_ephemeral_storage_node_usage_total{range="(20%,30%]"} 0 -yunikorn_scheduler_ephemeral_storage_node_usage_total{range="(30%,40%]"} 0 -yunikorn_scheduler_ephemeral_storage_node_usage_total{range="(40%,50%]"} 0 -yunikorn_scheduler_ephemeral_storage_node_usage_total{range="(50%,60%]"} 0 -yunikorn_scheduler_ephemeral_storage_node_usage_total{range="(60%,70%]"} 0 -yunikorn_scheduler_ephemeral_storage_node_usage_total{range="(70%,80%]"} 0 -yunikorn_scheduler_ephemeral_storage_node_usage_total{range="(80%,90%]"} 0 -yunikorn_scheduler_ephemeral_storage_node_usage_total{range="(90%,100%]"} 0 -yunikorn_scheduler_ephemeral_storage_node_usage_total{range="[0,10%]"} 1 -*/ -``` + ```json + yunikorn_scheduler_ephemeral_storage_node_usage_total + yunikorn_scheduler_ephemeral_storage_node_usage_total{range="(10%, 20%]"} 0 + yunikorn_scheduler_ephemeral_storage_node_usage_total{range="(20%,30%]"} 0 + yunikorn_scheduler_ephemeral_storage_node_usage_total{range="(30%,40%]"} 0 + yunikorn_scheduler_ephemeral_storage_node_usage_total{range="(40%,50%]"} 0 + yunikorn_scheduler_ephemeral_storage_node_usage_total{range="(50%,60%]"} 0 + yunikorn_scheduler_ephemeral_storage_node_usage_total{range="(60%,70%]"} 0 + yunikorn_scheduler_ephemeral_storage_node_usage_total{range="(70%,80%]"} 0 + yunikorn_scheduler_ephemeral_storage_node_usage_total{range="(80%,90%]"} 0 + yunikorn_scheduler_ephemeral_storage_node_usage_total{range="(90%,100%]"} 0 + yunikorn_scheduler_ephemeral_storage_node_usage_total{range="[0,10%]"} 1 + ``` -```json -yunikorn_scheduler_memory_node_usage_total -/* -yunikorn_scheduler_memory_node_usage_total{range="(10%, 20%]"} 0 -yunikorn_scheduler_memory_node_usage_total{range="(20%,30%]"} 0 -yunikorn_scheduler_memory_node_usage_total{range="(30%,40%]"} 0 -yunikorn_scheduler_memory_node_usage_total{range="(40%,50%]"} 0 -yunikorn_scheduler_memory_node_usage_total{range="(50%,60%]"} 0 -yunikorn_scheduler_memory_node_usage_total{range="(60%,70%]"} 0 -yunikorn_scheduler_memory_node_usage_total{range="(70%,80%]"} 0 -yunikorn_scheduler_memory_node_usage_total{range="(80%,90%]"} 0 -yunikorn_scheduler_memory_node_usage_total{range="(90%,100%]"} 0 -yunikorn_scheduler_memory_node_usage_total{range="[0,10%]"} 1 -*/ -``` + ```json + yunikorn_scheduler_memory_node_usage_total + yunikorn_scheduler_memory_node_usage_total{range="(10%, 20%]"} 0 + yunikorn_scheduler_memory_node_usage_total{range="(20%,30%]"} 0 + yunikorn_scheduler_memory_node_usage_total{range="(30%,40%]"} 0 + yunikorn_scheduler_memory_node_usage_total{range="(40%,50%]"} 0 + yunikorn_scheduler_memory_node_usage_total{range="(50%,60%]"} 0 + yunikorn_scheduler_memory_node_usage_total{range="(60%,70%]"} 0 + yunikorn_scheduler_memory_node_usage_total{range="(70%,80%]"} 0 + yunikorn_scheduler_memory_node_usage_total{range="(80%,90%]"} 0 + yunikorn_scheduler_memory_node_usage_total{range="(90%,100%]"} 0 + yunikorn_scheduler_memory_node_usage_total{range="[0,10%]"} 1 + ``` -```json -yunikorn_scheduler_pods_node_usage_total -/* -yunikorn_scheduler_pods_node_usage_total{range="(10%, 20%]"} 0 -yunikorn_scheduler_pods_node_usage_total{range="(20%,30%]"} 0 -yunikorn_scheduler_pods_node_usage_total{range="(30%,40%]"} 0 -yunikorn_scheduler_pods_node_usage_total{range="(40%,50%]"} 0 -yunikorn_scheduler_pods_node_usage_total{range="(50%,60%]"} 0 -yunikorn_scheduler_pods_node_usage_total{range="(60%,70%]"} 0 -yunikorn_scheduler_pods_node_usage_total{range="(70%,80%]"} 0 -yunikorn_scheduler_pods_node_usage_total{range="(80%,90%]"} 0 -yunikorn_scheduler_pods_node_usage_total{range="(90%,100%]"} 0 -yunikorn_scheduler_pods_node_usage_total{range="[0,10%]"} 1 -*/ -``` + ```json + yunikorn_scheduler_pods_node_usage_total + yunikorn_scheduler_pods_node_usage_total{range="(10%, 20%]"} 0 + yunikorn_scheduler_pods_node_usage_total{range="(20%,30%]"} 0 + yunikorn_scheduler_pods_node_usage_total{range="(30%,40%]"} 0 + yunikorn_scheduler_pods_node_usage_total{range="(40%,50%]"} 0 + yunikorn_scheduler_pods_node_usage_total{range="(50%,60%]"} 0 + yunikorn_scheduler_pods_node_usage_total{range="(60%,70%]"} 0 + yunikorn_scheduler_pods_node_usage_total{range="(70%,80%]"} 0 + yunikorn_scheduler_pods_node_usage_total{range="(80%,90%]"} 0 + yunikorn_scheduler_pods_node_usage_total{range="(90%,100%]"} 0 + yunikorn_scheduler_pods_node_usage_total{range="[0,10%]"} 1 + ``` -```json -yunikorn_scheduler_vcore_node_usage_total -/* -yunikorn_scheduler_vcore_node_usage_total{range="(10%, 20%]"} 0 -yunikorn_scheduler_vcore_node_usage_total{range="(20%,30%]"} 0 -yunikorn_scheduler_vcore_node_usage_total{range="(30%,40%]"} 0 -yunikorn_scheduler_vcore_node_usage_total{range="(40%,50%]"} 0 -yunikorn_scheduler_vcore_node_usage_total{range="(50%,60%]"} 0 -yunikorn_scheduler_vcore_node_usage_total{range="(60%,70%]"} 0 -yunikorn_scheduler_vcore_node_usage_total{range="(70%,80%]"} 0 -yunikorn_scheduler_vcore_node_usage_total{range="(80%,90%]"} 0 -yunikorn_scheduler_vcore_node_usage_total{range="(90%,100%]"} 0 -yunikorn_scheduler_vcore_node_usage_total{range="[0,10%]"} 1 -*/ -``` + ```json + yunikorn_scheduler_vcore_node_usage_total + yunikorn_scheduler_vcore_node_usage_total{range="(10%, 20%]"} 0 + yunikorn_scheduler_vcore_node_usage_total{range="(20%,30%]"} 0 + yunikorn_scheduler_vcore_node_usage_total{range="(30%,40%]"} 0 + yunikorn_scheduler_vcore_node_usage_total{range="(40%,50%]"} 0 + yunikorn_scheduler_vcore_node_usage_total{range="(50%,60%]"} 0 + yunikorn_scheduler_vcore_node_usage_total{range="(60%,70%]"} 0 + yunikorn_scheduler_vcore_node_usage_total{range="(70%,80%]"} 0 + yunikorn_scheduler_vcore_node_usage_total{range="(80%,90%]"} 0 + yunikorn_scheduler_vcore_node_usage_total{range="(90%,100%]"} 0 + yunikorn_scheduler_vcore_node_usage_total{range="[0,10%]"} 1 + ```