From 4991729ef9bcf1b90be0ff6164ba79deac640b50 Mon Sep 17 00:00:00 2001
From: Kushal Shukla <kushalshukla110@gmail.com>
Date: Wed, 18 Sep 2024 23:27:38 +0530
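Subject: [PATCH 1/4] Add arithmetic, logic-operator and topk queries to loadgen

Add three new instant-query groups (arithmetic operation, logic_operator
and topk) to the load generator configuration.

Signed-off-by: kushal shukla <kushalshukla110@gmail.com>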
---
 .../prombench/benchmark/6_loadgen.yaml        | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/prombench/manifests/prombench/benchmark/6_loadgen.yaml b/prombench/manifests/prombench/benchmark/6_loadgen.yaml
index 0c3b4d6ff..f70bf0b10 100644
--- a/prombench/manifests/prombench/benchmark/6_loadgen.yaml
+++ b/prombench/manifests/prombench/benchmark/6_loadgen.yaml
@@ -48,6 +48,30 @@ data:
         - expr: histogram_quantile(0.99, sum by(path, le) (rate(codelab_api_request_duration_seconds_bucket{method="POST"}[5m])))
         - expr: histogram_quantile(0.99, sum by(path, method, le) (rate(codelab_api_request_duration_seconds_bucket{method="POST"}[5m])))
         - expr: histogram_quantile(0.99, sum by(instance, le) (rate(codelab_api_request_duration_seconds_bucket{method="POST"}[5m])))
+      - name: arithmetic operation
+        interval: 10s
+        type: instant
+        queries:
+        - expr: sum by (container, pod) (rate(container_cpu_usage_seconds_total[5m])) / sum by (container, pod) (container_spec_cpu_quota)
+        - expr: rate(node_network_receive_bytes_total[5m]) * 60
+        - expr: sum(rate(storage_operation_duration_seconds_sum[5m])) + sum(rate(storage_operation_duration_seconds_count[5m]))
+        - expr: sum(rate(kubelet_runtime_operations_duration_seconds_sum[5m])) by (operation_type) / sum(rate(kubelet_runtime_operations_duration_seconds_count[5m])) by (operation_type)  
+      - name: logic_operator
+        interval: 10s
+        type: instant
+        queries:
+        - expr: node_filesystem_avail_bytes{mountpoint="/"} and on(instance) node_filesystem_size_bytes{mountpoint="/"}
+        - expr: container_ulimits_soft{namespace="kube-system"} or container_ulimits_soft{cloud_google_com_gke_nodepool="main-node"}
+        - expr: container_memory_working_set_bytes{namespace=~"^prombench-[0-9]+"} or container_memory_rss{namespace=~"^prombench-[0-9]+"}
+        - expr: rate(node_network_receive_bytes_total{device="eth0"}[5m]) unless rate(node_network_receive_packets_dropped_total{device="eth0"}[5m])
+      - name: topk
+        interval: 10s
+        type: instant
+        queries:
+        - expr: topk(3, sum(rate(node_cpu_seconds_total{mode="user"}[5m])) by (instance))
+        - expr: topk(5, sum(container_cpu_usage_seconds_total) by (instance))     
+        - expr: topk(3, sum(container_memory_usage_bytes) by (pod))
+        - expr: topk(5, sum(container_sockets) by (namespace))
 ---
 apiVersion: apps/v1
 kind: Deployment

From 0a30d46bad9be1cea00b6a18fca8d96048398c19 Mon Sep 17 00:00:00 2001
From: Kushal Shukla <kushalshukla110@gmail.com>
Date: Tue, 1 Oct 2024 12:11:14 +0530
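Subject: [PATCH 2/4] Switch new loadgen queries to codelab_api_* and go_* metrics

Replace the node, container, kubelet and storage metrics used by the newly
added query groups with codelab_api_* and go_* metrics, and use much larger
`k` values for `topk` to make those queries heavier.

Rename the "arithmetic operation" group to "arithmetic_operation".

Signed-off-by: Kushal Shukla <kushalshukla110@gmail.com>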
---
 .../prombench/benchmark/6_loadgen.yaml        | 29 ++++++++++---------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/prombench/manifests/prombench/benchmark/6_loadgen.yaml b/prombench/manifests/prombench/benchmark/6_loadgen.yaml
index f70bf0b10..24939050e 100644
--- a/prombench/manifests/prombench/benchmark/6_loadgen.yaml
+++ b/prombench/manifests/prombench/benchmark/6_loadgen.yaml
@@ -48,30 +48,33 @@ data:
         - expr: histogram_quantile(0.99, sum by(path, le) (rate(codelab_api_request_duration_seconds_bucket{method="POST"}[5m])))
         - expr: histogram_quantile(0.99, sum by(path, method, le) (rate(codelab_api_request_duration_seconds_bucket{method="POST"}[5m])))
         - expr: histogram_quantile(0.99, sum by(instance, le) (rate(codelab_api_request_duration_seconds_bucket{method="POST"}[5m])))
-      - name: arithmetic operation
+      - name: arithmetic_operation
         interval: 10s
         type: instant
         queries:
-        - expr: sum by (container, pod) (rate(container_cpu_usage_seconds_total[5m])) / sum by (container, pod) (container_spec_cpu_quota)
-        - expr: rate(node_network_receive_bytes_total[5m]) * 60
-        - expr: sum(rate(storage_operation_duration_seconds_sum[5m])) + sum(rate(storage_operation_duration_seconds_count[5m]))
-        - expr: sum(rate(kubelet_runtime_operations_duration_seconds_sum[5m])) by (operation_type) / sum(rate(kubelet_runtime_operations_duration_seconds_count[5m])) by (operation_type)  
+        - expr: rate(codelab_api_request_duration_seconds_bucket{method="POST"}[5m]) + 100  
+        - expr: rate(go_memstats_frees_total[5m]) * 60
+        - expr: rate(codelab_api_requests_total{method=~"GET|POST"}[5m]) - rate(codelab_api_request_duration_seconds_sum{method=~"GET|POST"}[5m])
+        - expr: rate(go_gc_duration_seconds_sum{job=~"fake-webservers-[0-9]+"}[5m]) / rate(go_gc_duration_seconds_count{job=~"fake-webservers-[0-9]+"}[5m])        
+        - expr: sum by (instance, job) (rate(codelab_api_request_errors_total[5m])) / sum by (instance, job) (rate(go_memstats_mallocs_total[10m]))
       - name: logic_operator
         interval: 10s
         type: instant
         queries:
-        - expr: node_filesystem_avail_bytes{mountpoint="/"} and on(instance) node_filesystem_size_bytes{mountpoint="/"}
-        - expr: container_ulimits_soft{namespace="kube-system"} or container_ulimits_soft{cloud_google_com_gke_nodepool="main-node"}
-        - expr: container_memory_working_set_bytes{namespace=~"^prombench-[0-9]+"} or container_memory_rss{namespace=~"^prombench-[0-9]+"}
-        - expr: rate(node_network_receive_bytes_total{device="eth0"}[5m]) unless rate(node_network_receive_packets_dropped_total{device="eth0"}[5m])
+        - expr: codelab_api_request_duration_seconds_bucket{method="GET"} or codelab_api_request_duration_seconds_bucket{method="POST"}
+        - expr: codelab_api_request_duration_seconds_sum{status="200"} or codelab_api_request_duration_seconds_sum{status="500"}
+        - expr: codelab_api_request_duration_seconds_bucket{status="200"} and codelab_api_request_duration_seconds_bucket{method="GET"}
+        - expr: codelab_api_request_duration_seconds_count{method="POST"} and codelab_api_request_duration_seconds_count{status="500"}
+        - expr: codelab_api_request_duration_seconds_bucket{status="200"} or codelab_api_request_duration_seconds_bucket{method="GET"}
       - name: topk
         interval: 10s
         type: instant
         queries:
-        - expr: topk(3, sum(rate(node_cpu_seconds_total{mode="user"}[5m])) by (instance))
-        - expr: topk(5, sum(container_cpu_usage_seconds_total) by (instance))     
-        - expr: topk(3, sum(container_memory_usage_bytes) by (pod))
-        - expr: topk(5, sum(container_sockets) by (namespace))
+        - expr: topk(2000, sum(rate(go_gc_duration_seconds_count[5m])) by (instance, job))
+        - expr: topk(10000, sum(codelab_api_request_duration_seconds_bucket) by (method,job))     
+        - expr: topk(1000, count(go_threads) by (job, instance))
+        - expr: topk(2000, count(codelab_api_http_requests_in_progress) by (job, instance))
+        - expr: topk(15000, count(codelab_api_request_duration_seconds_sum) by (job, instance))
 ---
 apiVersion: apps/v1
 kind: Deployment

From b74ac896fa594072f2f891c3a9a782e5eb806b1e Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Sun, 20 Oct 2024 18:01:08 +0100
Subject: [PATCH 3/4] Trim down newly added queries

Slow down arithmetic_operation and logic_operator; take out a few
queries to avoid overloading the server.

Stop querying `_bucket` series directly; those should be used by
`histogram_quantile` or similar.

Use more realistic `k` parameters to `topk`.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
---
 .../prombench/benchmark/6_loadgen.yaml          | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/prombench/manifests/prombench/benchmark/6_loadgen.yaml b/prombench/manifests/prombench/benchmark/6_loadgen.yaml
index 24939050e..fd20d6e6c 100644
--- a/prombench/manifests/prombench/benchmark/6_loadgen.yaml
+++ b/prombench/manifests/prombench/benchmark/6_loadgen.yaml
@@ -49,32 +49,25 @@ data:
         - expr: histogram_quantile(0.99, sum by(path, method, le) (rate(codelab_api_request_duration_seconds_bucket{method="POST"}[5m])))
         - expr: histogram_quantile(0.99, sum by(instance, le) (rate(codelab_api_request_duration_seconds_bucket{method="POST"}[5m])))
       - name: arithmetic_operation
-        interval: 10s
+        interval: 30s
         type: instant
         queries:
-        - expr: rate(codelab_api_request_duration_seconds_bucket{method="POST"}[5m]) + 100  
         - expr: rate(go_memstats_frees_total[5m]) * 60
         - expr: rate(codelab_api_requests_total{method=~"GET|POST"}[5m]) - rate(codelab_api_request_duration_seconds_sum{method=~"GET|POST"}[5m])
         - expr: rate(go_gc_duration_seconds_sum{job=~"fake-webservers-[0-9]+"}[5m]) / rate(go_gc_duration_seconds_count{job=~"fake-webservers-[0-9]+"}[5m])        
         - expr: sum by (instance, job) (rate(codelab_api_request_errors_total[5m])) / sum by (instance, job) (rate(go_memstats_mallocs_total[10m]))
       - name: logic_operator
-        interval: 10s
+        interval: 30s
         type: instant
         queries:
-        - expr: codelab_api_request_duration_seconds_bucket{method="GET"} or codelab_api_request_duration_seconds_bucket{method="POST"}
         - expr: codelab_api_request_duration_seconds_sum{status="200"} or codelab_api_request_duration_seconds_sum{status="500"}
-        - expr: codelab_api_request_duration_seconds_bucket{status="200"} and codelab_api_request_duration_seconds_bucket{method="GET"}
-        - expr: codelab_api_request_duration_seconds_count{method="POST"} and codelab_api_request_duration_seconds_count{status="500"}
-        - expr: codelab_api_request_duration_seconds_bucket{status="200"} or codelab_api_request_duration_seconds_bucket{method="GET"}
+        - expr: codelab_api_request_duration_seconds_count{method="POST"} unless codelab_api_request_duration_seconds_count{status="500"}
       - name: topk
         interval: 10s
         type: instant
         queries:
-        - expr: topk(2000, sum(rate(go_gc_duration_seconds_count[5m])) by (instance, job))
-        - expr: topk(10000, sum(codelab_api_request_duration_seconds_bucket) by (method,job))     
-        - expr: topk(1000, count(go_threads) by (job, instance))
-        - expr: topk(2000, count(codelab_api_http_requests_in_progress) by (job, instance))
-        - expr: topk(15000, count(codelab_api_request_duration_seconds_sum) by (job, instance))
+        - expr: topk(20, sum(rate(go_gc_duration_seconds_count[5m])) by (instance, job))
+        - expr: topk(10, sum(codelab_api_request_duration_seconds_count) by (method,job))     
 ---
 apiVersion: apps/v1
 kind: Deployment

From c66f3655b8f11c923407f23c96c89b18e80af7eb Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Sun, 20 Oct 2024 18:03:44 +0100
Subject: [PATCH 4/4] Trim down pre-existing queries

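Slow down the aggr_instant group from a 5s to a 15s interval and remove one
histogram_quantile query, for balance: this retains about the same overall
load on the server as before.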

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
---
 prombench/manifests/prombench/benchmark/6_loadgen.yaml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/prombench/manifests/prombench/benchmark/6_loadgen.yaml b/prombench/manifests/prombench/benchmark/6_loadgen.yaml
index fd20d6e6c..86b72d6cb 100644
--- a/prombench/manifests/prombench/benchmark/6_loadgen.yaml
+++ b/prombench/manifests/prombench/benchmark/6_loadgen.yaml
@@ -20,7 +20,7 @@ data:
         - expr: codelab_api_http_requests_in_progress
         - expr: 'codelab_api_requests_total{method="GET",path="/api/bar",status="200"}'
       - name: aggr_instant
-        interval: 5s
+        interval: 15s
         type: instant
         queries:
         - expr: sum by(image) (container_memory_rss)
@@ -45,7 +45,6 @@ data:
         queries:
         - expr: rate(codelab_api_requests_total{method=~"GET|POST"}[5m])
         - expr: sum without(instance) (rate(codelab_api_requests_total{method=~"GET|POST"}[5m]))
-        - expr: histogram_quantile(0.99, sum by(path, le) (rate(codelab_api_request_duration_seconds_bucket{method="POST"}[5m])))
         - expr: histogram_quantile(0.99, sum by(path, method, le) (rate(codelab_api_request_duration_seconds_bucket{method="POST"}[5m])))
         - expr: histogram_quantile(0.99, sum by(instance, le) (rate(codelab_api_request_duration_seconds_bucket{method="POST"}[5m])))
       - name: arithmetic_operation