From bc1504a2f738d2fb2227623eed003865de9fd75f Mon Sep 17 00:00:00 2001 From: Bilal Akhtar Date: Fri, 14 Jun 2024 15:35:16 -0400 Subject: [PATCH] roachtest: ignore workload for 5 mins after start in wal failover Previously, we'd look at p99 latencies for the workload since its very start, in the disk-stall/wal-failover roachtest. This was relatively ambitious as the workload is a high-concurrency kv workload with no ramping period at the start, so the chance of high p99 latency even under normal performance is high. This change ignores the workload's metrics from the first 5 mins of the workload (as opposed to just the first minute), and explicitly adds a 1min ramp period to the workload where concurrency is gradually increased. Fixes #124977. Epic: none Release note: None --- pkg/cmd/roachtest/tests/disk_stall.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/cmd/roachtest/tests/disk_stall.go b/pkg/cmd/roachtest/tests/disk_stall.go index 21ba32b449e0..e2ba7954c782 100644 --- a/pkg/cmd/roachtest/tests/disk_stall.go +++ b/pkg/cmd/roachtest/tests/disk_stall.go @@ -103,7 +103,7 @@ func runDiskStalledWALFailover( m := c.NewMonitor(ctx, c.Range(1, 3)) m.Go(func(ctx context.Context) error { c.Run(ctx, option.WithNodes(c.Node(4)), `./cockroach workload run kv --read-percent 0 `+ - `--duration 60m --concurrency 4096 --max-rate 4096 --tolerate-errors `+ + `--duration 60m --concurrency 4096 --ramp=1m --max-rate 4096 --tolerate-errors `+ ` --min-block-bytes=2048 --max-block-bytes=2048 --timeout 1s `+ `{pgurl:1-3}`) return nil @@ -160,7 +160,7 @@ func runDiskStalledWALFailover( } data := mustGetMetrics(ctx, c, t, adminURL, - workloadStartAt.Add(time.Minute), + workloadStartAt.Add(5*time.Minute), timeutil.Now().Add(-time.Minute), []tsQuery{ {name: "cr.node.sql.exec.latency-p99.99", queryType: total, sources: []string{"2"}},