Skip to content

Commit

Permalink
Add Vegeta rates / targets to SLA in performance tests (#14429)
Browse files Browse the repository at this point in the history
* Add Vegeta rates / targets to SLA in performance tests

Signed-off-by: pingjiang <[email protected]>

* dataplane-probe: use total requests

Signed-off-by: pingjiang <[email protected]>

* fix after review

Signed-off-by: pingjiang <[email protected]>

* delete useless condition

Signed-off-by: pingjiang <[email protected]>

* fix after review

Signed-off-by: pingjiang <[email protected]>

* add an allowed deviation (tolerance) to the vegeta total requests check

Signed-off-by: pingjiang <[email protected]>

* add threshold in vegeta total requests check

Signed-off-by: pingjiang <[email protected]>

---------

Signed-off-by: pingjiang <[email protected]>
  • Loading branch information
xiangpingjiang authored Jan 15, 2024
1 parent e5602d7 commit 8162fe2
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 10 deletions.
11 changes: 9 additions & 2 deletions test/performance/benchmarks/dataplane-probe/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ LOOP:
influxReporter.AddDataPointsForMetrics(metricResults, benchmarkName)
_ = vegeta.NewTextReporter(metricResults).Report(os.Stdout)

if err := checkSLA(metricResults, t.slaMin, t.slaMax); err != nil {
if err := checkSLA(metricResults, t.slaMin, t.slaMax, rate, *duration); err != nil {
// make sure to still write the stats
influxReporter.FlushAndShutdown()
log.Fatalf(err.Error())
Expand All @@ -168,7 +168,7 @@ LOOP:
log.Println("Dataplane probe test finished")
}

func checkSLA(results *vegeta.Metrics, slaMin time.Duration, slaMax time.Duration) error {
func checkSLA(results *vegeta.Metrics, slaMin time.Duration, slaMax time.Duration, rate vegeta.ConstantPacer, duration time.Duration) error {
// SLA 1: The p95 latency hitting the target has to be between the range defined
// in the target map on top.
if results.Latencies.P95 >= slaMin && results.Latencies.P95 <= slaMax {
Expand All @@ -177,5 +177,12 @@ func checkSLA(results *vegeta.Metrics, slaMin time.Duration, slaMax time.Duratio
return fmt.Errorf("SLA 1 failed. P95 latency is not in %d-%dms time range: %s", slaMin, slaMax, results.Latencies.P95)
}

// SLA 2: making sure the defined total request is met
if results.Requests == uint64(rate.Rate(time.Second)*duration.Seconds()) {
log.Printf("SLA 2 passed. vegeta total request is %d", results.Requests)
} else {
return fmt.Errorf("SLA 2 failed. vegeta total request is %d, expected total request is %f", results.Requests, rate.Rate(time.Second)*duration.Seconds())
}

return nil
}
17 changes: 15 additions & 2 deletions test/performance/benchmarks/load-test/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ func main() {
influxReporter.AddDataPointsForMetrics(metricResults, benchmarkName)
_ = vegeta.NewTextReporter(metricResults).Report(os.Stdout)

if err := checkSLA(metricResults); err != nil {
if err := checkSLA(metricResults, pacers, durations); err != nil {
// make sure to still write the stats
influxReporter.FlushAndShutdown()
log.Fatalf(err.Error())
Expand Down Expand Up @@ -156,7 +156,7 @@ func processResults(ctx context.Context, results <-chan *vegeta.Result, reporter
}
}

func checkSLA(results *vegeta.Metrics) error {
func checkSLA(results *vegeta.Metrics, pacers []vegeta.Pacer, durations []time.Duration) error {
// SLA 1: the p95 latency has to be over the 0->3k stepped burst
// falls in the +15ms range (we sleep 100 ms, so 100-115ms).
// This includes a mix of cold-starts and steady state (once the autoscaling decisions have leveled off).
Expand All @@ -183,5 +183,18 @@ func checkSLA(results *vegeta.Metrics) error {
return fmt.Errorf("SLA 3 failed. Errors occurred: %d", len(results.Errors))
}

// SLA 4: making sure the defined vegeta total requests is met
var expectedSum float64
var expectedRequests uint64
for i := 0; i < len(pacers); i++ {
expectedSum = expectedSum + pacers[i].Rate(time.Second)*durations[i].Seconds()
}
expectedRequests = uint64(expectedSum)
if results.Requests >= expectedRequests-(expectedRequests/1000) {
log.Printf("SLA 4 passed. total requests is %d, expected threshold is %d", results.Requests, expectedRequests-(expectedRequests/1000))
} else {
return fmt.Errorf("SLA 4 failed. total requests is %d, expected threshold is %d", results.Requests, expectedRequests-(expectedRequests/1000))
}

return nil
}
11 changes: 9 additions & 2 deletions test/performance/benchmarks/real-traffic-test/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ LOOP:
influxReporter.AddDataPointsForMetrics(metricResults, benchmarkName)
_ = vegeta.NewTextReporter(metricResults).Report(os.Stdout)

if err := checkSLA(metricResults); err != nil {
if err := checkSLA(metricResults, rate); err != nil {
cleanup()
influxReporter.FlushAndShutdown()
log.Fatal(err.Error())
Expand Down Expand Up @@ -289,13 +289,20 @@ func getRandomBool() bool {
return rand.Intn(2) == 1
}

func checkSLA(results *vegeta.Metrics) error {
func checkSLA(results *vegeta.Metrics, rate vegeta.ConstantPacer) error {
// SLA 1: All requests should pass successfully.
if len(results.Errors) == 0 {
log.Println("SLA 1 passed. No errors occurred")
} else {
return fmt.Errorf("SLA 1 failed. Errors occurred: %d", len(results.Errors))
}

// SLA 2: making sure the defined vegeta rates is met
if results.Rate == rate.Rate(time.Second) {
log.Printf("SLA 2 passed. vegeta rate is %f", rate.Rate(time.Second))
} else {
return fmt.Errorf("SLA 2 failed. vegeta rate is %f, expected Rate is %f", results.Rate, rate.Rate(time.Second))
}

return nil
}
12 changes: 10 additions & 2 deletions test/performance/benchmarks/rollout-probe/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"flag"
"fmt"
"log"
"math"
"net/http"
"os"
"strings"
Expand Down Expand Up @@ -210,7 +211,7 @@ LOOP:
influxReporter.AddDataPointsForMetrics(metricResults, benchmarkName)
_ = vegeta.NewTextReporter(metricResults).Report(os.Stdout)

if err := checkSLA(metricResults); err != nil {
if err := checkSLA(metricResults, rate); err != nil {
// make sure to still write the stats
influxReporter.FlushAndShutdown()
log.Fatalf(err.Error())
Expand All @@ -219,7 +220,7 @@ LOOP:
log.Println("Load test finished")
}

func checkSLA(results *vegeta.Metrics) error {
func checkSLA(results *vegeta.Metrics, rate vegeta.ConstantPacer) error {
// SLA 1: The p95 latency hitting a Knative Service
// going through either JUST the queue-proxy or BOTH the activator and queue-proxy
// falls in the +10ms range. Given that we sleep 100ms, the SLA is between 100-110ms.
Expand All @@ -229,5 +230,12 @@ func checkSLA(results *vegeta.Metrics) error {
return fmt.Errorf("SLA 1 failed. P95 latency is not in 100-110ms time range: %s", results.Latencies.P95)
}

// SLA 2: making sure the defined vegeta rates is met
if math.Round(results.Rate) == rate.Rate(time.Second) {
log.Printf("SLA 2 passed. vegeta rate is %f", rate.Rate(time.Second))
} else {
return fmt.Errorf("SLA 2 failed. vegeta rate is %f, expected Rate is %f", results.Rate, rate.Rate(time.Second))
}

return nil
}
11 changes: 9 additions & 2 deletions test/performance/benchmarks/scale-from-zero/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ func main() {
_ = vegeta.NewTextReporter(metricResults).Report(os.Stdout)

sla := slas[*parallelCount]
if err := checkSLA(metricResults, sla.p95min, sla.p95max, sla.latencyMax); err != nil {
if err := checkSLA(metricResults, sla.p95min, sla.p95max, sla.latencyMax, *parallelCount); err != nil {
// make sure to still write the stats
influxReporter.FlushAndShutdown()
log.Fatalf(err.Error())
Expand Down Expand Up @@ -343,7 +343,7 @@ func runScaleFromZero(ctx context.Context, clients *test.Clients, idx int, ro *v
}
}

func checkSLA(results *vegeta.Metrics, p95min time.Duration, p95max time.Duration, latencyMax time.Duration) error {
func checkSLA(results *vegeta.Metrics, p95min time.Duration, p95max time.Duration, latencyMax time.Duration, parallel int) error {
// SLA 1: The p95 latency hitting the target has to be between the range defined
if results.Latencies.P95 >= p95min && results.Latencies.P95 <= p95max {
log.Printf("SLA 1 passed. P95 latency is in %d-%dms time range", p95min, p95max)
Expand All @@ -358,5 +358,12 @@ func checkSLA(results *vegeta.Metrics, p95min time.Duration, p95max time.Duratio
return fmt.Errorf("SLA 2 failed. Max latency is higher than %dms: %s", latencyMax, results.Latencies.Max)
}

// SLA 3: making sure the defined vegeta total requests is met, the defined vegeta total requests should equal to the count of ksvcs we want to run scale-from-zero in parallel
if results.Requests == uint64(parallel) {
log.Printf("SLA 3 passed. total requests is %d", results.Requests)
} else {
return fmt.Errorf("SLA 3 failed. total requests is %d, expected total requests is %d", results.Requests, uint64(parallel))
}

return nil
}

0 comments on commit 8162fe2

Please sign in to comment.