Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Vegeta rates / targets to SLA in performance tests #14429

Merged
merged 10 commits into from
Jan 15, 2024
11 changes: 9 additions & 2 deletions test/performance/benchmarks/dataplane-probe/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ LOOP:
influxReporter.AddDataPointsForMetrics(metricResults, benchmarkName)
_ = vegeta.NewTextReporter(metricResults).Report(os.Stdout)

if err := checkSLA(metricResults, t.slaMin, t.slaMax); err != nil {
if err := checkSLA(metricResults, t.slaMin, t.slaMax, rate, *duration); err != nil {
// make sure to still write the stats
influxReporter.FlushAndShutdown()
log.Fatalf(err.Error())
Expand All @@ -168,7 +168,7 @@ LOOP:
log.Println("Dataplane probe test finished")
}

func checkSLA(results *vegeta.Metrics, slaMin time.Duration, slaMax time.Duration) error {
func checkSLA(results *vegeta.Metrics, slaMin time.Duration, slaMax time.Duration, rate vegeta.ConstantPacer, duration time.Duration) error {
// SLA 1: The p95 latency hitting the target has to be between the range defined
// in the target map on top.
if results.Latencies.P95 >= slaMin && results.Latencies.P95 <= slaMax {
Expand All @@ -177,5 +177,12 @@ func checkSLA(results *vegeta.Metrics, slaMin time.Duration, slaMax time.Duratio
return fmt.Errorf("SLA 1 failed. P95 latency is not in %d-%dms time range: %s", slaMin, slaMax, results.Latencies.P95)
}

// SLA 2: make sure the total number of requests defined by rate and duration is met
if results.Requests == uint64(rate.Rate(time.Second)*duration.Seconds()) {
log.Printf("SLA 2 passed. vegeta total request is %d", results.Requests)
} else {
return fmt.Errorf("SLA 2 failed. vegeta total request is %d, expected total request is %f", results.Requests, rate.Rate(time.Second)*duration.Seconds())
}

return nil
}
17 changes: 15 additions & 2 deletions test/performance/benchmarks/load-test/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ func main() {
influxReporter.AddDataPointsForMetrics(metricResults, benchmarkName)
_ = vegeta.NewTextReporter(metricResults).Report(os.Stdout)

if err := checkSLA(metricResults); err != nil {
if err := checkSLA(metricResults, pacers, durations); err != nil {
// make sure to still write the stats
influxReporter.FlushAndShutdown()
log.Fatalf(err.Error())
Expand Down Expand Up @@ -156,7 +156,7 @@ func processResults(ctx context.Context, results <-chan *vegeta.Result, reporter
}
}

func checkSLA(results *vegeta.Metrics) error {
func checkSLA(results *vegeta.Metrics, pacers []vegeta.Pacer, durations []time.Duration) error {
// SLA 1: the p95 latency over the 0->3k stepped burst has to fall
// in the +15ms range (we sleep 100 ms, so 100-115ms).
// This includes a mix of cold-starts and steady state (once the autoscaling decisions have leveled off).
Expand All @@ -183,5 +183,18 @@ func checkSLA(results *vegeta.Metrics) error {
return fmt.Errorf("SLA 3 failed. Errors occurred: %d", len(results.Errors))
}

// SLA 4: making sure the defined vegeta total requests is met
var expectedSum float64
var expectedRequests uint64
for i := 0; i < len(pacers); i++ {
expectedSum = expectedSum + pacers[i].Rate(time.Second)*durations[i].Seconds()
}
expectedRequests = uint64(expectedSum)
if results.Requests >= expectedRequests-(expectedRequests/1000) && results.Requests <= expectedRequests+(expectedRequests/1000) {
ReToCode marked this conversation as resolved.
Show resolved Hide resolved
log.Printf("SLA 4 passed. total requests is in %d-%d range", expectedRequests-(expectedRequests/1000), expectedRequests+(expectedRequests/1000))
} else {
return fmt.Errorf("SLA 4 failed. total requests is %d, not in %d-%d range", results.Requests, expectedRequests-(expectedRequests/1000), expectedRequests+(expectedRequests/1000))
}

return nil
}
11 changes: 9 additions & 2 deletions test/performance/benchmarks/real-traffic-test/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ LOOP:
influxReporter.AddDataPointsForMetrics(metricResults, benchmarkName)
_ = vegeta.NewTextReporter(metricResults).Report(os.Stdout)

if err := checkSLA(metricResults); err != nil {
if err := checkSLA(metricResults, rate); err != nil {
cleanup()
influxReporter.FlushAndShutdown()
log.Fatal(err.Error())
Expand Down Expand Up @@ -289,13 +289,20 @@ func getRandomBool() bool {
return rand.Intn(2) == 1
}

func checkSLA(results *vegeta.Metrics) error {
// checkSLA validates the real-traffic benchmark results against its SLAs.
//
// results holds the aggregated vegeta metrics of the attack and rate is the
// constant pacer the attack was configured with. It returns nil when every
// SLA passes, or an error describing the first SLA that failed.
func checkSLA(results *vegeta.Metrics, rate vegeta.ConstantPacer) error {
	// SLA 1: All requests should pass successfully.
	if len(results.Errors) != 0 {
		return fmt.Errorf("SLA 1 failed. Errors occurred: %d", len(results.Errors))
	}
	log.Println("SLA 1 passed. No errors occurred")

	// SLA 2: make sure the defined vegeta rate is met.
	// results.Rate is a float64 reported by vegeta, so an exact == comparison
	// against the configured rate is brittle. Accept anything within half a
	// request per second, which mirrors the rounding used by the rollout-probe
	// benchmark's SLA check.
	const rateTolerance = 0.5
	expectedRate := rate.Rate(time.Second)
	delta := results.Rate - expectedRate
	if delta < 0 {
		delta = -delta
	}
	// Written as a positive match so that a NaN rate fails the SLA.
	if delta <= rateTolerance {
		log.Printf("SLA 2 passed. vegeta rate is %f", expectedRate)
		return nil
	}
	return fmt.Errorf("SLA 2 failed. vegeta rate is %f, expected Rate is %f", results.Rate, expectedRate)
}
12 changes: 10 additions & 2 deletions test/performance/benchmarks/rollout-probe/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"flag"
"fmt"
"log"
"math"
"net/http"
"os"
"strings"
Expand Down Expand Up @@ -210,7 +211,7 @@ LOOP:
influxReporter.AddDataPointsForMetrics(metricResults, benchmarkName)
_ = vegeta.NewTextReporter(metricResults).Report(os.Stdout)

if err := checkSLA(metricResults); err != nil {
if err := checkSLA(metricResults, rate); err != nil {
// make sure to still write the stats
influxReporter.FlushAndShutdown()
log.Fatalf(err.Error())
Expand All @@ -219,7 +220,7 @@ LOOP:
log.Println("Load test finished")
}

func checkSLA(results *vegeta.Metrics) error {
func checkSLA(results *vegeta.Metrics, rate vegeta.ConstantPacer) error {
// SLA 1: The p95 latency hitting a Knative Service
// going through either JUST the queue-proxy or BOTH the activator and queue-proxy
// falls in the +10ms range. Given that we sleep 100ms, the SLA is between 100-110ms.
Expand All @@ -229,5 +230,12 @@ func checkSLA(results *vegeta.Metrics) error {
return fmt.Errorf("SLA 1 failed. P95 latency is not in 100-110ms time range: %s", results.Latencies.P95)
}

// SLA 2: making sure the defined vegeta rates is met
if math.Round(results.Rate) == rate.Rate(time.Second) {
log.Printf("SLA 2 passed. vegeta rate is %f", rate.Rate(time.Second))
} else {
return fmt.Errorf("SLA 2 failed. vegeta rate is %f, expected Rate is %f", results.Rate, rate.Rate(time.Second))
}

return nil
}
11 changes: 9 additions & 2 deletions test/performance/benchmarks/scale-from-zero/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ func main() {
_ = vegeta.NewTextReporter(metricResults).Report(os.Stdout)

sla := slas[*parallelCount]
if err := checkSLA(metricResults, sla.p95min, sla.p95max, sla.latencyMax); err != nil {
if err := checkSLA(metricResults, sla.p95min, sla.p95max, sla.latencyMax, *parallelCount); err != nil {
// make sure to still write the stats
influxReporter.FlushAndShutdown()
log.Fatalf(err.Error())
Expand Down Expand Up @@ -343,7 +343,7 @@ func runScaleFromZero(ctx context.Context, clients *test.Clients, idx int, ro *v
}
}

func checkSLA(results *vegeta.Metrics, p95min time.Duration, p95max time.Duration, latencyMax time.Duration) error {
func checkSLA(results *vegeta.Metrics, p95min time.Duration, p95max time.Duration, latencyMax time.Duration, parallel int) error {
// SLA 1: The p95 latency hitting the target has to be between the range defined
if results.Latencies.P95 >= p95min && results.Latencies.P95 <= p95max {
log.Printf("SLA 1 passed. P95 latency is in %d-%dms time range", p95min, p95max)
Expand All @@ -358,5 +358,12 @@ func checkSLA(results *vegeta.Metrics, p95min time.Duration, p95max time.Duratio
return fmt.Errorf("SLA 2 failed. Max latency is higher than %dms: %s", latencyMax, results.Latencies.Max)
}

// SLA 3: make sure the expected total number of vegeta requests is met. It should equal the number of ksvcs we scale from zero in parallel.
if results.Requests == uint64(parallel) {
xiangpingjiang marked this conversation as resolved.
Show resolved Hide resolved
log.Printf("SLA 3 passed. total requests is %d", results.Requests)
} else {
return fmt.Errorf("SLA 3 failed. total requests is %d, expected total requests is %d", results.Requests, uint64(parallel))
}

return nil
}
Loading