From 3cffe5c7801e7045c5148dbee5927493fba0783f Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Fri, 10 Jan 2025 15:35:29 +0100 Subject: [PATCH] ruler: increase retries backoff limit to 1m (#10403) * ruler: increase retries backoff limit to 1m the previous limit of 2s is too small and doesn't end up spreading out retries for long enough Signed-off-by: Dimitar Dimitrov * Update CHANGELOG.md entry Signed-off-by: Dimitar Dimitrov --------- Signed-off-by: Dimitar Dimitrov (cherry picked from commit 8bedb97dd18aa7b12863caa4c413b52c81cbc8ec) --- pkg/ruler/remotequerier.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/ruler/remotequerier.go b/pkg/ruler/remotequerier.go index f02eb4d866..3616d88b0d 100644 --- a/pkg/ruler/remotequerier.go +++ b/pkg/ruler/remotequerier.go @@ -363,8 +363,9 @@ func (q *RemoteQuerier) sendRequest(ctx context.Context, req *httpgrpc.HTTPReque return nil, fmt.Errorf("couldn't reserve a retry token") } // We want to wait at least the time for the backoff, but also don't want to exceed the rate limit. - // All of this is capped to the max backoff, so that we are less likely to overrun into the next evaluation. - retryDelay := max(retry.NextDelay(), min(retryConfig.MaxBackoff, retryReservation.Delay())) + // The rate-limit wait is capped to 1m, so that we are less likely to overrun into the next evaluation. + // 1m was selected as giving enough time to spread out the retries. + retryDelay := max(retry.NextDelay(), min(time.Minute, retryReservation.Delay())) level.Warn(logger).Log("msg", "failed to remotely evaluate query expression, will retry", "err", err, "retry_delay", retryDelay) select { case <-time.After(retryDelay):