Skip to content

Commit

Permalink
Merge pull request #652 from gulfofmaine/backoff
Browse files Browse the repository at this point in the history
Exponential backoff from timeouts
  • Loading branch information
abkfenris authored Dec 22, 2022
2 parents 82990b1 + 3020bec commit 758dc29
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 21 deletions.
1 change: 1 addition & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Changes:
Fixes:

- Avoids scheduling additional dataset or server refreshes if there is already a refresh task in the queue. This works by when the `/refresh/` view is triggered, it instead schedules a quick task that then checks if the refresh is scheduled (or active or reserved). If it is already scheduled it throws a warning, otherwise it schedules the task.
- Add exponential backoff when timeout related errors are encountered.

## 0.4.11 - 12/20/2022

Expand Down
68 changes: 47 additions & 21 deletions app/deployments/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,10 @@ def update_values_for_timeseries(timeseries):
return

except Timeout as error:
logger.error(
(
f"Timeout when trying to retrieve dataset {timeseries[0].dataset.name} "
f"with constraint {timeseries[0].constraints}: {error}"
),
extra={
"timeseries": timeseries,
"constraints": timeseries[0].constraints,
},
exc_info=True,
raise BackoffError(
f"Timeout when trying to retrieve dataset {timeseries[0].dataset.name} "
f"with constraint {timeseries[0].constraints}: {error}",
)
return

except OSError as error:
logger.error(
Expand Down Expand Up @@ -130,7 +122,19 @@ def refresh_dataset(dataset_id: int, healthcheck: bool = False):

for constraints, timeseries in groups.items():
time.sleep(request_refresh_time_seconds)
update_values_for_timeseries(timeseries)

try:
update_values_for_timeseries(timeseries)
except BackoffError:
new_request_refresh_time_seconds = max(request_refresh_time_seconds, 1) * 2
logger.error(
f"Some form of timeout encountered while refreshing dataset {dataset_id}"
f"Increasing backoff from {request_refresh_time_seconds} to "
"{new_request_refresh_time_seconds}",
extra={"timeseries": timeseries, "constraints": constraints},
exc_info=True,
)
request_refresh_time_seconds = new_request_refresh_time_seconds

if healthcheck:
dataset.healthcheck_complete()
Expand Down Expand Up @@ -335,6 +339,10 @@ def error_extra(timeseries_group, compare_text: str = None):
return extra


class BackoffError(Exception):
"""Raise when a timeout occurs to trigger a backoff and slow down requests"""


def handle_500_unrecognized_constraint(timeseries_group, compare_text: str) -> bool:
"""Handle when one of the constraints is invalid
Expand Down Expand Up @@ -371,29 +379,44 @@ def handle_500_errors(timeseries_group, compare_text: str) -> bool:
return False


def handle_400_errors(timeseries_group, compare_text: str) -> bool:
def handle_400_errors(timeseries_group, compare_text: str, error: Exception) -> bool:
"""Handle various types of known 400 errors"""
if handle_400_unrecognized_variable(timeseries_group, compare_text):
return True

if handle_404_errors(timeseries_group, compare_text):
return True

if handle_429_too_many_requests(timeseries_group, compare_text):
if handle_429_too_many_requests(timeseries_group, compare_text, error):
return True

return False


def handle_429_too_many_requests(timeseries_group, compare_text: str) -> bool:
def handle_408_request_timeout(
timeseries_group,
compare_text: str,
error: Exception,
) -> bool:
"""Handle 408 timeouts"""
if "code=408" in compare_text and "TimeoutException" in compare_text:
raise BackoffError(
f"Too many requests to server {timeseries_group[0].dataset.server}",
) from error

return False


def handle_429_too_many_requests(
timeseries_group,
compare_text: str,
error: Exception,
) -> bool:
"""Too many requests too quickly to the server"""
if "Too Many Requests" in compare_text and "code=429" in compare_text:
logger.error(
raise BackoffError(
f"Too many requests to server {timeseries_group[0].dataset.server}",
extra=error_extra(timeseries_group, compare_text),
exc_info=True,
)
return True
) from error

return False

Expand Down Expand Up @@ -519,6 +542,9 @@ def handle_http_errors(timeseries_group, error: HTTPError) -> bool:
)
return True

if error.response.status_code == 408:
raise BackoffError("408 Backoff encountered") from error

if error.response.status_code == 500:
url = error.request.url

Expand Down Expand Up @@ -550,7 +576,7 @@ def handle_http_errors(timeseries_group, error: HTTPError) -> bool:
except AttributeError:
pass

if handle_400_errors(timeseries_group, str(error)):
if handle_400_errors(timeseries_group, str(error), error):
return True

if handle_500_errors(timeseries_group, str(error)):
Expand Down

0 comments on commit 758dc29

Please sign in to comment.