Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automatic retries #182

Merged
merged 4 commits into from
Apr 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 97 additions & 1 deletion lib/aws/client.ex
Original file line number Diff line number Diff line change
Expand Up @@ -194,12 +194,108 @@ defmodule AWS.Client do
%{client | http_client: http_client}
end

@doc """
Makes an HTTP request using the specified client.

The request is delegated to the `{module, options}` pair stored under the
client's `:http_client` key. Any entries left in `opts` after the retry-related
options have been removed are merged over those options and handed to
`module.request/5`.

Returns whatever the HTTP client module returned for the last attempt.
Raises `KeyError` if `client` has no `:http_client` key.

## Retries and options

Retries are opt-in. When enabled, a request is retried on known transient
failures such as 5xx responses and closed/timed-out connections.

  * `:enable_retries?` - whether failed requests are retried. Defaults to `false`.
  * `:retry_opts` - options to configure retries. Retries use exponential
    backoff with jitter.
    * `:max_retries` - the maximum number of retries made after the initial
      request. Defaults to `10`.
    * `:base_sleep_time` - the base sleep time in milliseconds. Defaults to `5`.
    * `:cap_sleep_time` - the maximum sleep time between attempts in
      milliseconds. Defaults to `5_000`.

See "FullJitter" at: https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/

## Examples

    iex> AWS.Client.request(client, :post, url, payload, headers, options)
    {:ok, %{status_code: 200, body: body}}

    iex> AWS.Client.request(client, :post, url, payload, headers, enable_retries?: true)
    {:ok, %{status_code: 200, body: body}}

    iex> AWS.Client.request(client, :post, url, payload, headers, enable_retries?: true, retry_opts: [max_retries: 3])
    {:ok, %{status_code: 200, body: body}}

"""
def request(client, method, url, body, headers, opts \\ []) do
  # Pop off all retry-related options from opts, so they aren't passed to the HTTP client.
  {enable_retries?, opts} = Keyword.pop(opts, :enable_retries?, false)
  # `:retry_num` is set by the recursive call below to track the current attempt number.
  {retry_num, opts} = Keyword.pop(opts, :retry_num, 0)
  {retry_opts, opts} = Keyword.pop(opts, :retry_opts, [])

  # Caller-supplied retry options win over the defaults.
  retry_opts =
    Keyword.merge([max_retries: 10, base_sleep_time: 5, cap_sleep_time: 5_000], retry_opts)

  # HTTP client options: per-call opts override the ones configured on the client.
  {mod, options} = Map.fetch!(client, :http_client)
  options = Keyword.merge(options, opts)

  # Issue the request exactly once per attempt; the retry accounting relies on it.
  resp = mod.request(method, url, body, headers, options)

  case retriable?(resp) do
    :retry ->
      # `should_retry?/2` sleeps for the backoff interval before returning `true`,
      # so it is only consulted when retries are enabled.
      if enable_retries? and should_retry?(retry_num, retry_opts) do
        updated_opts =
          Keyword.merge(opts,
            retry_num: retry_num + 1,
            enable_retries?: enable_retries?,
            retry_opts: retry_opts
          )

        request(client, method, url, body, headers, updated_opts)
      else
        # Retries disabled or exhausted: surface the last response as-is.
        resp
      end

    outcome when outcome in [:ok, :error] ->
      resp
  end
end

@doc """
Decides whether another attempt should be made and, if so, sleeps first.

`retry_num` is the number of retries already performed and `retry_opts` is a
keyword list that must contain `:max_retries`, `:base_sleep_time` and
`:cap_sleep_time`.

Returns `false` immediately once `retry_num` has reached `:max_retries`.
Otherwise the calling process sleeps for a random number of milliseconds
between `0` and `min(cap_sleep_time, base_sleep_time * 2^retry_num)`, and
`true` is returned.

Raises `KeyError` when a required option is missing.
"""
@spec should_retry?(non_neg_integer(), keyword()) :: boolean()
def should_retry?(retry_num, retry_opts) do
  max_retries = Keyword.fetch!(retry_opts, :max_retries)

  if retry_num >= max_retries do
    # The max-limit of retries has been reached. Give up.
    false
  else
    # Sleep and retry
    base_sleep_time = Keyword.fetch!(retry_opts, :base_sleep_time)
    cap_sleep_time = Keyword.fetch!(retry_opts, :cap_sleep_time)

    # This is equivalent to "FullJitter" in
    # https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
    # `:math.pow/2` yields a float, hence the rounding before building the range.
    max_sleep_time =
      cap_sleep_time
      |> min(base_sleep_time * :math.pow(2, retry_num))
      |> round()

    0..max_sleep_time
    |> Enum.random()
    |> Process.sleep()

    true
  end
end

# Classifies the result of an HTTP client call as `:ok`, `:retry` or `:error`.

# Server-side failures (status 500 and above) are worth another attempt.
defp retriable?({:ok, %{status_code: code}}) when code >= 500, do: :retry
# Any other successful result, in either the 2- or 3-element tuple shape, is final.
defp retriable?({:ok, _response}), do: :ok
defp retriable?({:ok, _response, _extra}), do: :ok

# Hackney reports transient transport failures as bare atoms.
defp retriable?({:error, reason})
     when reason in [:closed, :connect_timeout, :checkout_timeout],
     do: :retry

# Finch/Mint wrap the reason in an error struct/map.
defp retriable?({:error, %{reason: reason}}) when reason in [:closed, :timeout], do: :retry

# Do not retry on other errors.
defp retriable?({:error, _other}), do: :error

def encode!(_client, payload, :query), do: AWS.Util.encode_query(payload)

def encode!(client, payload, format) do
Expand Down
46 changes: 46 additions & 0 deletions test/aws/client_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,52 @@ defmodule AWS.ClientTest do
# Bypass should not assert on the request having been completed.
Bypass.pass(bypass)
end

test "retries failed requests", %{client: client, bypass: bypass} do
  # Tracks how many requests actually reach the Bypass server.
  hits = :counters.new(1, [:atomics])

  Bypass.expect(bypass, "GET", "/timeout", fn conn ->
    :counters.add(hits, 1, 1)
    # Always answer with a 500 so AWS.Client keeps retrying until it gives up.
    Plug.Conn.resp(conn, 500, "")
  end)

  endpoint = url(bypass) <> "timeout"

  # Retries are enabled with the default retry options.
  result = AWS.Client.request(client, :get, endpoint, "", [], enable_retries?: true)

  # The last 500 response is handed back once retries are exhausted.
  assert {:ok, %{status_code: 500}} = result

  # 1 initial request + 10 retries (the default max_retries value).
  expected_requests = 1 + 10
  assert :counters.get(hits, 1) == expected_requests
end

test "retries failed requests with retry_opts", %{client: client, bypass: bypass} do
  # Tracks how many requests actually reach the Bypass server.
  hits = :counters.new(1, [:atomics])

  Bypass.expect(bypass, "GET", "/timeout", fn conn ->
    :counters.add(hits, 1, 1)
    # Always answer with a 500 so AWS.Client keeps retrying until it gives up.
    Plug.Conn.resp(conn, 500, "")
  end)

  max_retries = 2
  endpoint = url(bypass) <> "timeout"

  # Retries enabled, with explicit retry options overriding the defaults.
  request_opts = [
    enable_retries?: true,
    retry_opts: [max_retries: max_retries, base_sleep_time: 10, cap_sleep_time: 1000]
  ]

  result = AWS.Client.request(client, :get, endpoint, "", [], request_opts)

  assert {:ok, %{status_code: 500}} = result

  # 1 initial request + N retries.
  assert :counters.get(hits, 1) == 1 + max_retries
end
end

# Builds the base URL (with trailing slash) of the local Bypass server.
defp url(bypass) do
  port = bypass.port
  "http://localhost:#{port}/"
end
Expand Down
Loading