From f4783edac5f93637cff434e40a1532f2e4d9d925 Mon Sep 17 00:00:00 2001 From: Aske Date: Sat, 23 Mar 2024 10:52:07 +0100 Subject: [PATCH 1/4] Add retrying to AWS.Client. - Retrying is turned off by default. - Hackney specific errors are retried too. - Add retry test, and format. --- lib/aws/client.ex | 87 +++++++++++++++++++++++++++++++++++++++- test/aws/client_test.exs | 62 ++++++++++++++++++++++++++++ 2 files changed, 148 insertions(+), 1 deletion(-) diff --git a/lib/aws/client.ex b/lib/aws/client.ex index 8380df6a..4d7049fb 100644 --- a/lib/aws/client.ex +++ b/lib/aws/client.ex @@ -194,12 +194,97 @@ defmodule AWS.Client do %{client | http_client: http_client} end + @doc """ + Makes a HTTP request using the specified client. + + `opts` may contain the keyword `:enable_retries?` that enables request retries on known + errors such as 500s. + The keyword `:retry_opts` can further supply the arguments: `:max_retries`, `:base_sleep_time` & + `:cap_sleep_time` that control the retry mechanism. This uses exponential backoff with jitter to + sleep in between retry attempts. Default values are: + `retry_opts: [max_retries: 10, base_sleep_time: 5, cap_sleep_time: 5_000]` + + See "FullJitter" at: https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/ + + ### Examples + + iex> AWS.Client.request(client, :post, url, payload, headers, options) + {:ok, %{status_code: 200, body: body}} + + iex> AWS.Client.request(client, :post, url, payload, headers, enable_retries?: true) + {:ok, %{status_code: 200, body: body}} + """ def request(client, method, url, body, headers, opts \\ []) do + # Pop off all retry-related options from opts, so they aren't passed to the HTTP client. + {enable_retries?, opts} = Keyword.pop(opts, :enable_retries?, false) + {retry_num, opts} = Keyword.pop(opts, :retry_num, 0) + {retry_opts, opts} = Keyword.pop(opts, :retry_opts, []) + # Defaults for retry_opts + retry_opts = + Keyword.merge([max_retries: 10, base_sleep_time: 5, cap_sleep_time: 5_000], retry_opts) + + # HTTP Client options {mod, options} = Map.fetch!(client, :http_client) options = Keyword.merge(options, opts) - apply(mod, :request, [method, url, body, headers, options]) + + resp = apply(mod, :request, [method, url, body, headers, options]) + + retriable?(resp) + |> case do + :ok -> + resp + + :retry -> + if enable_retries? and should_retry?(retry_num, retry_opts) do + updated_opts = + Keyword.merge(opts, + retry_num: retry_num + 1, + enable_retries?: enable_retries?, + retry_opts: retry_opts + ) + + request(client, method, url, body, headers, updated_opts) + else + resp + end + + :error -> + resp + end end + def should_retry?(retry_num, retry_opts) do + max_retries = Keyword.fetch!(retry_opts, :max_retries) + + if retry_num >= max_retries do + # The max-limit of retries has been reached. Give up. + false + else + # Sleep and retry + base_sleep_time = Keyword.fetch!(retry_opts, :base_sleep_time) + cap_sleep_time = Keyword.fetch!(retry_opts, :cap_sleep_time) + + # This equivalent to "FullJitter" in https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/ + max_sleep_time = + min(cap_sleep_time, base_sleep_time * :math.pow(2, retry_num)) + |> round() + + Enum.random(0..max_sleep_time) + |> Process.sleep() + + true + end + end + + defp retriable?({:ok, %{status_code: status}}) when status >= 500, do: :retry + # These are Hackney specific: + defp retriable?({:error, :closed}), do: :retry + defp retriable?({:error, :connect_timeout}), do: :retry + defp retriable?({:error, :checkout_timeout}), do: :retry + defp retriable?({:error, _}), do: :error + defp retriable?({:ok, _}), do: :ok + defp retriable?({:ok, _, _}), do: :ok + def encode!(_client, payload, :query), do: AWS.Util.encode_query(payload) def encode!(client, payload, format) do diff --git a/test/aws/client_test.exs b/test/aws/client_test.exs index 4e9086b0..7a45d89a 100644 --- a/test/aws/client_test.exs +++ b/test/aws/client_test.exs @@ -74,7 +74,69 @@ defmodule AWS.ClientTest do # Bypass should not assert on the request having been completed. Bypass.pass(bypass) end + + test "retries failed requests", %{client: client, bypass: bypass} do + # Start the Agent + TestRetryCounter.start_link(0) + + Bypass.expect(bypass, "GET", "/timeout", fn conn -> + # Increase Agent counter + TestRetryCounter.increment() + # Return 500 so we force a retry from AWS.Client + Plug.Conn.resp(conn, 500, "") + end) + + assert {:ok, %{status_code: 500}} = + AWS.Client.request(client, :get, "#{url(bypass)}timeout", "", [], + # Enable retries + enable_retries?: true + ) + + # Assert 1 request + 10 (default max_retries value) retries was made + assert TestRetryCounter.value() == 1 + 10 + end + + test "retries failed requests with retry_opts", %{client: client, bypass: bypass} do + # Start the Agent + TestRetryCounter.start_link(0) + + Bypass.expect(bypass, "GET", "/timeout", fn conn -> + # Increase Agent counter + TestRetryCounter.increment() + # Return 500 so we force a retry from AWS.Client + Plug.Conn.resp(conn, 500, "") + end) + + max_retries = 2 + + assert {:ok, %{status_code: 500}} = + AWS.Client.request(client, :get, "#{url(bypass)}timeout", "", [], + # Enable retries + enable_retries?: true, + # Set retry options + retry_opts: [max_retries: max_retries, base_sleep_time: 10, cap_sleep_time: 1000] + ) + + # Assert 1st request + N retries was made + assert TestRetryCounter.value() == 1 + max_retries + end end defp url(bypass), do: "http://localhost:#{bypass.port}/" end + +defmodule TestRetryCounter do + use Agent + + def start_link(initial_value) do + Agent.start_link(fn -> initial_value end, name: __MODULE__) + end + + def value do + Agent.get(__MODULE__, & &1) + end + + def increment do + Agent.update(__MODULE__, &(&1 + 1)) + end +end From edadc0324483d58b0b601f3fcd4adf4057a2cd1a Mon Sep 17 00:00:00 2001 From: Aske Doerge Date: Fri, 29 Mar 2024 11:37:13 +0100 Subject: [PATCH 2/4] Switch retries test to using :counters instead of Agent. --- test/aws/client_test.exs | 36 ++++++++++-------------------------- 1 file changed, 10 insertions(+), 26 deletions(-) diff --git a/test/aws/client_test.exs b/test/aws/client_test.exs index 7a45d89a..2bc510aa 100644 --- a/test/aws/client_test.exs +++ b/test/aws/client_test.exs @@ -76,12 +76,12 @@ defmodule AWS.ClientTest do end test "retries failed requests", %{client: client, bypass: bypass} do - # Start the Agent - TestRetryCounter.start_link(0) + # Setup a Counter + counter = :counters.new(1, [:atomics]) Bypass.expect(bypass, "GET", "/timeout", fn conn -> - # Increase Agent counter - TestRetryCounter.increment() + # Increase counter + :counters.add(counter, 1, 1) # Return 500 so we force a retry from AWS.Client Plug.Conn.resp(conn, 500, "") end) @@ -93,16 +93,16 @@ defmodule AWS.ClientTest do ) # Assert 1 request + 10 (default max_retries value) retries was made - assert TestRetryCounter.value() == 1 + 10 + assert :counters.get(counter, 1) == 1 + 10 end test "retries failed requests with retry_opts", %{client: client, bypass: bypass} do - # Start the Agent - TestRetryCounter.start_link(0) + # Setup a Counter + counter = :counters.new(1, [:atomics]) Bypass.expect(bypass, "GET", "/timeout", fn conn -> - # Increase Agent counter - TestRetryCounter.increment() + # Increase counter + :counters.add(counter, 1, 1) # Return 500 so we force a retry from AWS.Client Plug.Conn.resp(conn, 500, "") end) @@ -118,25 +118,9 @@ defmodule AWS.ClientTest do ) # Assert 1st request + N retries was made - assert TestRetryCounter.value() == 1 + max_retries + assert :counters.get(counter, 1) == 1 + max_retries end end defp url(bypass), do: "http://localhost:#{bypass.port}/" end - -defmodule TestRetryCounter do - use Agent - - def start_link(initial_value) do - Agent.start_link(fn -> initial_value end, name: __MODULE__) - end - - def value do - Agent.get(__MODULE__, & &1) - end - - def increment do - Agent.update(__MODULE__, &(&1 + 1)) - end -end From c89b0b2e18b82e7211e29f4cbc0c1ae94cce8e49 Mon Sep 17 00:00:00 2001 From: Aske Doerge Date: Fri, 29 Mar 2024 11:56:44 +0100 Subject: [PATCH 3/4] Improve doc-string. --- lib/aws/client.ex | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/lib/aws/client.ex b/lib/aws/client.ex index 4d7049fb..46a3e822 100644 --- a/lib/aws/client.ex +++ b/lib/aws/client.ex @@ -197,22 +197,28 @@ defmodule AWS.Client do @doc """ Makes a HTTP request using the specified client. - `opts` may contain the keyword `:enable_retries?` that enables request retries on known - errors such as 500s. - The keyword `:retry_opts` can further supply the arguments: `:max_retries`, `:base_sleep_time` & - `:cap_sleep_time` that control the retry mechanism. This uses exponential backoff with jitter to - sleep in between retry attempts. Default values are: - `retry_opts: [max_retries: 10, base_sleep_time: 5, cap_sleep_time: 5_000]` + ## Retries and options. + The option `:enable_retries?` enables request retries on known errors such as 500s. + + * `enable_retries?` - Defaults to `false`. + * `retry_opts` - the options to configure retries in case of errors. This uses exponential backoff with jitter. + * `:max_retries` - the maximum number of retries (plus the initial request). Defaults to `10`. + * `:base_sleep_time` - the base sleep time in milliseconds. Defaults to `5`. + * `:cap_sleep_time` - the maximum sleep time between atttempts. Defaults to `5_000`. See "FullJitter" at: https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/ - ### Examples + ## Examples iex> AWS.Client.request(client, :post, url, payload, headers, options) {:ok, %{status_code: 200, body: body}} iex> AWS.Client.request(client, :post, url, payload, headers, enable_retries?: true) {:ok, %{status_code: 200, body: body}} + + iex> AWS.Client.request(client, :post, url, payload, headers, enable_retries?: true, retry_opts: [max_retries: 3]) + {:ok, %{status_code: 200, body: body}} + """ def request(client, method, url, body, headers, opts \\ []) do # Pop off all retry-related options from opts, so they aren't passed to the HTTP client. From f15e775542959714320f3bca191e034e5524e9cd Mon Sep 17 00:00:00 2001 From: Aske Doerge Date: Mon, 1 Apr 2024 13:00:15 +0200 Subject: [PATCH 4/4] Add Finch/Mint specific clauses. --- lib/aws/client.ex | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/aws/client.ex b/lib/aws/client.ex index 46a3e822..8668a822 100644 --- a/lib/aws/client.ex +++ b/lib/aws/client.ex @@ -282,11 +282,16 @@ defmodule AWS.Client do end end + # Retry on 500 defp retriable?({:ok, %{status_code: status}}) when status >= 500, do: :retry - # These are Hackney specific: + # Hackney specific defp retriable?({:error, :closed}), do: :retry defp retriable?({:error, :connect_timeout}), do: :retry defp retriable?({:error, :checkout_timeout}), do: :retry + # Finch/Mint specific + defp retriable?({:error, %{reason: :closed}}), do: :retry + defp retriable?({:error, %{reason: :timeout}}), do: :retry + # Do not retry on other erors defp retriable?({:error, _}), do: :error defp retriable?({:ok, _}), do: :ok defp retriable?({:ok, _, _}), do: :ok