Skip to content

Commit

Permalink
ResourceUnavailableJob : met à jour l'URL uniquement en cas de 200 (#…
Browse files Browse the repository at this point in the history
…3632)

* Add failing test case

* Update URL only when status_code = 200

* PR comments

* Document that there is another crontab

* Document follow for ODS resources
  • Loading branch information
AntoineAugusti authored Dec 7, 2023
1 parent f804e27 commit affbcb0
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 27 deletions.
6 changes: 4 additions & 2 deletions apps/transport/lib/db/resource.ex
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@ defmodule DB.Resource do
require Logger

typed_schema "resource" do
# real url
# The resource's real URL
field(:url, :string)
field(:format, :string)
field(:last_import, :utc_datetime_usec)
field(:title, :string)
field(:last_update, :utc_datetime_usec)
# stable data.gouv.fr url if exists, else (for ODS gtfs as csv) it's the real url
# data.gouv.fr's stable URL:
# - for resources hosted on data.gouv.fr it redirects to static.data.gouv.fr
# - for others, it points to the final URL
field(:latest_url, :string)
field(:is_available, :boolean, default: true)

Expand Down
22 changes: 10 additions & 12 deletions apps/transport/lib/jobs/resource_unavailable_job.ex
Original file line number Diff line number Diff line change
Expand Up @@ -65,21 +65,16 @@ defmodule Transport.Jobs.ResourceUnavailableJob do

Resource
|> Repo.get!(resource_id)
|> update_url()
|> maybe_update_url()
|> historize_resource()
|> check_availability()
|> update_availability()
end

defp check_availability({:updated, 200 = _status_code, %Resource{} = resource}) do
defp check_availability({:updated, %Resource{} = resource}) do
{true, resource}
end

defp check_availability({:updated, status_code, %Resource{url: url} = resource})
when status_code != 200 do
perform_check(resource, url)
end

defp check_availability({:no_op, %Resource{} = resource}) do
perform_check(resource, Resource.download_url(resource))
end
Expand All @@ -97,22 +92,25 @@ defmodule Transport.Jobs.ResourceUnavailableJob do

# GOTCHA: `filetype` is set to `"file"` for exports coming from ODS
# https://github.com/opendatateam/udata-ods/issues/250
defp update_url(%Resource{filetype: "file", url: url, latest_url: latest_url} = resource) do
# We "leverage" this bug because we need to resolve the final URL for
# some ODS resources referenced as external links
# https://github.com/etalab/transport-site/issues/3470
defp maybe_update_url(%Resource{filetype: "file", url: url, latest_url: latest_url} = resource) do
case follow(latest_url) do
{:ok, status_code, final_url} when final_url != url ->
{:ok, 200 = _status_code, final_url} when final_url != url ->
resource = resource |> Ecto.Changeset.change(%{url: final_url}) |> Repo.update!()
{:updated, status_code, resource}
{:updated, resource}

_ ->
{:no_op, resource}
end
end

defp update_url(%Resource{} = resource), do: {:no_op, resource}
defp maybe_update_url(%Resource{} = resource), do: {:no_op, resource}

defp historize_resource({:no_op, %Resource{}} = payload), do: payload

defp historize_resource({:updated, _status_code, %Resource{id: resource_id}} = payload) do
defp historize_resource({:updated, %Resource{id: resource_id}} = payload) do
%{resource_id: resource_id}
|> Transport.Jobs.ResourceHistoryJob.historize_and_validate_job(history_options: [unique: nil])
|> Oban.insert!()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ defmodule Transport.Test.Transport.Jobs.ResourceUnavailableJobTest do
assert 1 == count_resource_unavailabilities()
%ResourceUnavailability{resource_id: ^resource_id, start: start, end: nil} = ResourceUnavailability |> Repo.one!()
assert DateTime.diff(DateTime.utc_now(), start) <= 1
assert %{is_available: false} = Repo.reload(resource)
assert %DB.Resource{is_available: false} = Repo.reload(resource)
end

test "when there is an ongoing unavailability and the resource stays unavailable" do
Expand All @@ -74,7 +74,7 @@ defmodule Transport.Test.Transport.Jobs.ResourceUnavailableJobTest do
%ResourceUnavailability{resource_id: ^resource_id, start: ^start, end: nil} =
ResourceUnavailability |> Repo.one!()

assert %{is_available: false} = Repo.reload(resource)
assert %DB.Resource{is_available: false} = Repo.reload(resource)
end

test "when there is an ongoing unavailability and the resource is now available" do
Expand All @@ -92,7 +92,7 @@ defmodule Transport.Test.Transport.Jobs.ResourceUnavailableJobTest do
ResourceUnavailability |> Repo.one!()

assert DateTime.diff(DateTime.utc_now(), date_end) <= 1
assert %{is_available: true} = Repo.reload(resource)
assert %DB.Resource{is_available: true} = Repo.reload(resource)
end

test "when there are no unavailabilities and the resource is available" do
Expand All @@ -103,7 +103,7 @@ defmodule Transport.Test.Transport.Jobs.ResourceUnavailableJobTest do
assert :ok == perform_job(ResourceUnavailableJob, %{"resource_id" => resource.id})

assert 0 == count_resource_unavailabilities()
assert %{is_available: true} = Repo.reload(resource)
assert %DB.Resource{is_available: true} = Repo.reload(resource)
end

test "uses latest_url when relevant" do
Expand All @@ -122,7 +122,7 @@ defmodule Transport.Test.Transport.Jobs.ResourceUnavailableJobTest do
assert :ok == perform_job(ResourceUnavailableJob, %{"resource_id" => resource.id})

assert 0 == count_resource_unavailabilities()
assert %{is_available: true} = Repo.reload(resource)
assert %DB.Resource{is_available: true} = Repo.reload(resource)
end

test "updates the resource's url when needed" do
Expand All @@ -145,15 +145,15 @@ defmodule Transport.Test.Transport.Jobs.ResourceUnavailableJobTest do

Transport.HTTPoison.Mock
|> expect(:get, fn ^new_url ->
{:ok, %HTTPoison.Response{status_code: 500}}
{:ok, %HTTPoison.Response{status_code: 200}}
end)

Transport.AvailabilityChecker.Mock |> expect(:available?, fn _format, ^new_url -> false end)
# `Transport.AvailabilityChecker.Mock` is not called

assert :ok == perform_job(ResourceUnavailableJob, %{"resource_id" => resource.id})

assert 1 == count_resource_unavailabilities()
assert %{is_available: false, url: ^new_url} = Repo.reload(resource)
assert 0 == count_resource_unavailabilities()
assert %DB.Resource{is_available: true, url: ^new_url} = Repo.reload(resource)

assert [
%{
Expand Down Expand Up @@ -209,6 +209,30 @@ defmodule Transport.Test.Transport.Jobs.ResourceUnavailableJobTest do
] = all_enqueued(worker: Transport.Jobs.Workflow)
end

test "does not update resource.url if the status code is 404" do
%DB.Resource{} =
resource =
insert(:resource,
url: url = "https://static.data.gouv.fr/gtfs.zip",
latest_url: latest_url = "https://www.data.gouv.fr/latest_url",
is_available: true,
datagouv_id: "foo",
filetype: "file"
)

expect(Transport.HTTPoison.Mock, :get, fn ^latest_url ->
{:ok, %HTTPoison.Response{status_code: 404}}
end)

expect(Transport.AvailabilityChecker.Mock, :available?, fn _format, ^latest_url -> false end)

assert :ok == perform_job(ResourceUnavailableJob, %{"resource_id" => resource.id})

assert 1 == count_resource_unavailabilities()
assert %DB.Resource{is_available: false, url: ^url} = Repo.reload(resource)
assert [] == all_enqueued()
end

test "does not perform a real availability check if the resource is bypassed" do
url = "https://example.com/stop-monitoring"

Expand Down Expand Up @@ -253,7 +277,7 @@ defmodule Transport.Test.Transport.Jobs.ResourceUnavailableJobTest do
assert :ok == perform_job(ResourceUnavailableJob, %{"resource_id" => resource.id})

assert 0 == count_resource_unavailabilities()
assert %{is_available: true} = Repo.reload(resource)
assert %DB.Resource{is_available: true} = Repo.reload(resource)
end
end

Expand Down
8 changes: 5 additions & 3 deletions config/runtime.exs
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,11 @@ end
base_oban_conf = [repo: DB.Repo]

# Oban jobs that should run in every deployed environment (staging, prod)
# but not during dev or test
# Be careful : there is "app_env :prod" in contrast to :staging (ie production website vs prochainement)
# and "config_env :prod" in contrast to :dev et :test
# but not during dev or test.
#
# - There is "app_env :prod" in contrast to :staging (ie production website vs prochainement)
# and "config_env :prod" in contrast to :dev et :test
# - ⚠️ There is another legacy crontab in `Transport.Scheduler`, see `scheduler.ex`
oban_prod_crontab = [
{"0 */6 * * *", Transport.Jobs.ResourceHistoryAndValidationDispatcherJob},
{"30 */6 * * *", Transport.Jobs.GTFSToGeoJSONConverterJob},
Expand Down

0 comments on commit affbcb0

Please sign in to comment.