Skip to content

Commit

Permalink
Merge branch 'search-spike' of github.com:etalab/transport-site into …
Browse files Browse the repository at this point in the history
…search-spike
  • Loading branch information
thbar committed Jan 10, 2024
2 parents d75221b + 0548c69 commit 47411fc
Show file tree
Hide file tree
Showing 10 changed files with 40 additions and 322 deletions.
24 changes: 4 additions & 20 deletions apps/transport/lib/db/dataset.ex
Original file line number Diff line number Diff line change
Expand Up @@ -977,41 +977,25 @@ defmodule DB.Dataset do

defp cast_nation_dataset(changeset, _), do: changeset

# Looks up a single commune by its INSEE code.
# Returns the matching `Commune`, or `nil` (after logging a warning) when no row matches.
@spec get_commune_by_insee(binary()) :: Commune.t() | nil
defp get_commune_by_insee(insee) do
  commune = Repo.get_by(Commune, insee: insee)

  if is_nil(commune) do
    Logger.warning("Unable to find zone with INSEE #{insee}")
  end

  commune
end

# Clears the `:communes` association when no associated territory name is set,
# i.e. when the third argument is `nil` or an empty string.
# The spec admits `nil` because this clause explicitly matches it.
@spec cast_datagouv_zone(Ecto.Changeset.t(), map(), binary() | nil) :: Ecto.Changeset.t()
defp cast_datagouv_zone(changeset, _, associated_territory_name)
     when associated_territory_name in [nil, ""] do
  changeset
  |> change()
  |> put_assoc(:communes, [])
end

# We’ll only cast datagouv zone if there is something written in the associated territory name in the backoffice
defp cast_datagouv_zone(changeset, %{"zones" => zones_insee}, _associated_territory_name) do
communes =
zones_insee
|> Enum.map(&get_commune_by_insee/1)
|> Enum.filter(fn z -> not is_nil(z) end)
Commune
|> where([c], c.insee in ^zones_insee)
|> Repo.all()

changeset
|> change
|> put_assoc(:communes, communes)
end

Expand Down
66 changes: 23 additions & 43 deletions apps/transport/lib/jobs/resource_unavailable_job.ex
Original file line number Diff line number Diff line change
Expand Up @@ -41,24 +41,15 @@ end

defmodule Transport.Jobs.ResourceUnavailableJob do
@moduledoc """
Job checking if a resource is available over HTTP or not and
storing unavailabilities in that case.
It also keeps the following fields of the relevant resource up to date:
- is_available (if the availability of the resource changes)
- url (if latest_url points to a new URL)
This job:
- Updates the URL if needed by following the latest_url for files stored on data.gouv.fr
- For all resources, checks whether the resource is available over HTTP. If not, it creates an unavailability in the database
- Updates is_available if the availability of the resource has changed
"""
use Oban.Worker, unique: [period: {9, :minutes}], max_attempts: 5
require Logger
alias DB.{Repo, Resource, ResourceUnavailability}

# Set this env variable to a list of `resource.id`s (comma separated) to bypass
# `AvailabilityChecker.available?`. This is *not* something that should be used
# for too long or for too many resources.
# Example values: `42,1337`
# https://github.com/etalab/transport-site/issues/3470
@bypass_ids_env_name "BYPASS_RESOURCE_AVAILABILITY_RESOURCE_IDS"

@impl Oban.Worker
def perform(%Oban.Job{args: %{"resource_id" => resource_id}}) do
Logger.info("Running ResourceUnavailableJob for #{resource_id}")
Expand All @@ -68,33 +59,11 @@ defmodule Transport.Jobs.ResourceUnavailableJob do
|> maybe_update_url()
|> historize_resource()
|> check_availability()
|> update_availability()
end

# Resources tagged `:updated` are reported as available without running any check.
# NOTE(review): presumably `:updated` means the URL was just refreshed by a previous step — confirm.
defp check_availability({:updated, %Resource{} = resource}) do
{true, resource}
end

# Resources tagged `:no_op` are checked against their download URL via `perform_check/2`.
defp check_availability({:no_op, %Resource{} = resource}) do
perform_check(resource, Resource.download_url(resource))
end

defp perform_check(%Resource{id: resource_id, format: format} = resource, check_url) do
bypass_resource_ids = @bypass_ids_env_name |> System.get_env("") |> String.split(",")

if to_string(resource_id) in bypass_resource_ids do
Logger.info("is_available=true for resource##{resource_id} because the check is bypassed")
{true, resource}
else
{Transport.AvailabilityChecker.Wrapper.available?(format, check_url), resource}
end
|> update_resource()
|> create_or_update_resource_unavailability()
end

# GOTCHA: `filetype` is set to `"file"` for exports coming from ODS
# https://github.com/opendatateam/udata-ods/issues/250
# We "leverage" this bug because we need to resolve the final URL for
# some ODS resources referenced as external links
# https://github.com/etalab/transport-site/issues/3470
# We only update the URL when filetype is "file", i.e. for files hosted on data.gouv.fr
defp maybe_update_url(%Resource{filetype: "file", url: url, latest_url: latest_url} = resource) do
case follow(latest_url) do
{:ok, 200 = _status_code, final_url} when final_url != url ->
Expand All @@ -118,12 +87,23 @@ defmodule Transport.Jobs.ResourceUnavailableJob do
payload
end

defp update_availability({is_available, %Resource{} = resource}) do
resource |> Resource.changeset(%{is_available: is_available}) |> DB.Repo.update!()
create_resource_unavailability(is_available, resource)
# A resource tagged `:updated` had its URL followed until a 200 was returned,
# so it is reported as available without another check.
defp check_availability({:updated, %Resource{} = resource}), do: {true, resource}

# Otherwise, ask the availability checker about the resource's download URL,
# using the resource's format. Returns `{is_available, resource}`.
defp check_availability({:no_op, %Resource{} = resource}) do
  status =
    Transport.AvailabilityChecker.Wrapper.available?(
      resource.format,
      Resource.download_url(resource)
    )

  {status, resource}
end

# Persists the computed availability on the resource (raising if the update fails)
# and returns `{is_available, resource}` with the updated struct.
defp update_resource({is_available, %Resource{} = resource}) do
  changeset = Resource.changeset(resource, %{is_available: is_available})
  {is_available, DB.Repo.update!(changeset)}
end

def create_resource_unavailability(false = _is_available, %Resource{} = resource) do
def create_or_update_resource_unavailability({false = _is_available, %Resource{} = resource}) do
case ResourceUnavailability.ongoing_unavailability(resource) do
nil ->
%ResourceUnavailability{resource: resource, start: now()}
Expand All @@ -136,7 +116,7 @@ defmodule Transport.Jobs.ResourceUnavailableJob do
end
end

def create_resource_unavailability(true = _is_available, %Resource{} = resource) do
def create_or_update_resource_unavailability({true = _is_available, %Resource{} = resource}) do
case ResourceUnavailability.ongoing_unavailability(resource) do
%ResourceUnavailability{} = resource_unavailability ->
resource_unavailability
Expand Down
12 changes: 3 additions & 9 deletions apps/transport/lib/transport/import_data.ex
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ defmodule Transport.ImportData do
def is_ods_resource?(%{"harvest" => %{"uri" => uri}}) do
# Possible URL:
# https://data.angers.fr/api/explore/v2.1/catalog/datasets/angers-loire-metropole-horaires-reseau-irigo-gtfs-rt/exports/json
String.match?(uri, ~r{/api/explore/v(\d+\.\d+)/catalog/datasets/.*/exports/(json|csv)$}i)
String.match?(uri, ~r{/api/explore/v\d+\.\d+/catalog/datasets/.*/exports/\w+$}i)
end

def is_ods_resource?(_), do: false
Expand Down Expand Up @@ -772,9 +772,6 @@ defmodule Transport.ImportData do
...> |> ImportData.formated_format("bike-scooter-sharing", false)
"json"
iex> formated_format(%{"title" => "Export au format GeoJSON", "format" => "json"}, "low-emission-zones", false)
"geojson"
iex> formated_format(%{"url" => "https://data.strasbourg.eu/api/datasets/1.0/zfe_voie_exception/alternative_exports/zfe_voie_speciale_eurometropole_strasbourg_geojson", "format" => "a"}, "low-emission-zones", false)
"geojson"
Expand Down Expand Up @@ -818,11 +815,8 @@ defmodule Transport.ImportData do
end
end

# Classify GeoJSONs from ODS as geojson instead of json
# See https://github.com/opendatateam/udata-ods/issues/211
defp is_geojson?(%{"title" => "Export au format GeoJSON"}, _), do: true
defp is_geojson?(%{"url" => url}, format), do: is_format?(format, ["geojson"]) or String.ends_with?(url, "geojson")
defp is_geojson?(_, format), do: is_format?(format, ["geojson"])
defp is_geojson?(%{"url" => url}, format), do: is_format?(format, "geojson") or String.ends_with?(url, "geojson")
defp is_geojson?(_, format), do: is_format?(format, "geojson")

defp is_gbfs?(%{"url" => url}) do
if String.contains?(url, "gbfs") do
Expand Down
3 changes: 1 addition & 2 deletions apps/transport/lib/transport_web.ex
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ defmodule TransportWeb do
below. Instead, define any helper function in modules
and import those modules here.
"""
def static_paths,
do: ~w(js css fonts images data favicon.ico documents BingSiteAuth.xml google5be4b09db1274976.html demo_rt.html)
def static_paths, do: ~w(css documents images js favicon.ico)

def controller do
quote do
Expand Down
7 changes: 1 addition & 6 deletions apps/transport/lib/transport_web/views/resource_view.ex
Original file line number Diff line number Diff line change
Expand Up @@ -267,15 +267,10 @@ defmodule TransportWeb.ResourceView do
false
iex> should_display_description?(%DB.Resource{description: "Bonjour", title: "Foo"})
true
iex> should_display_description?(%DB.Resource{description: "Bonjour", title: "Export au format CSV"})
false
"""
def should_display_description?(%DB.Resource{description: nil}), do: false
def should_display_description?(%DB.Resource{title: nil}), do: false

def should_display_description?(%DB.Resource{title: title}) do
not String.starts_with?(title, "Export au format")
end
def should_display_description?(%DB.Resource{}), do: true

def networks_start_end_dates(assigns) do
end_date_class = fn end_date ->
Expand Down
4 changes: 0 additions & 4 deletions apps/transport/priv/static/BingSiteAuth.xml

This file was deleted.

Loading

0 comments on commit 47411fc

Please sign in to comment.