diff --git a/apps/transport/lib/db/dataset.ex b/apps/transport/lib/db/dataset.ex
index 6a8026df50..53c55ef479 100644
--- a/apps/transport/lib/db/dataset.ex
+++ b/apps/transport/lib/db/dataset.ex
@@ -84,6 +84,8 @@ defmodule DB.Dataset do
# we ask in the backoffice for a name to display
# (used in the long title of a dataset and to find the associated datasets)
field(:associated_territory_name, :string)
+
+ field(:search_payload, :map)
end
def base_query, do: from(d in DB.Dataset, as: :dataset, where: d.is_active)
diff --git a/apps/transport/priv/repo/migrations/20231231135108_add_dataset_search_payload.exs b/apps/transport/priv/repo/migrations/20231231135108_add_dataset_search_payload.exs
new file mode 100644
index 0000000000..92486e43a0
--- /dev/null
+++ b/apps/transport/priv/repo/migrations/20231231135108_add_dataset_search_payload.exs
@@ -0,0 +1,9 @@
+defmodule DB.Repo.Migrations.AddDatasetSearchPayload do
+  # Adds the `search_payload` map column to the `dataset` table, defaulting to an empty map.
+  use Ecto.Migration
+
+  def change do
+    alter(table(:dataset), do: add(:search_payload, :map, default: %{}))
+  end
+end
diff --git a/scripts/search_engine.exs b/scripts/search_engine.exs
new file mode 100755
index 0000000000..fb117b07d8
--- /dev/null
+++ b/scripts/search_engine.exs
@@ -0,0 +1,292 @@
+#!/usr/bin/env elixir
+
+# Single-file prototype of a dataset search engine. This preamble configures a
+# throw-away Phoenix endpoint and installs the host application through Mix.install.
+#
+# The shebang goes through `env` so that `elixir` is looked up on the PATH; a bare
+# `#! elixir` is resolved relative to the current directory and fails when the
+# script is executed directly.
+
+# Root of the host Mix project (this script sits one directory below it).
+my_app_root = Path.join(__DIR__, "..")
+
+# Configuration read by Search.Endpoint (defined further down): it listens on
+# 127.0.0.1:5001. The signing salt and secret key base are placeholder values.
+Application.put_env(:search, Search.Endpoint,
+  http: [ip: {127, 0, 0, 1}, port: 5001],
+  server: true,
+  live_view: [signing_salt: "aaaaaaaa"],
+  secret_key_base: String.duplicate("a", 64)
+)
+
+# Install the host application from its local path, reusing its own config file
+# and lockfile, plus io_ansi_table for the optional console rendering.
+Mix.install(
+  [
+    {:my_app, path: my_app_root, env: :dev},
+    {:io_ansi_table, "~> 1.0"}
+  ],
+  config_path: Path.join(my_app_root, "config/config.exs"),
+  lockfile: Path.join(my_app_root, "mix.lock")
+)
+
+# NOTE: wondering if Flop (https://github.com/woylie/flop) could be a better fit than Scrivener (which is hardly maintained)
+# It provides cursor-based pagination as well as regular limit/offset stuff.
+
+defmodule Search.ErrorView do
+  # Renders any error template as the status message Phoenix builds from the template name.
+  def render(template, _assigns) do
+    Phoenix.Controller.status_message_from_template(template)
+  end
+end
+
+defmodule Search.HomeLive do
+  # LiveView served at "/": three free-text filters (title, resource format, resource
+  # mode) above the list of matching datasets, recomputed on every form change.
+  use Phoenix.LiveView, layout: {__MODULE__, :live}
+
+  use Phoenix.HTML, only: [text_input: 2]
+
+  # Starts with empty filters; the empty "config" map makes the first search unfiltered.
+  @impl true
+  def mount(_params, _session, socket) do
+    socket =
+      socket
+      |> assign(:title, "")
+      |> assign(:format, "")
+      |> assign(:mode, "")
+      |> update_datasets(%{"config" => %{}})
+
+    {:ok, socket}
+  end
+
+  # Versions of the loaded Phoenix / LiveView applications.
+  # NOTE(review): no caller is visible in this excerpt; presumably used by the layout markup, confirm.
+  defp phx_vsn, do: Application.spec(:phoenix, :vsn)
+  defp lv_vsn, do: Application.spec(:phoenix_live_view, :vsn)
+
+  # Layout wrapped around the LiveView content (see the `layout:` option above).
+  def render("live.html", assigns) do
+    ~H"""
+
+
+
+
+
+
+    <%= @inner_content %>
+    """
+  end
+
+  # Search form followed by the result count and one row per matching dataset.
+  @impl true
+  def render(assigns) do
+    ~H"""
+
+    <.form :let={f} id="search" for={%{}} as={:config} phx-change="change_form" phx-submit="ignore">
+
+    <%= text_input(f, :title,
+      value: @title,
+      placeholder: "Title",
+      autocorrect: "off"
+    ) %>
+    <%= text_input(f, :format,
+      value: @format,
+      placeholder: "Resource Format",
+      autocorrect: "off"
+    ) %>
+    <%= text_input(f, :mode,
+      value: @mode,
+      placeholder: "Resource Mode",
+      autocorrect: "off"
+    ) %>
+
+
+
+
+    <%= @datasets |> length %> datasets found
+
+
+
+    <%= for dataset <- @datasets do %>
+
+    <%= dataset.id %> |
+    <%= dataset.title %> |
+    <%= dataset.formats %> |
+    <%= dataset.modes %> |
+
+    <% end %>
+
+
+
+    """
+  end
+
+  import Ecto.Query
+
+  # Trims `value` (nil is treated as ""); returns nil when nothing is left,
+  # otherwise the trimmed string.
+  def nil_if_blank(value) do
+    case String.trim(value || "") do
+      "" -> nil
+      trimmed -> trimmed
+    end
+  end
+
+  # Runs the search with the filters found under params["config"] and stores the
+  # rendered rows in the :datasets assign. Blank or missing filters are passed as nil.
+  def update_datasets(socket, params) do
+    # NOTE: could be improved here to tap directly in the assigns
+    filters = [
+      title: nil_if_blank(get_in(params, ["config", "title"])),
+      format: nil_if_blank(get_in(params, ["config", "format"])),
+      mode: nil_if_blank(get_in(params, ["config", "mode"]))
+    ]
+
+    datasets =
+      filters
+      |> Searcher.search()
+      |> Enum.map(&Searcher.render/1)
+
+    assign(socket, :datasets, datasets)
+  end
+
+  # Any edit to the form re-runs the search with the submitted filter values.
+  @impl true
+  def handle_event("change_form", params, socket) do
+    {:noreply, update_datasets(socket, params)}
+  end
+
+  # The form declares phx-submit="ignore" because results already refresh on change.
+  # Without this clause a submit (e.g. pressing Enter in a field) finds no matching
+  # handle_event/3 clause and crashes the LiveView process.
+  def handle_event("ignore", _params, socket), do: {:noreply, socket}
+end
+
+defmodule Search.Router do
+ # Minimal router for the prototype: one HTML pipeline and one LiveView route.
+ use Phoenix.Router
+ import Phoenix.LiveView.Router
+
+ # Pipeline applied to browser requests; it only declares "html" as an accepted format.
+ pipeline :browser do
+ plug(:accepts, ["html"])
+ end
+
+ # Routes under "/" resolve their modules inside the Search namespace.
+ scope "/", Search do
+ pipe_through(:browser)
+
+ # "/" is served by Search.HomeLive with the :index live action.
+ live("/", HomeLive, :index)
+ end
+end
+
+defmodule Search.Endpoint do
+ # Endpoint of the prototype; its configuration is looked up under the :search OTP app.
+ use Phoenix.Endpoint, otp_app: :search
+ # Socket mounted at "/live" for LiveView connections.
+ socket("/live", Phoenix.LiveView.Socket)
+ # Every request is handed to the router.
+ plug(Search.Router)
+end
+
+defmodule SearchIndexer do
+  # Indexing side of the prototype: computes a denormalised search payload for each
+  # dataset and stores it in the dataset's `search_payload` column.
+  import Ecto.Query
+
+  # Loads every dataset together with its resources.
+  def fetch_items do
+    from(d in DB.Dataset,
+      preload: :resources
+      # limit: 10
+    )
+    |> DB.Repo.all()
+  end
+
+  # Returns the most recently inserted DB.ResourceHistory row for `resource_id`,
+  # or nil when the resource has none.
+  def find_resource_history(resource_id) do
+    from(rh in DB.ResourceHistory,
+      where: rh.resource_id == ^resource_id,
+      order_by: [desc: rh.inserted_at],
+      limit: 1
+    )
+    |> DB.Repo.one()
+  end
+
+  # Returns the most recently inserted DB.ResourceMetadata row for
+  # `resource_history_id`, or nil when there is none.
+  def find_resource_metadata(resource_history_id) do
+    from(rm in DB.ResourceMetadata,
+      where: rm.resource_history_id == ^resource_history_id,
+      order_by: [desc: rm.inserted_at],
+      limit: 1
+    )
+    |> DB.Repo.one()
+  end
+
+  # Builds the search payload of a dataset whose resources are preloaded.
+  #
+  # Returns a map with the keys :id, :datagouv_id, :title (the dataset's custom
+  # title), :description, :formats and :modes. Formats come from the resources;
+  # modes come from the "modes" entry of each resource's latest metadata. Both
+  # lists are free of nil entries and duplicates.
+  def compute_payload(%DB.Dataset{} = dataset) do
+    # NOTE: not optimized for N+1 because performance is good enough for now
+    %{
+      id: dataset.id,
+      datagouv_id: dataset.datagouv_id,
+      title: dataset.custom_title,
+      description: dataset.description,
+      formats: dataset.resources |> Enum.map(& &1.format) |> distinct_values(),
+      modes: dataset.resources |> Enum.flat_map(&resource_modes/1) |> distinct_values()
+    }
+  end
+
+  # Modes listed in the latest metadata of the resource's latest history entry;
+  # an empty list when any link of that chain is missing.
+  defp resource_modes(resource) do
+    with %{id: history_id} <- find_resource_history(resource.id),
+         %{metadata: %{"modes" => modes}} <- find_resource_metadata(history_id) do
+      List.wrap(modes)
+    else
+      _ -> []
+    end
+  end
+
+  # Flattens nested lists, then drops nil entries and duplicates (first occurrence wins).
+  defp distinct_values(values) do
+    values
+    |> List.flatten()
+    |> Enum.reject(&is_nil/1)
+    |> Enum.uniq()
+  end
+
+  # Recomputes and persists the search payload of every dataset.
+  # Raises if an update fails (DB.Repo.update!/1). Each dataset is written as soon
+  # as its payload is computed, so no intermediate list of payloads is built.
+  def reindex! do
+    # NOTE: much, much too slow for my taste, should be bulked and/or parallelized
+    Enum.each(fetch_items(), fn %DB.Dataset{} = dataset ->
+      dataset
+      |> Ecto.Changeset.change(%{search_payload: compute_payload(dataset)})
+      |> DB.Repo.update!()
+    end)
+  end
+end
+
+defmodule Searcher do
+  # Query side of the prototype: filters datasets on their `search_payload`
+  # column and renders the results for the LiveView or for the console.
+  import Ecto.Query
+
+  # Columns of the console table; they match the keys produced by render/1 for one item.
+  @table_headers [:id, :title, :formats, :modes]
+
+  # Keeps datasets whose payload title contains `search_title` (case-insensitive
+  # `ilike`). A nil title leaves the query untouched.
+  def maybe_search_title(query, nil), do: query
+
+  def maybe_search_title(query, search_title) do
+    # NOTE(review): safe_like_pattern/1 is defined outside this script; presumably it
+    # escapes LIKE wildcards in user input, confirm.
+    safe_like_title = "%" <> DB.Contact.safe_like_pattern(search_title) <> "%"
+
+    where(query, [d], fragment("search_payload->>'title' ilike ?", ^safe_like_title))
+  end
+
+  # Keeps datasets whose payload "formats" array contains `search_format` as an
+  # element. A nil format leaves the query untouched.
+  def maybe_search_resources_formats(query, nil), do: query
+
+  def maybe_search_resources_formats(query, search_format) do
+    # `\\?` is Ecto's escape for a literal `?` operator inside a fragment.
+    where(query, [d], fragment("search_payload #> Array['formats'] \\? ?", ^search_format))
+  end
+
+  # NOTE: could be DRYed with formats
+  # Keeps datasets whose payload "modes" array contains `mode` as an element.
+  # A nil mode leaves the query untouched.
+  def maybe_search_resources_modes(query, nil), do: query
+
+  def maybe_search_resources_modes(query, mode) do
+    where(query, [d], fragment("search_payload #> Array['modes'] \\? ?", ^mode))
+  end
+
+  # Runs the search. `options` may carry :title, :format and :mode; missing or nil
+  # entries are ignored. Returns datasets with only :id, :custom_title and
+  # :search_payload selected.
+  def search(options) do
+    from(d in DB.Dataset)
+    |> maybe_search_title(options[:title])
+    |> maybe_search_resources_formats(options[:format])
+    |> maybe_search_resources_modes(options[:mode])
+    |> select([d], [:id, :custom_title, :search_payload])
+    |> DB.Repo.all()
+  end
+
+  # Turns one search result into a flat, display-ready map: formats and modes
+  # become comma-separated strings (empty when the payload has no such entry).
+  def render(%{} = item) do
+    payload = item.search_payload
+
+    %{
+      id: item.id,
+      title: item.custom_title,
+      formats: Enum.join(payload["formats"] || [], ", "),
+      modes: Enum.join(payload["modes"] || [], ", ")
+    }
+  end
+
+  # Prints a collection of search results as an ANSI table. The table now shows the
+  # modes column as well, since every rendered row carries it.
+  def render(items) do
+    IO.ANSI.Table.start(@table_headers)
+    IO.ANSI.Table.format(Enum.map(items, &render/1))
+  end
+end
+
+# Rebuild every dataset's search payload when REINDEX=1 is set.
+if System.get_env("REINDEX") == "1", do: SearchIndexer.reindex!()
+
+# Uncomment for fancy ANSI-console rendering
+
+# Searcher.search(title: "bibus")
+# |> Searcher.render()
+
+# Searcher.search(format: "SIRI")
+# |> Searcher.render()
+
+# With RUN_SERVER=1, start the endpoint under a supervisor and block forever so the
+# script keeps serving; in that case the final message below is never printed.
+if System.get_env("RUN_SERVER") == "1" do
+  children = [Search.Endpoint]
+  {:ok, _pid} = Supervisor.start_link(children, strategy: :one_for_one)
+  Process.sleep(:infinity)
+end
+
+IO.puts("Done")