Skip to content

Commit

Permalink
Notification nouveaux JDD : répare matching avec mots-clefs
Browse files Browse the repository at this point in the history
  • Loading branch information
AntoineAugusti committed Jan 14, 2025
1 parent 36dc757 commit c7444ea
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 5 deletions.
38 changes: 33 additions & 5 deletions apps/transport/lib/jobs/new_datagouv_datasets_job.ex
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ defmodule Transport.Jobs.NewDatagouvDatasetsJob do
"trottinette",
"vls",
"scooter",
"scooters",
"libre-service",
"libre service",
"scooter"
"libre service"
]),
formats: MapSet.new(["gbfs"])
},
Expand All @@ -51,7 +51,7 @@ defmodule Transport.Jobs.NewDatagouvDatasetsJob do
"etalab/schema-comptage-mobilites-measure",
"etalab/schema-comptage-mobilites-site"
],
tags: MapSet.new(["cyclable", "parking", "stationnement", "vélo"]),
tags: MapSet.new(["cyclable", "cyclables", "parking", "parkings", "stationnement", "vélo", "vélos"]),
formats: MapSet.new([])
},
%{
Expand Down Expand Up @@ -221,7 +221,17 @@ defmodule Transport.Jobs.NewDatagouvDatasetsJob do

# Returns `true` when `str` mentions at least one of the rule's formats or tags.
#
# `str` and every searched term go through `normalize/1` before comparison.
# Terms without a space must be equal to a whole whitespace-separated word of
# `str`, so a term found in the middle of a longer word is not a match.
# Terms containing a space cannot be compared word by word; they are searched
# as substrings of the normalized string instead.
defp string_matches?(str, %{formats: formats, tags: tags} = _rule) when is_binary(str) do
  # Normalize once and reuse the result for both kinds of lookup.
  normalized = normalize(str)

  {phrases, single_words} =
    formats
    |> MapSet.union(tags)
    |> Enum.map(&normalize/1)
    |> Enum.split_with(&String.contains?(&1, " "))

  words_in_str = normalized |> String.split(~r/\s+/) |> MapSet.new()
  word_match = not MapSet.disjoint?(words_in_str, MapSet.new(single_words))

  # `String.contains?/2` with an empty list of patterns is `false`, so a rule
  # without multi-word terms relies on the word match only.
  word_match or String.contains?(normalized, phrases)
end

defp tags_is_relevant?(%{"tags" => tags} = _dataset, rule) do
Expand Down Expand Up @@ -257,8 +267,26 @@ defmodule Transport.Jobs.NewDatagouvDatasetsJob do
"velo"
iex> normalize("Châteauroux")
"chateauroux"
iex> normalize("J'adore manger")
"j'adore manger"
"""
def normalize(value) do
  # Lowercase first, then map every grapheme through `normalize_grapheme/1`
  # and glue the results back together. Spaces and ASCII punctuation survive
  # (e.g. "J'adore manger" becomes "j'adore manger"), which lets callers split
  # the result into words afterwards.
  value
  |> String.downcase()
  |> String.graphemes()
  |> Enum.map_join("", &normalize_grapheme/1)
end

# Reduces a single grapheme to one ASCII byte, or to the empty string.
#
# The grapheme is decomposed with NFD, so an accented letter becomes its base
# letter followed by combining marks. Only the leading byte of the decomposed
# form is inspected: it is kept when it is ASCII (below 128) and everything
# after it is dropped. A grapheme whose decomposed form does not start with an
# ASCII byte yields "".
defp normalize_grapheme(grapheme) do
  decomposed = String.normalize(grapheme, :nfd)

  case decomposed do
    <<leading, _marks::binary>> when leading < 128 -> <<leading>>
    _non_ascii_or_empty -> ""
  end
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ defmodule Transport.Test.Transport.Jobs.NewDatagouvDatasetsJobTest do
})

assert :no_match == relevant_fn.(%{base | "title" => "Résultat des élections"})
# does not match on the word `velo` in the middle of the tag
assert :no_match == relevant_fn.(%{base | "tags" => ["developpement-du-territoire"]})

assert %{category: "IRVE"} =
relevant_fn.(%{
Expand Down

0 comments on commit c7444ea

Please sign in to comment.