From 20adca92ba1f1f53d9fd640f02748b0eb75160bd Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Sat, 13 Apr 2024 22:04:15 -0300 Subject: [PATCH 1/3] Add `re_contains/2` and `re_replace/3` to match with a regex The regular expression must follow the `regex` crate rules, and should be a string. The idea of keeping them separate is because we cannot use the Elixir standard regex, and we need to use strings for both literal and regex versions. To make the difference explicit, we are using the `re_` prefix for functions that accept regexes as strings. This is related to https://github.com/elixir-explorer/explorer/issues/353 --- lib/explorer/backend/lazy_series.ex | 24 +++- lib/explorer/backend/series.ex | 4 + lib/explorer/polars_backend/expression.ex | 2 + lib/explorer/polars_backend/native.ex | 4 +- lib/explorer/polars_backend/series.ex | 14 ++- lib/explorer/series.ex | 131 ++++++++++++++++++++-- native/explorer/src/expressions.rs | 18 ++- native/explorer/src/lib.rs | 2 + native/explorer/src/series.rs | 19 +++- test/explorer/data_frame_test.exs | 20 +++- test/explorer/series_test.exs | 92 ++++++++++++++- 11 files changed, 296 insertions(+), 34 deletions(-) diff --git a/lib/explorer/backend/lazy_series.ex b/lib/explorer/backend/lazy_series.ex index 0c246fb46..5e062618d 100644 --- a/lib/explorer/backend/lazy_series.ex +++ b/lib/explorer/backend/lazy_series.ex @@ -119,7 +119,9 @@ defmodule Explorer.Backend.LazySeries do row_index: 1, # Strings contains: 2, + re_contains: 2, replace: 3, + re_replace: 3, lstrip: 2, rstrip: 2, strip: 2, @@ -993,8 +995,15 @@ defmodule Explorer.Backend.LazySeries do end @impl true - def contains(series, pattern) do - data = new(:contains, [lazy_series!(series), pattern], :boolean) + def contains(series, substring) do + data = new(:contains, [lazy_series!(series), substring], :boolean) + + Backend.Series.new(data, :boolean) + end + + @impl true + def re_contains(series, pattern) do + data = new(:re_contains, [lazy_series!(series), pattern], 
:boolean) Backend.Series.new(data, :boolean) end @@ -1014,8 +1023,15 @@ defmodule Explorer.Backend.LazySeries do end @impl true - def replace(series, pattern, replacement) do - data = new(:replace, [lazy_series!(series), pattern, replacement], :string) + def replace(series, substring, replacement) do + data = new(:replace, [lazy_series!(series), substring, replacement], :string) + + Backend.Series.new(data, :string) + end + + @impl true + def re_replace(series, pattern, replacement) do + data = new(:re_replace, [lazy_series!(series), pattern, replacement], :string) Backend.Series.new(data, :string) end diff --git a/lib/explorer/backend/series.ex b/lib/explorer/backend/series.ex index 459e9f631..5d96a9aaf 100644 --- a/lib/explorer/backend/series.ex +++ b/lib/explorer/backend/series.ex @@ -291,6 +291,10 @@ defmodule Explorer.Backend.Series do @callback json_decode(s, dtype()) :: s @callback json_path_match(s, String.t()) :: s + ## String - Regular expression versions + @callback re_contains(s, String.t()) :: s + @callback re_replace(s, String.t(), String.t()) :: s + # Date / DateTime @callback day_of_week(s) :: s diff --git a/lib/explorer/polars_backend/expression.ex b/lib/explorer/polars_backend/expression.ex index 68b42ff62..85efcb5a8 100644 --- a/lib/explorer/polars_backend/expression.ex +++ b/lib/explorer/polars_backend/expression.ex @@ -128,7 +128,9 @@ defmodule Explorer.PolarsBackend.Expression do # Strings contains: 2, + re_contains: 2, replace: 3, + re_replace: 3, strip: 2, lstrip: 2, rstrip: 2, diff --git a/lib/explorer/polars_backend/native.ex b/lib/explorer/polars_backend/native.ex index 016402274..d77e1ca10 100644 --- a/lib/explorer/polars_backend/native.ex +++ b/lib/explorer/polars_backend/native.ex @@ -284,7 +284,7 @@ defmodule Explorer.PolarsBackend.Native do def s_categorise(_s, _s_categories), do: err() def s_coalesce(_s, _other), do: err() def s_concat(_series_list), do: err() - def s_contains(_s, _pattern), do: err() + def s_contains(_s, _pattern, 
_is_literal), do: err() def s_cumulative_max(_s, _reverse), do: err() def s_cumulative_min(_s, _reverse), do: err() def s_cumulative_sum(_s, _reverse), do: err() @@ -383,7 +383,7 @@ defmodule Explorer.PolarsBackend.Native do def s_ceil(_s), do: err() def s_rstrip(_s, _string), do: err() def s_rank(_s, _method, _descending, _seed), do: err() - def s_replace(_s, _pattern, _replacement), do: err() + def s_replace(_s, _pattern, _replacement, _literal), do: err() def s_sample_n(_s, _n, _replace, _shuffle, _seed), do: err() def s_sample_frac(_s, _frac, _replace, _shuffle, _seed), do: err() def s_series_equal(_s, _other, _null_equal), do: err() diff --git a/lib/explorer/polars_backend/series.ex b/lib/explorer/polars_backend/series.ex index d3bccf2fe..849d425d1 100644 --- a/lib/explorer/polars_backend/series.ex +++ b/lib/explorer/polars_backend/series.ex @@ -659,8 +659,12 @@ defmodule Explorer.PolarsBackend.Series do # Strings @impl true - def contains(series, pattern), - do: Shared.apply_series(series, :s_contains, [pattern]) + def contains(series, substring), + do: Shared.apply_series(series, :s_contains, [substring, true]) + + @impl true + def re_contains(series, pattern), + do: Shared.apply_series(series, :s_contains, [pattern, false]) @impl true def upcase(series), @@ -672,7 +676,11 @@ defmodule Explorer.PolarsBackend.Series do @impl true def replace(series, pattern, replacement), - do: Shared.apply_series(series, :s_replace, [pattern, replacement]) + do: Shared.apply_series(series, :s_replace, [pattern, replacement, true]) + + @impl true + def re_replace(series, pattern, replacement), + do: Shared.apply_series(series, :s_replace, [pattern, replacement, false]) @impl true def strip(series, str), diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex index 249a02968..246f090e1 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -5344,6 +5344,10 @@ defmodule Explorer.Series do @doc """ Detects whether a string contains a substring. 
+ > ### Notice {: .warning} + > + > This function detects only literal strings. For regular expressions, see `re_contains/2`. + ## Examples iex> s = Explorer.Series.from_list(["abc", "def", "bcd"]) @@ -5355,12 +5359,46 @@ defmodule Explorer.Series do """ @doc type: :string_wise @spec contains(Series.t(), String.t()) :: Series.t() - def contains(%Series{dtype: :string} = series, pattern) - when K.is_binary(pattern), - do: apply_series(series, :contains, [pattern]) + def contains(%Series{dtype: :string} = series, substring) + when K.is_binary(substring), + do: apply_series(series, :contains, [substring]) def contains(%Series{dtype: dtype}, _), do: dtype_error("contains/2", dtype, [:string]) + @doc """ + Detects whether a string matches a pattern. + + > ### Notice {: .warning} + > + > This function matches against a regular expression. It does not expect an Elixir regex, but + > a escaped string - you can use the `~S` sigil for escaping - that follows the [`regex`](https://docs.rs/regex/latest/regex/) + > Rust crate rules. This is because our backend, Polars, expects that format. + > + > To match literal strings, you can use `contains/2`. + + ## Examples + + iex> s = Explorer.Series.from_list(["abc", "def", "bcd"]) + iex> Explorer.Series.re_contains(s, ~S/(a|e)/) + #Explorer.Series< + Polars[3] + boolean [true, true, false] + > + """ + @doc type: :string_wise + @spec re_contains(Series.t(), String.t()) :: Series.t() + def re_contains(%Series{dtype: :string} = series, pattern) + when K.is_binary(pattern), + do: apply_series(series, :re_contains, [pattern]) + + def re_contains(%Series{dtype: :string}, %Regex{}) do + raise ArgumentError, + "standard regexes cannot be used as pattern because it may be incompatible with the backend. 
" <> + "Please use the `~S` sigil or extract the source from the regex with `Regex.source/1`" + end + + def re_contains(%Series{dtype: dtype}, _), do: dtype_error("re_contains/2", dtype, [:string]) + @doc """ Converts all characters to uppercase. @@ -5400,9 +5438,13 @@ defmodule Explorer.Series do def downcase(%Series{dtype: dtype}), do: dtype_error("downcase/1", dtype, [:string]) @doc """ - Replaces all occurences of pattern with replacement in string series. + Replaces all occurences of a substring with replacement in string series. - Both pattern and replacement must be of type string. + Both substring and replacement must be of type string. + + > ### Notice {: .warning} + > + > This function replaces only literal strings. For regular expressions, see `re_replace/3`. ## Examples @@ -5415,15 +5457,86 @@ defmodule Explorer.Series do """ @doc type: :string_wise @spec replace(Series.t(), binary(), binary()) :: Series.t() - def replace(%Series{dtype: :string} = series, pattern, replacement) - when K.and(is_binary(pattern), is_binary(replacement)), - do: apply_series(series, :replace, [pattern, replacement]) + def replace(%Series{dtype: :string} = series, substring, replacement) + when K.and(is_binary(substring), is_binary(replacement)), + do: apply_series(series, :replace, [substring, replacement]) def replace(%Series{dtype: :string}, _, _), - do: raise(ArgumentError, "pattern and replacement in replace/3 need to be a string") + do: raise(ArgumentError, "substring and replacement in replace/3 need to be a string") def replace(%Series{dtype: dtype}, _, _), do: dtype_error("replace/3", dtype, [:string]) + @doc """ + Replaces all occurences of a pattern with replacement in string series. + + Both pattern and replacement must be of type string. The replacement + can refer to groups captures by using the `${x}`, where `x` is a number starting from 1. + It can also refer to named groups using the same syntax. 
+ + > ### Notice {: .warning} + > + > This function matches against a regular expression. It does not expect an Elixir regex, but + > a escaped string - you can use the `~S` sigil for escaping - that follows the [`regex`](https://docs.rs/regex/latest/regex/) + > Rust crate rules. This is because our backend, Polars, expects that format. + > + > To replace by literal strings, you can use `replace/3`. + + ## Examples + + iex> series = Explorer.Series.from_list(["1.200,45", "1.234.567,30", "asdf", nil]) + iex> Explorer.Series.re_replace(series, ~S/[,.]/, "") + #Explorer.Series< + Polars[4] + string ["120045", "123456730", "asdf", nil] + > + + iex> series = Explorer.Series.from_list(["hat", "hut"]) + iex> Explorer.Series.re_replace(series, ~S/h(.)t/, "b${1}d") + #Explorer.Series< + Polars[2] + string ["bad", "bud"] + > + + iex> series = Explorer.Series.from_list(["hat", "hut"]) + iex> Explorer.Series.re_replace(series, ~S/h(?<vowel>.)t/, "b${vowel}d") + #Explorer.Series< + Polars[2] + string ["bad", "bud"] + > + + Apply case-insensitive string replacement using the `(?i)` flag - remember, from the `regex` Rust crate. + + iex> series = Explorer.Series.from_list(["Foggy", "Rainy", "Sunny"]) + iex> Explorer.Series.re_replace(series, ~S/(?i)foggy|rainy/, "Sunny") + #Explorer.Series< + Polars[3] + string ["Sunny", "Sunny", "Sunny"] + > + + With an Elixir regex it causes an error: + + iex> series = Explorer.Series.from_list(["hat", "hut"]) + iex> Explorer.Series.re_replace(series, ~r/h(.)t/, "b${1}d") + ** (ArgumentError) standard regexes cannot be used as pattern because it may be incompatible with the backend.
Please use the `~S` sigil or extract the source from the regex with `Regex.source/1` + + """ + @doc type: :string_wise + @spec re_replace(Series.t(), binary(), binary()) :: Series.t() + def re_replace(%Series{dtype: :string} = series, pattern, replacement) + when K.and(is_binary(pattern), is_binary(replacement)), + do: apply_series(series, :re_replace, [pattern, replacement]) + + def re_replace(%Series{dtype: :string}, %Regex{}, _) do + raise ArgumentError, + "standard regexes cannot be used as pattern because it may be incompatible with the backend. " <> + "Please use the `~S` sigil or extract the source from the regex with `Regex.source/1`" + end + + def re_replace(%Series{dtype: :string}, _, _), + do: raise(ArgumentError, "pattern and replacement in re_replace/3 need to be a string") + + def re_replace(%Series{dtype: dtype}, _, _), do: dtype_error("re_replace/3", dtype, [:string]) + @doc """ Returns a string series where all leading and trailing Unicode whitespaces have been removed. 
diff --git a/native/explorer/src/expressions.rs b/native/explorer/src/expressions.rs index b9eb6b1d0..700ef8bc9 100644 --- a/native/explorer/src/expressions.rs +++ b/native/explorer/src/expressions.rs @@ -835,6 +835,12 @@ pub fn expr_contains(expr: ExExpr, pattern: &str) -> ExExpr { ExExpr::new(expr.str().contains_literal(pattern.lit())) } +#[rustler::nif] +pub fn expr_re_contains(expr: ExExpr, pattern: &str) -> ExExpr { + let expr = expr.clone_inner(); + ExExpr::new(expr.str().contains(pattern.lit(), true)) +} + #[rustler::nif] pub fn expr_upcase(expr: ExExpr) -> ExExpr { let expr = expr.clone_inner(); @@ -896,11 +902,13 @@ pub fn expr_split(expr: ExExpr, substring: String) -> ExExpr { #[rustler::nif] pub fn expr_replace(expr: ExExpr, pat: String, value: String) -> ExExpr { let expr = expr.clone_inner(); - ExExpr::new(expr.str().replace_all( - Expr::Literal(LiteralValue::String(pat)), - Expr::Literal(LiteralValue::String(value)), - true, - )) + ExExpr::new(expr.str().replace_all(pat.lit(), value.lit(), true)) +} + +#[rustler::nif] +pub fn expr_re_replace(expr: ExExpr, pat: String, value: String) -> ExExpr { + let expr = expr.clone_inner(); + ExExpr::new(expr.str().replace_all(pat.lit(), value.lit(), false)) } #[rustler::nif] diff --git a/native/explorer/src/lib.rs b/native/explorer/src/lib.rs index b0bee4796..0b99abe95 100644 --- a/native/explorer/src/lib.rs +++ b/native/explorer/src/lib.rs @@ -252,6 +252,7 @@ rustler::init!( expr_describe_filter_plan, // string expressions expr_contains, + expr_re_contains, expr_upcase, expr_downcase, expr_strip, @@ -260,6 +261,7 @@ rustler::init!( expr_substring, expr_split, expr_replace, + expr_re_replace, expr_json_path_match, expr_split_into, // float round expressions diff --git a/native/explorer/src/series.rs b/native/explorer/src/series.rs index 4ec5641d0..f144d30de 100644 --- a/native/explorer/src/series.rs +++ b/native/explorer/src/series.rs @@ -1468,8 +1468,13 @@ pub fn s_not(s1: ExSeries) -> Result { } 
#[rustler::nif(schedule = "DirtyCpu")] -pub fn s_contains(s1: ExSeries, pattern: &str) -> Result { - Ok(ExSeries::new(s1.str()?.contains_literal(pattern)?.into())) +pub fn s_contains(s1: ExSeries, pattern: &str, literal: bool) -> Result { + let chunked_array = if literal { + s1.str()?.contains_literal(pattern)? + } else { + s1.str()?.contains(pattern, true)? + }; + Ok(ExSeries::new(chunked_array.into())) } #[rustler::nif(schedule = "DirtyCpu")] @@ -1487,10 +1492,14 @@ pub fn s_replace( s1: ExSeries, pattern: &str, replacement: &str, + literal: bool, ) -> Result { - Ok(ExSeries::new( - s1.str()?.replace_literal_all(pattern, replacement)?.into(), - )) + let chunked_array = if literal { + s1.str()?.replace_literal_all(pattern, replacement)? + } else { + s1.str()?.replace_all(pattern, replacement)? + }; + Ok(ExSeries::new(chunked_array.into())) } #[rustler::nif(schedule = "DirtyCpu")] diff --git a/test/explorer/data_frame_test.exs b/test/explorer/data_frame_test.exs index 3851e1639..59c4dba73 100644 --- a/test/explorer/data_frame_test.exs +++ b/test/explorer/data_frame_test.exs @@ -622,6 +622,20 @@ defmodule Explorer.DataFrameTest do assert DF.to_columns(df1, atom_keys: true) == %{a: [1, 2, 3], b: [9, 8, 7]} end + + test "filter using contains/2" do + df = DF.new(a: [1, 2, 3, nil], b: ["abc", "bcd", "def", nil]) + + df1 = DF.filter(df, contains(b, "b")) + assert DF.to_columns(df1, atom_keys: true) == %{a: [1, 2], b: ["abc", "bcd"]} + end + + test "filter using re_contains/2" do + df = DF.new(a: [1, 2, 3, nil], b: ["abc", "bcd", "def", nil]) + + df1 = DF.filter(df, re_contains(b, ~S/^(b|d)/)) + assert DF.to_columns(df1, atom_keys: true) == %{a: [2, 3], b: ["bcd", "def"]} + end end describe "mutate_with/2" do @@ -1749,12 +1763,14 @@ defmodule Explorer.DataFrameTest do df1 = DF.mutate(df, - b: replace(a, ",", "") + b: replace(a, ",", ""), + c: re_replace(a, ~S/\d{3}$/, "999") ) assert DF.to_columns(df1, atom_keys: true) == %{ a: ["2,000", "2,000,000", ","], - b: ["2000", 
"2000000", ""] + b: ["2000", "2000000", ""], + c: ["2,999", "2,000,999", ","] } end diff --git a/test/explorer/series_test.exs b/test/explorer/series_test.exs index da4052b8c..ea1f62b59 100644 --- a/test/explorer/series_test.exs +++ b/test/explorer/series_test.exs @@ -5211,11 +5211,15 @@ defmodule Explorer.SeriesTest do end describe "replace/3" do - test "replaces all occurences of pattern in string by replacement string" do + test "replaces all occurences of a substring in string by replacement string" do series = Series.from_list(["1,200", "1,234,567", "asdf", nil]) assert Series.replace(series, ",", "") |> Series.to_list() == ["1200", "1234567", "asdf", nil] + end + + test "does not work with regex patterns" do + series = Series.from_list(["1,200", "1,234,567", "asdf", nil]) assert Series.replace(series, "[,]", "") |> Series.to_list() == ["1,200", "1,234,567", "asdf", nil] @@ -5229,11 +5233,11 @@ defmodule Explorer.SeriesTest do fn -> Series.replace(series, ",", "") end end - test "raises error if pattern is not string" do + test "raises error if substring is not string" do series = Series.from_list(["1,200", "1,234,567", "asdf", nil]) assert_raise ArgumentError, - "pattern and replacement in replace/3 need to be a string", + "substring and replacement in replace/3 need to be a string", fn -> Series.replace(series, 2, "") end end @@ -5241,11 +5245,91 @@ defmodule Explorer.SeriesTest do series = Series.from_list(["1,200", "1,234,567", "asdf", nil]) assert_raise ArgumentError, - "pattern and replacement in replace/3 need to be a string", + "substring and replacement in replace/3 need to be a string", fn -> Series.replace(series, ",", nil) end end end + describe "re_replace/3" do + test "replaces all occurences of pattern in string by replacement string" do + series = Series.from_list(["1,200.42", "1,234,567.54", "asdf", nil]) + + assert Series.re_replace(series, ~S/[^0-9]/, "") |> Series.to_list() == + ["120042", "123456754", "", nil] + end + + test "doesn't work 
with non string series" do + series = Series.from_list([1200, 1_234_567, nil]) + + assert_raise ArgumentError, + "Explorer.Series.re_replace/3 not implemented for dtype {:s, 64}. Valid dtype is :string", + fn -> Series.re_replace(series, ",", "") end + end + + test "raises error if pattern is not string" do + series = Series.from_list(["1,200", "1,234,567", "asdf", nil]) + + assert_raise ArgumentError, + "pattern and replacement in re_replace/3 need to be a string", + fn -> Series.re_replace(series, 2, "") end + end + + test "raises error if replacement is not string" do + series = Series.from_list(["1,200", "1,234,567", "asdf", nil]) + + assert_raise ArgumentError, + "pattern and replacement in re_replace/3 need to be a string", + fn -> Series.re_replace(series, ",", nil) end + end + + test "raises error if pattern is an Elixir regex" do + series = Series.from_list(["1,200.42", "1,234,567.54", "asdf", nil]) + + assert_raise ArgumentError, + "standard regexes cannot be used as pattern because it may be incompatible with the backend. 
" <> + "Please use the `~S` sigil or extract the source from the regex with `Regex.source/1`", + fn -> + Series.re_replace(series, ~r/[^0-9]/, "") + end + end + end + + describe "contains/2" do + test "check if a substring is inside the series" do + series = Series.from_list(["abc", "bcd", "def", nil]) + + assert Series.contains(series, "b") |> Series.to_list() == + [true, true, false, nil] + end + + test "does not work with regex patterns" do + series = Series.from_list(["abc", "bcd", "def", nil]) + + assert Series.contains(series, ~S/(b|d)/) |> Series.to_list() == + [false, false, false, nil] + end + end + + describe "re_contains/2" do + test "check if a pattern matches the contents of the series" do + series = Series.from_list(["abc", "bcd", "def", nil]) + + assert Series.re_contains(series, ~S/^(b|d)/) |> Series.to_list() == + [false, true, true, nil] + end + + test "raises error if pattern is an Elixir regex" do + series = Series.from_list(["abc", "bcd", "def", nil]) + + assert_raise ArgumentError, + "standard regexes cannot be used as pattern because it may be incompatible with the backend. 
" <> + "Please use the `~S` sigil or extract the source from the regex with `Regex.source/1`", + fn -> + Series.re_contains(series, ~r/^(b|d)/) + end + end + end + describe "strip, strip, lstrip, rstrip" do test "strip/1" do series = Series.from_list([" 123 ", " 2 ", " 20$ "]) From 9c467e6ba659fb6386b25a1f708c988a7602ba29 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Sun, 14 Apr 2024 14:32:17 -0300 Subject: [PATCH 2/3] Update lib/explorer/series.ex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: José Valim --- lib/explorer/series.ex | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex index 246f090e1..611fe4459 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -5370,9 +5370,11 @@ defmodule Explorer.Series do > ### Notice {: .warning} > - > This function matches against a regular expression. It does not expect an Elixir regex, but - > a escaped string - you can use the `~S` sigil for escaping - that follows the [`regex`](https://docs.rs/regex/latest/regex/) - > Rust crate rules. This is because our backend, Polars, expects that format. + > This function matches against a regular expression. It does not expect an Elixir regex, + > but a escaped string and you can use the `~S` sigil for escaping it. Since each Explorer + > backend may have its own regular expression rules, you must consult their underlying + > engine. For the default backend (Polars), the rules are outlined in the Rust create named + > [`regex`](https://docs.rs/regex/latest/regex/). > > To match literal strings, you can use `contains/2`. 
From 2d0b0168baf7dcdf21fdd9046be3808f5fa4f852 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Sun, 14 Apr 2024 14:35:05 -0300 Subject: [PATCH 3/3] Fix the description for re_replace/3 --- lib/explorer/series.ex | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex index 611fe4459..95536070c 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -5477,9 +5477,11 @@ defmodule Explorer.Series do > ### Notice {: .warning} > - > This function matches against a regular expression. It does not expect an Elixir regex, but - > a escaped string - you can use the `~S` sigil for escaping - that follows the [`regex`](https://docs.rs/regex/latest/regex/) - > Rust crate rules. This is because our backend, Polars, expects that format. + > This function matches against a regular expression. It does not expect an Elixir regex, + > but an escaped string and you can use the `~S` sigil for escaping it. Since each Explorer + > backend may have its own regular expression rules, you must consult their underlying + > engine. For the default backend (Polars), the rules are outlined in the Rust crate named + > [`regex`](https://docs.rs/regex/latest/regex/). > > To replace by literal strings, you can use `replace/3`.