From f0732e7f5ef70af130ad5941abffa763ea57cb40 Mon Sep 17 00:00:00 2001 From: Matthew O'Neill Date: Thu, 27 Jul 2023 19:13:52 +0000 Subject: [PATCH 1/6] implement trim_trailing/2 and trim_leading/2 tidy up trim/2 rust implementation --- lib/explorer/backend/lazy_series.ex | 12 +++---- lib/explorer/backend/series.ex | 4 +-- lib/explorer/polars_backend/expression.ex | 4 +-- lib/explorer/polars_backend/native.ex | 4 +-- lib/explorer/polars_backend/series.ex | 8 ++--- lib/explorer/series.ex | 44 ++++++++++++++++++++--- native/explorer/src/expressions.rs | 8 ++--- native/explorer/src/series.rs | 30 ++++++++++++---- test/explorer/data_frame_test.exs | 42 ++++++++++++++++++++++ test/explorer/series_test.exs | 26 +++++++++++++- 10 files changed, 150 insertions(+), 32 deletions(-) diff --git a/lib/explorer/backend/lazy_series.ex b/lib/explorer/backend/lazy_series.ex index c33b6b52c..c1618dbfe 100644 --- a/lib/explorer/backend/lazy_series.ex +++ b/lib/explorer/backend/lazy_series.ex @@ -111,8 +111,8 @@ defmodule Explorer.Backend.LazySeries do covariance: 2, # Strings contains: 2, - trim_leading: 1, - trim_trailing: 1, + trim_leading: 2, + trim_trailing: 2, trim: 2, upcase: 1, downcase: 1, @@ -877,15 +877,15 @@ defmodule Explorer.Backend.LazySeries do end @impl true - def trim_leading(series) do - data = new(:trim_leading, [lazy_series!(series)]) + def trim_leading(series, string) do + data = new(:trim_leading, [lazy_series!(series), string]) Backend.Series.new(data, :string) end @impl true - def trim_trailing(series) do - data = new(:trim_trailing, [lazy_series!(series)]) + def trim_trailing(series, string) do + data = new(:trim_trailing, [lazy_series!(series), string]) Backend.Series.new(data, :string) end diff --git a/lib/explorer/backend/series.ex b/lib/explorer/backend/series.ex index 920dfccf2..eb1434e1d 100644 --- a/lib/explorer/backend/series.ex +++ b/lib/explorer/backend/series.ex @@ -238,8 +238,8 @@ defmodule Explorer.Backend.Series do @callback upcase(s) :: s @callback downcase(s) :: s @callback trim(s, String.t() | nil) :: s - @callback trim_leading(s) :: s - @callback trim_trailing(s) :: s + @callback trim_leading(s, String.t() | nil) :: s + @callback trim_trailing(s, String.t() | nil) :: s # Date / DateTime diff --git a/lib/explorer/polars_backend/expression.ex b/lib/explorer/polars_backend/expression.ex index d8c62c15e..6f551f835 100644 --- a/lib/explorer/polars_backend/expression.ex +++ b/lib/explorer/polars_backend/expression.ex @@ -121,8 +121,8 @@ defmodule Explorer.PolarsBackend.Expression do # Strings contains: 2, trim: 2, - trim_leading: 1, - trim_trailing: 1, + trim_leading: 2, + trim_trailing: 2, downcase: 1, upcase: 1 ] diff --git a/lib/explorer/polars_backend/native.ex b/lib/explorer/polars_backend/native.ex index d51c240bc..77591774b 100644 --- a/lib/explorer/polars_backend/native.ex +++ b/lib/explorer/polars_backend/native.ex @@ -260,7 +260,7 @@ defmodule Explorer.PolarsBackend.Native do def s_is_null(_s), do: err() def s_less(_s, _rhs), do: err() def s_less_equal(_s, _rhs), do: err() - def s_trim_leading(_s), do: err() + def s_trim_leading(_s, _string), do: err() def s_mask(_s, _filter), do: err() def s_max(_s), do: err() def s_mean(_s), do: err() @@ -307,7 +307,7 @@ defmodule Explorer.PolarsBackend.Native do def s_round(_s, _decimals), do: err() def s_floor(_s), do: err() def s_ceil(_s), do: err() - def s_trim_trailing(_s), do: err() + def s_trim_trailing(_s, _string), do: err() def s_rank(_s, _method, _descending, _seed), do: err() def s_sample_n(_s, _n, _replace, _shuffle, _seed), do: err() def s_sample_frac(_s, _frac, _replace, _shuffle, _seed), do: err() diff --git a/lib/explorer/polars_backend/series.ex b/lib/explorer/polars_backend/series.ex index ea5d52178..b0b3e385b 100644 --- a/lib/explorer/polars_backend/series.ex +++ b/lib/explorer/polars_backend/series.ex @@ -619,12 +619,12 @@ defmodule Explorer.PolarsBackend.Series do do: Shared.apply_series(series, :s_trim, [str]) @impl true - def trim_leading(series), - do: Shared.apply_series(series, :s_trim_leading) + def trim_leading(series, str), + do: Shared.apply_series(series, :s_trim_leading, [str]) @impl true - def trim_trailing(series), - do: Shared.apply_series(series, :s_trim_trailing) + def trim_trailing(series, str), + do: Shared.apply_series(series, :s_trim_trailing, [str]) # Float round @impl true diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex index 8c43564ad..66b71ecc0 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -4332,9 +4332,27 @@ defmodule Explorer.Series do @doc type: :string_wise @spec trim_leading(Series.t()) :: Series.t() def trim_leading(%Series{dtype: :string} = series), - do: apply_series(series, :trim_leading) + do: apply_series(series, :trim_leading, [nil]) - def trim_leading(%Series{dtype: dtype}), do: dtype_error("trim_leading/1", dtype, [:string]) + @doc """ + Returns a string where all leading examples of the provided binary have been removed. + + ## Examples + + iex> s = Explorer.Series.from_list(["$1", "$$200$$", "$$$3000$"]) + iex> Explorer.Series.trim_leading(s, "$") + #Explorer.Series< + Polars[3] + string ["1", "200$$", "3000$"] + > + """ + @doc type: :string_wise + @spec trim(Series.t(), String.t()) :: Series.t() + def trim_leading(%Series{dtype: :string} = series, string) when is_binary(string), + do: apply_series(series, :trim_leading, [string]) + + def trim_leading(%Series{dtype: dtype}, _string), + do: dtype_error("trim_leading/2", dtype, [:string]) @doc """ Returns a string where all trailing Unicode whitespaces have been removed. @@ -4351,9 +4369,27 @@ defmodule Explorer.Series do @doc type: :string_wise @spec trim_trailing(Series.t()) :: Series.t() def trim_trailing(%Series{dtype: :string} = series), - do: apply_series(series, :trim_trailing) + do: apply_series(series, :trim_trailing, [nil]) + + @doc """ + Returns a string where all trailing examples of the provided binary have been removed. + + ## Examples + + iex> s = Explorer.Series.from_list(["__abc__", "def_", "__bcd_"]) + iex> Explorer.Series.trim_trailing(s, "_") + #Explorer.Series< + Polars[3] + string ["__abc", "def", "__bcd"] + > + """ + @doc type: :string_wise + @spec trim_trailing(Series.t()) :: Series.t() + def trim_trailing(%Series{dtype: :string} = series, string) when is_binary(string), + do: apply_series(series, :trim_trailing, [string]) - def trim_trailing(%Series{dtype: dtype}), do: dtype_error("trim_trailing/1", dtype, [:string]) + def trim_trailing(%Series{dtype: dtype}, _string), + do: dtype_error("trim_trailing/2", dtype, [:string]) # Float diff --git a/native/explorer/src/expressions.rs b/native/explorer/src/expressions.rs index ed851178e..d2f3f8d2e 100644 --- a/native/explorer/src/expressions.rs +++ b/native/explorer/src/expressions.rs @@ -753,15 +753,15 @@ pub fn expr_trim(expr: ExExpr, string: Option) -> ExExpr { } #[rustler::nif] -pub fn expr_trim_leading(expr: ExExpr) -> ExExpr { +pub fn expr_trim_leading(expr: ExExpr, string: Option) -> ExExpr { let expr = expr.clone_inner(); - ExExpr::new(expr.str().lstrip(None)) + ExExpr::new(expr.str().lstrip(string)) } #[rustler::nif] -pub fn expr_trim_trailing(expr: ExExpr) -> ExExpr { +pub fn expr_trim_trailing(expr: ExExpr, string: Option) -> ExExpr { let expr = expr.clone_inner(); - ExExpr::new(expr.str().rstrip(None)) + ExExpr::new(expr.str().rstrip(string)) } #[rustler::nif] diff --git a/native/explorer/src/series.rs b/native/explorer/src/series.rs index 3371690ed..baffd83e0 100644 --- a/native/explorer/src/series.rs +++ b/native/explorer/src/series.rs @@ -1344,24 +1344,40 @@ pub fn s_trim(s1: ExSeries, pattern: Option<&str>) -> Result format!(r#"^[{}]+|[{}]+$"#, &string, &string), }; - // replace only replaces the leftmost match, so we need to call it twice. - let s1 = ExSeries::new(s1.utf8()?.replace(pattern.as_str(), "")?.into()); + // replace only replaces the leftmost match, so we need to call it twice. Ok(ExSeries::new( - s1.utf8()?.replace(pattern.as_str(), "")?.into(), + s1.utf8()? + .replace(pattern.as_str(), "")? + .replace(pattern.as_str(), "")? + .into(), )) } #[rustler::nif(schedule = "DirtyCpu")] -pub fn s_trim_leading(s1: ExSeries) -> Result { +pub fn s_trim_leading(s1: ExSeries, pattern: Option<&str>) -> Result { // There are no eager strip functions. - Ok(ExSeries::new(s1.utf8()?.replace(r#"^[ \s]+"#, "")?.into())) + let pattern = match pattern { + None => String::from(r#"^[ \s]+"#), + Some(string) => format!(r#"^[{}]+"#, &string), + }; + + Ok(ExSeries::new( + s1.utf8()?.replace(pattern.as_str(), "")?.into(), + )) } #[rustler::nif(schedule = "DirtyCpu")] -pub fn s_trim_trailing(s1: ExSeries) -> Result { +pub fn s_trim_trailing(s1: ExSeries, pattern: Option<&str>) -> Result { // There are no eager strip functions. - Ok(ExSeries::new(s1.utf8()?.replace(r#"[ \s]+$"#, "")?.into())) + let pattern = match pattern { + None => String::from(r#"[ \s]+$"#), + Some(string) => format!(r#"[{}]+$"#, &string), + }; + + Ok(ExSeries::new( + s1.utf8()?.replace(pattern.as_str(), "")?.into(), + )) } #[rustler::nif(schedule = "DirtyCpu")] diff --git a/test/explorer/data_frame_test.exs b/test/explorer/data_frame_test.exs index 60c8d1b5f..8b3e9b69f 100644 --- a/test/explorer/data_frame_test.exs +++ b/test/explorer/data_frame_test.exs @@ -1535,6 +1535,48 @@ defmodule Explorer.DataFrameTest do } end + test "trim trailing characters from string" do + df = + DF.new( + a: ["£2", "3£", "£200£", "£££20"], + b: [" sent ", " received", " words ", "lots of pound signs "] + ) + + df1 = + DF.mutate(df, + c: trim_trailing(a, "£"), + d: trim_trailing(b) + ) + + assert DF.to_columns(df1, atom_keys: true) == %{ + a: ["£2", "3£", "£200£", "£££20"], + b: [" sent ", " received", " words ", "lots of pound signs "], + c: ["£2", "3", "£200", "£££20"], + d: [" sent", " received", " words", "lots of pound signs"] + } + end + + test "trim leading characters from string" do + df = + DF.new( + a: ["£2", "3£", "£200£", "£££20"], + b: [" sent ", " received", " words ", "lots of pound signs "] + ) + + df1 = + DF.mutate(df, + c: trim_leading(a, "£"), + d: trim_leading(b) + ) + + assert DF.to_columns(df1, atom_keys: true) == %{ + a: ["£2", "3£", "£200£", "£££20"], + b: [" sent ", " received", " words ", "lots of pound signs "], + c: ["2", "3£", "200£", "20"], + d: ["sent ", "received", "words ", "lots of pound signs "] + } + end + test "conversion between string and datetime" do df = DF.new( diff --git a/test/explorer/series_test.exs b/test/explorer/series_test.exs index 7225001d0..6b4d5c663 100644 --- a/test/explorer/series_test.exs +++ b/test/explorer/series_test.exs @@ -3966,7 +3966,7 @@ defmodule Explorer.SeriesTest do end end - describe "trim/1 and trim/2" do + describe "trim, trim, trim_leading, trim_trailing" do test "trim/1" do series = Series.from_list([" 123 ", " 2 ", " 20$ "]) @@ -3978,6 +3978,30 @@ defmodule Explorer.SeriesTest do assert Series.trim(series, "£") |> Series.to_list() == ["1£23", "2", "20"] end + + test "trim_leading/1" do + series = Series.from_list([" 123 ", " 2 ", " 20$ "]) + + assert Series.trim_leading(series) |> Series.to_list() == ["123 ", "2 ", "20$ "] + end + + test "trim_leading/2" do + series = Series.from_list(["£1£23", "2£", "£20£"]) + + assert Series.trim_leading(series, "£") |> Series.to_list() == ["1£23", "2£", "20£"] + end + + test "trim_trailing/1" do + series = Series.from_list([" 123 ", " 2 ", " 20$ "]) + + assert Series.trim_trailing(series) |> Series.to_list() == [" 123", " 2", " 20$"] + end + + test "trim_trailing/2" do + series = Series.from_list(["£1£23", "2£", "£20£"]) + + assert Series.trim_trailing(series, "£") |> Series.to_list() == ["£1£23", "2", "£20"] + end end describe "strptime/2 and strftime/2" do From ebc5665aff5b126f6b8b9f1326a46a6f80611f1a Mon Sep 17 00:00:00 2001 From: Matthew O'Neill Date: Thu, 27 Jul 2023 19:19:43 +0000 Subject: [PATCH 2/6] remove additional line break --- native/explorer/src/series.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/native/explorer/src/series.rs b/native/explorer/src/series.rs index baffd83e0..d91d30ac6 100644 --- a/native/explorer/src/series.rs +++ b/native/explorer/src/series.rs @@ -1344,7 +1344,6 @@ pub fn s_trim(s1: ExSeries, pattern: Option<&str>) -> Result format!(r#"^[{}]+|[{}]+$"#, &string, &string), }; - // replace only replaces the leftmost match, so we need to call it twice. Ok(ExSeries::new( s1.utf8()? From 6f0d382e50cefdcea3e3a58f07ef5e94130c38d8 Mon Sep 17 00:00:00 2001 From: Matthew O'Neill Date: Thu, 27 Jul 2023 19:55:23 +0000 Subject: [PATCH 3/6] reword docstrings to be more clear --- lib/explorer/series.ex | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex index 66b71ecc0..5216ceb1d 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -4272,7 +4272,8 @@ defmodule Explorer.Series do def downcase(%Series{dtype: dtype}), do: dtype_error("downcase/1", dtype, [:string]) @doc """ - Returns a string where all leading and trailing Unicode whitespaces have been removed. + Returns a string series where all leading and trailing Unicode whitespaces + have been removed. ## Examples @@ -4290,7 +4291,7 @@ defmodule Explorer.Series do do: apply_series(series, :trim, [nil]) @doc """ - Returns a string where all leading and trailing examples of the provided string + Returns a string series where all leading and trailing examples of the provided string have been removed. ## Examples @@ -4318,7 +4319,7 @@ defmodule Explorer.Series do def trim(%Series{dtype: dtype}, _string), do: dtype_error("trim/2", dtype, [:string]) @doc """ - Returns a string where all leading Unicode whitespaces have been removed. + Returns a string series where all leading Unicode whitespaces have been removed. ## Examples @@ -4335,7 +4336,8 @@ defmodule Explorer.Series do do: apply_series(series, :trim_leading, [nil]) @doc """ - Returns a string where all leading examples of the provided binary have been removed. + Returns a string series where all leading examples of the provided string + have been removed. ## Examples @@ -4355,7 +4357,7 @@ defmodule Explorer.Series do do: dtype_error("trim_leading/2", dtype, [:string]) @doc """ - Returns a string where all trailing Unicode whitespaces have been removed. + Returns a string series where all trailing Unicode whitespaces have been removed. ## Examples @@ -4372,7 +4374,8 @@ defmodule Explorer.Series do do: apply_series(series, :trim_trailing, [nil]) @doc """ - Returns a string where all trailing examples of the provided binary have been removed. + Returns a string series where all trailing examples of the provided string + have been removed. ## Examples From 63d3e9666c54b7a90227f7500611284d9945a1df Mon Sep 17 00:00:00 2001 From: Matthew O'Neill Date: Thu, 27 Jul 2023 20:43:09 +0000 Subject: [PATCH 4/6] update typespec --- lib/explorer/series.ex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex index 5216ceb1d..3da8d8e5e 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -4349,7 +4349,7 @@ defmodule Explorer.Series do > """ @doc type: :string_wise - @spec trim(Series.t(), String.t()) :: Series.t() + @spec trim_leading(Series.t(), String.t()) :: Series.t() def trim_leading(%Series{dtype: :string} = series, string) when is_binary(string), do: apply_series(series, :trim_leading, [string]) @@ -4387,7 +4387,7 @@ defmodule Explorer.Series do > """ @doc type: :string_wise - @spec trim_trailing(Series.t()) :: Series.t() + @spec trim_trailing(Series.t(), String.t()) :: Series.t() def trim_trailing(%Series{dtype: :string} = series, string) when is_binary(string), do: apply_series(series, :trim_trailing, [string]) From 7cf1f8d410aab3abf7345e76fb5a24d72185348c Mon Sep 17 00:00:00 2001 From: Matthew O'Neill Date: Fri, 28 Jul 2023 20:09:09 +0000 Subject: [PATCH 5/6] improve docs and tests for multicharacter trims --- lib/explorer/series.ex | 33 ++++++++++++++++++++++++------- test/explorer/data_frame_test.exs | 21 ++++++++++++++++++++ 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex index 3da8d8e5e..5256425be 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -4272,7 +4272,7 @@ defmodule Explorer.Series do def downcase(%Series{dtype: dtype}), do: dtype_error("downcase/1", dtype, [:string]) @doc """ - Returns a string series where all leading and trailing Unicode whitespaces + Returns a string series where all leading and trailing Unicode whitespaces have been removed. ## Examples @@ -4294,20 +4294,22 @@ defmodule Explorer.Series do Returns a string series where all leading and trailing examples of the provided string have been removed. + Where multiple characters are provided, all combinations of this set of characters will be trimmed + ## Examples - iex> s = Explorer.Series.from_list(["abc", "adefa", "bcda"]) - iex> Explorer.Series.trim(s, "a") + iex> s = Explorer.Series.from_list(["£123", "1.00£", "£1.00£"]) + iex> Explorer.Series.trim(s, "£") #Explorer.Series< Polars[3] - string ["bc", "def", "bcd"] + string ["123", "1.00", "1.00"] > - iex> s = Explorer.Series.from_list(["£123", "1.00£", "£1.00£"]) - iex> Explorer.Series.trim(s, "£") + iex> s = Explorer.Series.from_list(["abc", "adefa", "bcda"]) + iex> Explorer.Series.trim(s, "ab") #Explorer.Series< Polars[3] - string ["123", "1.00", "1.00"] + string ["c", "def", "cd"] > """ @@ -4339,6 +4341,7 @@ defmodule Explorer.Series do Returns a string series where all leading examples of the provided string have been removed. + Where multiple characters are provided, all combinations of this set of characters will be trimmed ## Examples iex> s = Explorer.Series.from_list(["$1", "$$200$$", "$$$3000$"]) @@ -4347,6 +4350,13 @@ defmodule Explorer.Series do Polars[3] string ["1", "200$$", "3000$"] > + + iex> s = Explorer.Series.from_list(["abc", "adefa", "bcda"]) + iex> Explorer.Series.trim_leading(s, "ab") + #Explorer.Series< + Polars[3] + string ["c", "defa", "cda"] + > """ @doc type: :string_wise @spec trim_leading(Series.t(), String.t()) :: Series.t() @@ -4377,6 +4387,8 @@ defmodule Explorer.Series do Returns a string series where all trailing examples of the provided string have been removed. + Where multiple characters are provided, all combinations of this set of characters will be trimmed + ## Examples iex> s = Explorer.Series.from_list(["__abc__", "def_", "__bcd_"]) @@ -4385,6 +4397,13 @@ defmodule Explorer.Series do Polars[3] string ["__abc", "def", "__bcd"] > + + iex> s = Explorer.Series.from_list(["abc", "adefa", "bcdabaaa"]) + iex> Explorer.Series.trim_trailing(s, "ab") + #Explorer.Series< + Polars[3] + string ["abc", "adef", "bcd"] + > """ @doc type: :string_wise @spec trim_trailing(Series.t(), String.t()) :: Series.t() diff --git a/test/explorer/data_frame_test.exs b/test/explorer/data_frame_test.exs index 8b3e9b69f..842a7241c 100644 --- a/test/explorer/data_frame_test.exs +++ b/test/explorer/data_frame_test.exs @@ -1535,6 +1535,27 @@ defmodule Explorer.DataFrameTest do } end + test "trim multiple characters from string" do + df = + DF.new( + a: ["ababhelloabab", "abababworldabababa", "abab", "bbbbaaaabhelloba"], + b: ["nx_hello", "world_nx", "nx_nx_xn", "more_nx"] + ) + + df1 = + DF.mutate(df, + c: trim(a, "ab"), + d: trim(b, "nx_") + ) + + assert DF.to_columns(df1, atom_keys: true) == %{ + a: ["ababhelloabab", "abababworldabababa", "abab", "bbbbaaaabhelloba"], + b: ["nx_hello", "world_nx", "nx_nx_xn", "more_nx"], + c: ["hello", "world", "", "hello"], + d: ["hello", "world", "", "more"] + } + end + test "trim trailing characters from string" do df = DF.new( From 2dc10cd62928637e9a882c62648e00440db6403c Mon Sep 17 00:00:00 2001 From: Matthew O'Neill Date: Fri, 28 Jul 2023 20:25:09 +0000 Subject: [PATCH 6/6] rename explorer trim functions --- lib/explorer/backend/lazy_series.ex | 18 +++--- lib/explorer/backend/series.ex | 6 +- lib/explorer/polars_backend/expression.ex | 6 +- lib/explorer/polars_backend/native.ex | 6 +- lib/explorer/polars_backend/series.ex | 12 ++-- lib/explorer/series.ex | 70 +++++++++++------------ native/explorer/src/expressions.rs | 6 +- native/explorer/src/lib.rs | 12 ++-- native/explorer/src/series.rs | 6 +- test/explorer/data_frame_test.exs | 24 ++++---- test/explorer/series_test.exs | 26 ++++----- 11 files changed, 96 insertions(+), 96 deletions(-) diff --git a/lib/explorer/backend/lazy_series.ex b/lib/explorer/backend/lazy_series.ex index 772a11090..6f3767386 100644 --- a/lib/explorer/backend/lazy_series.ex +++ b/lib/explorer/backend/lazy_series.ex @@ -112,9 +112,9 @@ defmodule Explorer.Backend.LazySeries do covariance: 2, # Strings contains: 2, - trim_leading: 2, - trim_trailing: 2, - trim: 2, + lstrip: 2, + rstrip: 2, + strip: 2, upcase: 1, downcase: 1, substring: 3, @@ -874,21 +874,21 @@ defmodule Explorer.Backend.LazySeries do end @impl true - def trim(series, string) do - data = new(:trim, [lazy_series!(series), string]) + def strip(series, string) do + data = new(:strip, [lazy_series!(series), string]) Backend.Series.new(data, :string) end @impl true - def trim_leading(series, string) do - data = new(:trim_leading, [lazy_series!(series), string]) + def lstrip(series, string) do + data = new(:lstrip, [lazy_series!(series), string]) Backend.Series.new(data, :string) end @impl true - def trim_trailing(series, string) do - data = new(:trim_trailing, [lazy_series!(series), string]) + def rstrip(series, string) do + data = new(:rstrip, [lazy_series!(series), string]) Backend.Series.new(data, :string) end diff --git a/lib/explorer/backend/series.ex b/lib/explorer/backend/series.ex index fb8a1cd67..24b478b3c 100644 --- a/lib/explorer/backend/series.ex +++ b/lib/explorer/backend/series.ex @@ -244,9 +244,9 @@ defmodule Explorer.Backend.Series do @callback contains(s, String.t()) :: s @callback upcase(s) :: s @callback downcase(s) :: s - @callback trim(s, String.t() | nil) :: s - @callback trim_leading(s, String.t() | nil) :: s - @callback trim_trailing(s, String.t() | nil) :: s + @callback strip(s, String.t() | nil) :: s + @callback lstrip(s, String.t() | nil) :: s + @callback rstrip(s, String.t() | nil) :: s @callback substring(s, integer(), non_neg_integer() | nil) :: s # Date / DateTime diff --git a/lib/explorer/polars_backend/expression.ex b/lib/explorer/polars_backend/expression.ex index caa15094b..d9e238b6f 100644 --- a/lib/explorer/polars_backend/expression.ex +++ b/lib/explorer/polars_backend/expression.ex @@ -121,9 +121,9 @@ defmodule Explorer.PolarsBackend.Expression do # Strings contains: 2, - trim: 2, - trim_leading: 2, - trim_trailing: 2, + strip: 2, + lstrip: 2, + rstrip: 2, downcase: 1, upcase: 1, substring: 3 diff --git a/lib/explorer/polars_backend/native.ex b/lib/explorer/polars_backend/native.ex index 6c1dad3f7..9e544d196 100644 --- a/lib/explorer/polars_backend/native.ex +++ b/lib/explorer/polars_backend/native.ex @@ -264,7 +264,7 @@ defmodule Explorer.PolarsBackend.Native do def s_is_null(_s), do: err() def s_less(_s, _rhs), do: err() def s_less_equal(_s, _rhs), do: err() - def s_trim_leading(_s, _string), do: err() + def s_lstrip(_s, _string), do: err() def s_mask(_s, _filter), do: err() def s_max(_s), do: err() def s_mean(_s), do: err() @@ -311,7 +311,7 @@ defmodule Explorer.PolarsBackend.Native do def s_round(_s, _decimals), do: err() def s_floor(_s), do: err() def s_ceil(_s), do: err() - def s_trim_trailing(_s, _string), do: err() + def s_rstrip(_s, _string), do: err() def s_rank(_s, _method, _descending, _seed), do: err() def s_sample_n(_s, _n, _replace, _shuffle, _seed), do: err() def s_sample_frac(_s, _frac, _replace, _shuffle, _seed), do: err() @@ -322,7 +322,7 @@ defmodule Explorer.PolarsBackend.Native do def s_slice_by_series(_s, _series), do: err() def s_sort(_s, _descending?, _nils_last?), do: err() def s_standard_deviation(_s), do: err() - def s_trim(_s, _string), do: err() + def s_strip(_s, _string), do: err() def s_subtract(_s, _other), do: err() def s_sum(_s), do: err() def s_tail(_s, _length), do: err() diff --git a/lib/explorer/polars_backend/series.ex b/lib/explorer/polars_backend/series.ex index de5b48cda..d65d114bb 100644 --- a/lib/explorer/polars_backend/series.ex +++ b/lib/explorer/polars_backend/series.ex @@ -620,16 +620,16 @@ defmodule Explorer.PolarsBackend.Series do do: Shared.apply_series(series, :s_downcase) @impl true - def trim(series, str), - do: Shared.apply_series(series, :s_trim, [str]) + def strip(series, str), + do: Shared.apply_series(series, :s_strip, [str]) @impl true - def trim_leading(series, str), - do: Shared.apply_series(series, :s_trim_leading, [str]) + def lstrip(series, str), + do: Shared.apply_series(series, :s_lstrip, [str]) @impl true - def trim_trailing(series, str), - do: Shared.apply_series(series, :s_trim_trailing, [str]) + def rstrip(series, str), + do: Shared.apply_series(series, :s_rstrip, [str]) @impl true def substring(series, offset, length), diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex index 8b6625df4..76f0facbb 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -4318,7 +4318,7 @@ defmodule Explorer.Series do ## Examples iex> s = Explorer.Series.from_list([" abc", "def ", " bcd "]) - iex> Explorer.Series.trim(s) + iex> Explorer.Series.strip(s) #Explorer.Series< Polars[3] string ["abc", "def", "bcd"] @@ -4326,27 +4326,27 @@ defmodule Explorer.Series do """ @doc type: :string_wise - @spec trim(Series.t()) :: Series.t() - def trim(%Series{dtype: :string} = series), - do: apply_series(series, :trim, [nil]) + @spec strip(Series.t()) :: Series.t() + def strip(%Series{dtype: :string} = series), + do: apply_series(series, :strip, [nil]) @doc """ Returns a string series where all leading and trailing examples of the provided string have been removed. - Where multiple characters are provided, all combinations of this set of characters will be trimmed + Where multiple characters are provided, all combinations of this set of characters will be stripped ## Examples iex> s = Explorer.Series.from_list(["£123", "1.00£", "£1.00£"]) - iex> Explorer.Series.trim(s, "£") + iex> Explorer.Series.strip(s, "£") #Explorer.Series< Polars[3] string ["123", "1.00", "1.00"] > iex> s = Explorer.Series.from_list(["abc", "adefa", "bcda"]) - iex> Explorer.Series.trim(s, "ab") + iex> Explorer.Series.strip(s, "ab") #Explorer.Series< Polars[3] string ["c", "def", "cd"] @@ -4354,11 +4354,11 @@ defmodule Explorer.Series do """ @doc type: :string_wise - @spec trim(Series.t(), String.t()) :: Series.t() - def trim(%Series{dtype: :string} = series, string) when is_binary(string), - do: apply_series(series, :trim, [string]) + @spec strip(Series.t(), String.t()) :: Series.t() + def strip(%Series{dtype: :string} = series, string) when is_binary(string), + do: apply_series(series, :strip, [string]) - def trim(%Series{dtype: dtype}, _string), do: dtype_error("trim/2", dtype, [:string]) + def strip(%Series{dtype: dtype}, _string), do: dtype_error("strip/2", dtype, [:string]) @doc """ Returns a string series where all leading Unicode whitespaces have been removed. @@ -4366,45 +4366,45 @@ defmodule Explorer.Series do ## Examples iex> s = Explorer.Series.from_list([" abc", "def ", " bcd"]) - iex> Explorer.Series.trim_leading(s) + iex> Explorer.Series.lstrip(s) #Explorer.Series< Polars[3] string ["abc", "def ", "bcd"] > """ @doc type: :string_wise - @spec trim_leading(Series.t()) :: Series.t() - def trim_leading(%Series{dtype: :string} = series), - do: apply_series(series, :trim_leading, [nil]) + @spec lstrip(Series.t()) :: Series.t() + def lstrip(%Series{dtype: :string} = series), + do: apply_series(series, :lstrip, [nil]) @doc """ Returns a string series where all leading examples of the provided string have been removed. - Where multiple characters are provided, all combinations of this set of characters will be trimmed + Where multiple characters are provided, all combinations of this set of characters will be stripped ## Examples iex> s = Explorer.Series.from_list(["$1", "$$200$$", "$$$3000$"]) - iex> Explorer.Series.trim_leading(s, "$") + iex> Explorer.Series.lstrip(s, "$") #Explorer.Series< Polars[3] string ["1", "200$$", "3000$"] > iex> s = Explorer.Series.from_list(["abc", "adefa", "bcda"]) - iex> Explorer.Series.trim_leading(s, "ab") + iex> Explorer.Series.lstrip(s, "ab") #Explorer.Series< Polars[3] string ["c", "defa", "cda"] > """ @doc type: :string_wise - @spec trim_leading(Series.t(), String.t()) :: Series.t() - def trim_leading(%Series{dtype: :string} = series, string) when is_binary(string), - do: apply_series(series, :trim_leading, [string]) + @spec lstrip(Series.t(), String.t()) :: Series.t() + def lstrip(%Series{dtype: :string} = series, string) when is_binary(string), + do: apply_series(series, :lstrip, [string]) - def trim_leading(%Series{dtype: dtype}, _string), - do: dtype_error("trim_leading/2", dtype, [:string]) + def lstrip(%Series{dtype: dtype}, _string), + do: dtype_error("lstrip/2", dtype, [:string]) @doc """ Returns a string series where all trailing Unicode whitespaces have been removed. @@ -4412,46 +4412,46 @@ defmodule Explorer.Series do ## Examples iex> s = Explorer.Series.from_list([" abc", "def ", " bcd"]) - iex> Explorer.Series.trim_trailing(s) + iex> Explorer.Series.rstrip(s) #Explorer.Series< Polars[3] string [" abc", "def", " bcd"] > """ @doc type: :string_wise - @spec trim_trailing(Series.t()) :: Series.t() - def trim_trailing(%Series{dtype: :string} = series), - do: apply_series(series, :trim_trailing, [nil]) + @spec rstrip(Series.t()) :: Series.t() + def rstrip(%Series{dtype: :string} = series), + do: apply_series(series, :rstrip, [nil]) @doc """ Returns a string series where all trailing examples of the provided string have been removed. - Where multiple characters are provided, all combinations of this set of characters will be trimmed + Where multiple characters are provided, all combinations of this set of characters will be stripped ## Examples iex> s = Explorer.Series.from_list(["__abc__", "def_", "__bcd_"]) - iex> Explorer.Series.trim_trailing(s, "_") + iex> Explorer.Series.rstrip(s, "_") #Explorer.Series< Polars[3] string ["__abc", "def", "__bcd"] > iex> s = Explorer.Series.from_list(["abc", "adefa", "bcdabaaa"]) - iex> Explorer.Series.trim_trailing(s, "ab") + iex> Explorer.Series.rstrip(s, "ab") #Explorer.Series< Polars[3] string ["abc", "adef", "bcd"] > """ @doc type: :string_wise - @spec trim_trailing(Series.t(), String.t()) :: Series.t() - def trim_trailing(%Series{dtype: :string} = series, string) when is_binary(string), - do: apply_series(series, :trim_trailing, [string]) + @spec rstrip(Series.t(), String.t()) :: Series.t() + def rstrip(%Series{dtype: :string} = series, string) when is_binary(string), + do: apply_series(series, :rstrip, [string]) - def trim_trailing(%Series{dtype: dtype}, _string), - do: dtype_error("trim_trailing/2", dtype, [:string]) + def rstrip(%Series{dtype: dtype}, _string), + do: dtype_error("rstrip/2", dtype, [:string]) @doc """ Returns a string sliced from the offset to the end of the string, supporting diff --git a/native/explorer/src/expressions.rs b/native/explorer/src/expressions.rs index a184fd4a9..1abb0718a 100644 --- a/native/explorer/src/expressions.rs +++ b/native/explorer/src/expressions.rs @@ -748,19 +748,19 @@ pub fn expr_downcase(expr: ExExpr) -> ExExpr { } #[rustler::nif] -pub fn expr_trim(expr: ExExpr, string: Option) -> ExExpr { +pub fn expr_strip(expr: ExExpr, string: Option) -> ExExpr { let expr = expr.clone_inner(); ExExpr::new(expr.str().strip(string)) } #[rustler::nif] -pub fn expr_trim_leading(expr: ExExpr, string: Option) -> ExExpr { +pub fn expr_lstrip(expr: ExExpr, string: Option) -> ExExpr { let expr = expr.clone_inner(); ExExpr::new(expr.str().lstrip(string)) } #[rustler::nif] -pub fn expr_trim_trailing(expr: ExExpr, string: Option) -> ExExpr { +pub fn expr_rstrip(expr: ExExpr, string: Option) -> ExExpr { let expr = expr.clone_inner(); ExExpr::new(expr.str().rstrip(string)) } diff --git a/native/explorer/src/lib.rs b/native/explorer/src/lib.rs index e261731f4..b0e4dd43c 100644 --- a/native/explorer/src/lib.rs +++ b/native/explorer/src/lib.rs @@ -248,9 +248,9 @@ rustler::init!( expr_contains, expr_upcase, expr_downcase, - expr_trim, - expr_trim_leading, - expr_trim_trailing, + expr_strip, + expr_lstrip, + expr_rstrip, expr_substring, // float round expressions expr_round, @@ -343,7 +343,7 @@ rustler::init!( s_is_nan, s_less, s_less_equal, - s_trim_leading, + s_lstrip, s_mask, s_max, s_mean, @@ -390,7 +390,7 @@ rustler::init!( s_remainder, s_rename, s_reverse, - s_trim_trailing, + s_rstrip, s_sample_n, s_sample_frac, s_series_equal, @@ -403,7 +403,7 @@ rustler::init!( s_sort, s_standard_deviation, s_tan, - s_trim, + s_strip, s_substring, s_subtract, s_sum, diff --git a/native/explorer/src/series.rs b/native/explorer/src/series.rs index 0c5697b09..0a39b7fde 100644 --- a/native/explorer/src/series.rs +++ b/native/explorer/src/series.rs @@ -1350,7 +1350,7 @@ pub fn s_downcase(s1: ExSeries) -> Result { } #[rustler::nif(schedule = "DirtyCpu")] -pub fn s_trim(s1: ExSeries, pattern: Option<&str>) -> Result { +pub fn s_strip(s1: ExSeries, pattern: Option<&str>) -> Result { // There are no eager strip functions. let pattern = match pattern { None => String::from(r#"^[ \s]+|[ \s]+$"#), @@ -1367,7 +1367,7 @@ pub fn s_trim(s1: ExSeries, pattern: Option<&str>) -> Result) -> Result { +pub fn s_lstrip(s1: ExSeries, pattern: Option<&str>) -> Result { // There are no eager strip functions. let pattern = match pattern { None => String::from(r#"^[ \s]+"#), @@ -1380,7 +1380,7 @@ pub fn s_trim_leading(s1: ExSeries, pattern: Option<&str>) -> Result) -> Result { +pub fn s_rstrip(s1: ExSeries, pattern: Option<&str>) -> Result { // There are no eager strip functions. let pattern = match pattern { None => String::from(r#"[ \s]+$"#), diff --git a/test/explorer/data_frame_test.exs b/test/explorer/data_frame_test.exs index 835ea4b1f..3118636c7 100644 --- a/test/explorer/data_frame_test.exs +++ b/test/explorer/data_frame_test.exs @@ -1550,7 +1550,7 @@ defmodule Explorer.DataFrameTest do } end - test "trim characters from string" do + test "strip characters from string" do df = DF.new( a: ["£2", "3£", "£200£", "£££20"], @@ -1559,8 +1559,8 @@ defmodule Explorer.DataFrameTest do df1 = DF.mutate(df, - c: trim(a, "£"), - d: trim(b) + c: strip(a, "£"), + d: strip(b) ) assert DF.to_columns(df1, atom_keys: true) == %{ @@ -1571,7 +1571,7 @@ defmodule Explorer.DataFrameTest do } end - test "trim multiple characters from string" do + test "strip multiple characters from string" do df = DF.new( a: ["ababhelloabab", "abababworldabababa", "abab", "bbbbaaaabhelloba"], @@ -1580,8 +1580,8 @@ defmodule Explorer.DataFrameTest do df1 = DF.mutate(df, - c: trim(a, "ab"), - d: trim(b, "nx_") + c: strip(a, "ab"), + d: strip(b, "nx_") ) assert DF.to_columns(df1, atom_keys: true) == %{ @@ -1592,7 +1592,7 @@ defmodule Explorer.DataFrameTest do } end - test "trim trailing characters from string" do + test "strip trailing characters from string" do df = DF.new( a: ["£2", "3£", "£200£", "£££20"], @@ -1601,8 +1601,8 @@ defmodule Explorer.DataFrameTest do df1 = DF.mutate(df, - c: trim_trailing(a, "£"), - d: trim_trailing(b) + c: rstrip(a, "£"), + d: rstrip(b) ) assert DF.to_columns(df1, atom_keys: true) == %{ @@ -1613,7 +1613,7 @@ defmodule Explorer.DataFrameTest do } end - test "trim leading characters from string" do + test "strip leading characters from string" do df = DF.new( a: ["£2", "3£", "£200£", "£££20"], @@ -1622,8 +1622,8 @@ defmodule Explorer.DataFrameTest do df1 = DF.mutate(df, - c: trim_leading(a, "£"), - d: trim_leading(b) + c: lstrip(a, "£"), + d: lstrip(b) ) assert DF.to_columns(df1, atom_keys: true) == %{ diff --git a/test/explorer/series_test.exs b/test/explorer/series_test.exs index dbb7f06e2..ed0061766 100644 --- a/test/explorer/series_test.exs +++ b/test/explorer/series_test.exs @@ -3966,41 +3966,41 @@ defmodule Explorer.SeriesTest do end end - describe "trim, trim, trim_leading, trim_trailing" do - test "trim/1" do + describe "strip, strip, lstrip, rstrip" do + test "strip/1" do series = Series.from_list([" 123 ", " 2 ", " 20$ "]) - assert Series.trim(series) |> Series.to_list() == ["123", "2", "20$"] + assert Series.strip(series) |> Series.to_list() == ["123", "2", "20$"] end - test "trim/2" do + test "strip/2" do series = Series.from_list(["£1£23", "2£", "£20£"]) - assert Series.trim(series, "£") |> Series.to_list() == ["1£23", "2", "20"] + assert Series.strip(series, "£") |> Series.to_list() == ["1£23", "2", "20"] end - test "trim_leading/1" do + test "lstrip/1" do series = Series.from_list([" 123 ", " 2 ", " 20$ "]) - assert Series.trim_leading(series) |> Series.to_list() == ["123 ", "2 ", "20$ "] + assert Series.lstrip(series) |> Series.to_list() == ["123 ", "2 ", "20$ "] end - test "trim_leading/2" do + test "lstrip/2" do series = Series.from_list(["£1£23", "2£", "£20£"]) - assert Series.trim_leading(series, "£") |> Series.to_list() == ["1£23", "2£", "20£"] + assert Series.lstrip(series, "£") |> Series.to_list() == ["1£23", "2£", "20£"] end - test "trim_trailing/1" do + test "rstrip/1" do series = Series.from_list([" 123 ", " 2 ", " 20$ "]) - assert Series.trim_trailing(series) |> Series.to_list() == [" 123", " 2", " 20$"] + assert Series.rstrip(series) |> Series.to_list() == [" 123", " 2", " 20$"] end - test "trim_trailing/2" do + test "rstrip/2" do series = Series.from_list(["£1£23", "2£", "£20£"]) - assert Series.trim_trailing(series, "£") |> Series.to_list() == ["£1£23", "2", "£20"] + assert Series.rstrip(series, "£") |> Series.to_list() == ["£1£23", "2", "£20"] end end