diff --git a/crates/polars-ops/src/chunked_array/strings/mod.rs b/crates/polars-ops/src/chunked_array/strings/mod.rs index de81435f2b71d..f61929f40ebca 100644 --- a/crates/polars-ops/src/chunked_array/strings/mod.rs +++ b/crates/polars-ops/src/chunked_array/strings/mod.rs @@ -10,8 +10,11 @@ mod justify; mod namespace; #[cfg(feature = "strings")] mod replace; +#[cfg(feature = "strings")] mod split; #[cfg(feature = "strings")] +mod strip; +#[cfg(feature = "strings")] mod substring; #[cfg(feature = "extract_jsonpath")] @@ -19,7 +22,10 @@ pub use json_path::*; #[cfg(feature = "strings")] pub use namespace::*; use polars_core::prelude::*; +#[cfg(feature = "strings")] pub use split::*; +#[cfg(feature = "strings")] +pub use strip::*; pub trait AsUtf8 { fn as_utf8(&self) -> &Utf8Chunked; diff --git a/crates/polars-ops/src/chunked_array/strings/namespace.rs b/crates/polars-ops/src/chunked_array/strings/namespace.rs index 4a3cfc16b3ac1..c594094e90301 100644 --- a/crates/polars-ops/src/chunked_array/strings/namespace.rs +++ b/crates/polars-ops/src/chunked_array/strings/namespace.rs @@ -23,14 +23,6 @@ where f } -fn opt_strip_prefix<'a>(s: Option<&'a str>, prefix: Option<&str>) -> Option<&'a str> { - Some(s?.strip_prefix(prefix?).unwrap_or(s?)) -} - -fn opt_strip_suffix<'a>(s: Option<&'a str>, suffix: Option<&str>) -> Option<&'a str> { - Some(s?.strip_suffix(suffix?).unwrap_or(s?)) -} - pub trait Utf8NameSpaceImpl: AsUtf8 { #[cfg(not(feature = "binary_encoding"))] fn hex_decode(&self) -> PolarsResult { @@ -350,32 +342,43 @@ pub trait Utf8NameSpaceImpl: AsUtf8 { Ok(builder.finish()) } - fn strip_prefix(&self, prefix: &Utf8Chunked) -> Utf8Chunked { + fn strip_chars(&self, pat: &Series) -> PolarsResult { let ca = self.as_utf8(); - match prefix.len() { - 1 => match prefix.get(0) { - Some(prefix) => { - ca.apply_generic(|opt_s| opt_s.map(|s| s.strip_prefix(prefix).unwrap_or(s))) - }, - _ => Utf8Chunked::full_null(ca.name(), ca.len()), - }, - _ => binary_elementwise(ca, prefix, 
opt_strip_prefix), + if pat.dtype() == &DataType::Null { + Ok(ca.apply_generic(|opt_s| opt_s.map(|s| s.trim()))) + } else { + Ok(strip_chars(ca, pat.utf8()?)) } } - fn strip_suffix(&self, suffix: &Utf8Chunked) -> Utf8Chunked { + fn strip_chars_start(&self, pat: &Series) -> PolarsResult { let ca = self.as_utf8(); - match suffix.len() { - 1 => match suffix.get(0) { - Some(suffix) => { - ca.apply_generic(|opt_s| opt_s.map(|s| s.strip_suffix(suffix).unwrap_or(s))) - }, - _ => Utf8Chunked::full_null(ca.name(), ca.len()), - }, - _ => binary_elementwise(ca, suffix, opt_strip_suffix), + if pat.dtype() == &DataType::Null { + return Ok(ca.apply_generic(|opt_s| opt_s.map(|s| s.trim_start()))); + } else { + Ok(strip_chars_start(ca, pat.utf8()?)) } } + fn strip_chars_end(&self, pat: &Series) -> PolarsResult { + let ca = self.as_utf8(); + if pat.dtype() == &DataType::Null { + return Ok(ca.apply_generic(|opt_s| opt_s.map(|s| s.trim_end()))); + } else { + Ok(strip_chars_end(ca, pat.utf8()?)) + } + } + + fn strip_prefix(&self, prefix: &Utf8Chunked) -> Utf8Chunked { + let ca = self.as_utf8(); + strip_prefix(ca, prefix) + } + + fn strip_suffix(&self, suffix: &Utf8Chunked) -> Utf8Chunked { + let ca = self.as_utf8(); + strip_suffix(ca, suffix) + } + #[cfg(feature = "dtype-struct")] fn split_exact(&self, by: &Utf8Chunked, n: usize) -> PolarsResult { let ca = self.as_utf8(); diff --git a/crates/polars-ops/src/chunked_array/strings/strip.rs b/crates/polars-ops/src/chunked_array/strings/strip.rs new file mode 100644 index 0000000000000..1d58f4fd5f7a9 --- /dev/null +++ b/crates/polars-ops/src/chunked_array/strings/strip.rs @@ -0,0 +1,139 @@ +use polars_core::prelude::arity::binary_elementwise; + +use super::*; + +fn strip_chars_binary<'a>(opt_s: Option<&'a str>, opt_pat: Option<&str>) -> Option<&'a str> { + match (opt_s, opt_pat) { + (Some(s), Some(pat)) => { + if pat.chars().count() == 1 { + Some(s.trim_matches(pat.chars().next().unwrap())) + } else { + Some(s.trim_matches(|c| 
pat.contains(c))) + } + }, + (Some(s), _) => Some(s.trim()), + _ => None, + } +} + +fn strip_chars_start_binary<'a>(opt_s: Option<&'a str>, opt_pat: Option<&str>) -> Option<&'a str> { + match (opt_s, opt_pat) { + (Some(s), Some(pat)) => { + if pat.chars().count() == 1 { + Some(s.trim_start_matches(pat.chars().next().unwrap())) + } else { + Some(s.trim_start_matches(|c| pat.contains(c))) + } + }, + (Some(s), _) => Some(s.trim_start()), + _ => None, + } +} + +fn strip_chars_end_binary<'a>(opt_s: Option<&'a str>, opt_pat: Option<&str>) -> Option<&'a str> { + match (opt_s, opt_pat) { + (Some(s), Some(pat)) => { + if pat.chars().count() == 1 { + Some(s.trim_end_matches(pat.chars().next().unwrap())) + } else { + Some(s.trim_end_matches(|c| pat.contains(c))) + } + }, + (Some(s), _) => Some(s.trim_end()), + _ => None, + } +} + +fn strip_prefix_binary<'a>(s: Option<&'a str>, prefix: Option<&str>) -> Option<&'a str> { + Some(s?.strip_prefix(prefix?).unwrap_or(s?)) +} + +fn strip_suffix_binary<'a>(s: Option<&'a str>, suffix: Option<&str>) -> Option<&'a str> { + Some(s?.strip_suffix(suffix?).unwrap_or(s?)) +} + +pub fn strip_chars(ca: &Utf8Chunked, pat: &Utf8Chunked) -> Utf8Chunked { + match pat.len() { + 1 => { + if let Some(pat) = pat.get(0) { + if pat.chars().count() == 1 { + // Fast path for when a single character is passed + ca.apply_generic(|opt_s| { + opt_s.map(|s| s.trim_matches(pat.chars().next().unwrap())) + }) + } else { + ca.apply_generic(|opt_s| opt_s.map(|s| s.trim_matches(|c| pat.contains(c)))) + } + } else { + ca.apply_generic(|opt_s| opt_s.map(|s| s.trim())) + } + }, + _ => binary_elementwise(ca, pat, strip_chars_binary), + } +} + +pub fn strip_chars_start(ca: &Utf8Chunked, pat: &Utf8Chunked) -> Utf8Chunked { + match pat.len() { + 1 => { + if let Some(pat) = pat.get(0) { + if pat.chars().count() == 1 { + // Fast path for when a single character is passed + ca.apply_generic(|opt_s| { + opt_s.map(|s| s.trim_start_matches(pat.chars().next().unwrap())) + }) + } 
else { + ca.apply_generic(|opt_s| { + opt_s.map(|s| s.trim_start_matches(|c| pat.contains(c))) + }) + } + } else { + ca.apply_generic(|opt_s| opt_s.map(|s| s.trim_start())) + } + }, + _ => binary_elementwise(ca, pat, strip_chars_start_binary), + } +} + +pub fn strip_chars_end(ca: &Utf8Chunked, pat: &Utf8Chunked) -> Utf8Chunked { + match pat.len() { + 1 => { + if let Some(pat) = pat.get(0) { + if pat.chars().count() == 1 { + // Fast path for when a single character is passed + ca.apply_generic(|opt_s| { + opt_s.map(|s| s.trim_end_matches(pat.chars().next().unwrap())) + }) + } else { + ca.apply_generic(|opt_s| opt_s.map(|s| s.trim_end_matches(|c| pat.contains(c)))) + } + } else { + ca.apply_generic(|opt_s| opt_s.map(|s| s.trim_end())) + } + }, + _ => binary_elementwise(ca, pat, strip_chars_end_binary), + } +} + +pub fn strip_prefix(ca: &Utf8Chunked, prefix: &Utf8Chunked) -> Utf8Chunked { + match prefix.len() { + 1 => match prefix.get(0) { + Some(prefix) => { + ca.apply_generic(|opt_s| opt_s.map(|s| s.strip_prefix(prefix).unwrap_or(s))) + }, + _ => Utf8Chunked::full_null(ca.name(), ca.len()), + }, + _ => binary_elementwise(ca, prefix, strip_prefix_binary), + } +} + +pub fn strip_suffix(ca: &Utf8Chunked, suffix: &Utf8Chunked) -> Utf8Chunked { + match suffix.len() { + 1 => match suffix.get(0) { + Some(suffix) => { + ca.apply_generic(|opt_s| opt_s.map(|s| s.strip_suffix(suffix).unwrap_or(s))) + }, + _ => Utf8Chunked::full_null(ca.name(), ca.len()), + }, + _ => binary_elementwise(ca, suffix, strip_suffix_binary), + } +} diff --git a/crates/polars-plan/src/dsl/function_expr/mod.rs b/crates/polars-plan/src/dsl/function_expr/mod.rs index f6b668442b161..23aa39730fd5b 100644 --- a/crates/polars-plan/src/dsl/function_expr/mod.rs +++ b/crates/polars-plan/src/dsl/function_expr/mod.rs @@ -746,9 +746,9 @@ impl From for SpecialEq> { Lowercase => map!(strings::lowercase), #[cfg(feature = "nightly")] Titlecase => map!(strings::titlecase), - StripChars(matches) => 
map!(strings::strip_chars, matches.as_deref()), - StripCharsStart(matches) => map!(strings::strip_chars_start, matches.as_deref()), - StripCharsEnd(matches) => map!(strings::strip_chars_end, matches.as_deref()), + StripChars => map_as_slice!(strings::strip_chars), + StripCharsStart => map_as_slice!(strings::strip_chars_start), + StripCharsEnd => map_as_slice!(strings::strip_chars_end), StripPrefix => map_as_slice!(strings::strip_prefix), StripSuffix => map_as_slice!(strings::strip_suffix), #[cfg(feature = "string_from_radix")] diff --git a/crates/polars-plan/src/dsl/function_expr/strings.rs b/crates/polars-plan/src/dsl/function_expr/strings.rs index b53260e51582b..1ae438b4c66e2 100644 --- a/crates/polars-plan/src/dsl/function_expr/strings.rs +++ b/crates/polars-plan/src/dsl/function_expr/strings.rs @@ -71,9 +71,9 @@ pub enum StringFunction { }, Slice(i64, Option), StartsWith, - StripChars(Option), - StripCharsStart(Option), - StripCharsEnd(Option), + StripChars, + StripCharsStart, + StripCharsEnd, StripPrefix, StripSuffix, #[cfg(feature = "dtype-struct")] @@ -127,9 +127,9 @@ impl StringFunction { ToDecimal(_) => mapper.with_dtype(DataType::Decimal(None, None)), Uppercase | Lowercase - | StripChars(_) - | StripCharsStart(_) - | StripCharsEnd(_) + | StripChars + | StripCharsStart + | StripCharsEnd | StripPrefix | StripSuffix | Slice(_, _) => mapper.with_same_dtype(), @@ -182,9 +182,9 @@ impl Display for StringFunction { StringFunction::Replace { .. } => "replace", StringFunction::Slice(_, _) => "str_slice", StringFunction::StartsWith { .. 
} => "starts_with", - StringFunction::StripChars(_) => "strip_chars", - StringFunction::StripCharsStart(_) => "strip_chars_start", - StringFunction::StripCharsEnd(_) => "strip_chars_end", + StringFunction::StripChars => "strip_chars", + StringFunction::StripCharsStart => "strip_chars_start", + StringFunction::StripCharsEnd => "strip_chars_end", StringFunction::StripPrefix => "strip_prefix", StringFunction::StripSuffix => "strip_suffix", #[cfg(feature = "dtype-struct")] @@ -298,67 +298,22 @@ pub(super) fn rjust(s: &Series, width: usize, fillchar: char) -> PolarsResult) -> PolarsResult { - let ca = s.utf8()?; - if let Some(matches) = matches { - if matches.chars().count() == 1 { - // Fast path for when a single character is passed - Ok(ca - .apply_values(|s| Cow::Borrowed(s.trim_matches(matches.chars().next().unwrap()))) - .into_series()) - } else { - Ok(ca - .apply_values(|s| Cow::Borrowed(s.trim_matches(|c| matches.contains(c)))) - .into_series()) - } - } else { - Ok(ca.apply_values(|s| Cow::Borrowed(s.trim())).into_series()) - } +pub(super) fn strip_chars(s: &[Series]) -> PolarsResult { + let ca = s[0].utf8()?; + let pat_s = &s[1]; + ca.strip_chars(pat_s).map(|ok| ok.into_series()) } -pub(super) fn strip_chars_start(s: &Series, matches: Option<&str>) -> PolarsResult { - let ca = s.utf8()?; - - if let Some(matches) = matches { - if matches.chars().count() == 1 { - // Fast path for when a single character is passed - Ok(ca - .apply_values(|s| { - Cow::Borrowed(s.trim_start_matches(matches.chars().next().unwrap())) - }) - .into_series()) - } else { - Ok(ca - .apply_values(|s| Cow::Borrowed(s.trim_start_matches(|c| matches.contains(c)))) - .into_series()) - } - } else { - Ok(ca - .apply_values(|s| Cow::Borrowed(s.trim_start())) - .into_series()) - } +pub(super) fn strip_chars_start(s: &[Series]) -> PolarsResult { + let ca = s[0].utf8()?; + let pat_s = &s[1]; + ca.strip_chars_start(pat_s).map(|ok| ok.into_series()) } -pub(super) fn strip_chars_end(s: &Series, matches: 
Option<&str>) -> PolarsResult { - let ca = s.utf8()?; - if let Some(matches) = matches { - if matches.chars().count() == 1 { - // Fast path for when a single character is passed - Ok(ca - .apply_values(|s| { - Cow::Borrowed(s.trim_end_matches(matches.chars().next().unwrap())) - }) - .into_series()) - } else { - Ok(ca - .apply_values(|s| Cow::Borrowed(s.trim_end_matches(|c| matches.contains(c)))) - .into_series()) - } - } else { - Ok(ca - .apply_values(|s| Cow::Borrowed(s.trim_end())) - .into_series()) - } +pub(super) fn strip_chars_end(s: &[Series]) -> PolarsResult { + let ca = s[0].utf8()?; + let pat_s = &s[1]; + ca.strip_chars_end(pat_s).map(|ok| ok.into_series()) } pub(super) fn strip_prefix(s: &[Series]) -> PolarsResult { diff --git a/crates/polars-plan/src/dsl/string.rs b/crates/polars-plan/src/dsl/string.rs index 04d1387cde52f..70e4cfa213736 100644 --- a/crates/polars-plan/src/dsl/string.rs +++ b/crates/polars-plan/src/dsl/string.rs @@ -292,27 +292,33 @@ impl StringNameSpace { } /// Remove leading and trailing characters, or whitespace if matches is None. - pub fn strip_chars(self, matches: Option) -> Expr { - self.0 - .map_private(FunctionExpr::StringExpr(StringFunction::StripChars( - matches, - ))) + pub fn strip_chars(self, matches: Expr) -> Expr { + self.0.map_many_private( + FunctionExpr::StringExpr(StringFunction::StripChars), + &[matches], + false, + false, + ) } /// Remove leading characters, or whitespace if matches is None. - pub fn strip_chars_start(self, matches: Option) -> Expr { - self.0 - .map_private(FunctionExpr::StringExpr(StringFunction::StripCharsStart( - matches, - ))) + pub fn strip_chars_start(self, matches: Expr) -> Expr { + self.0.map_many_private( + FunctionExpr::StringExpr(StringFunction::StripCharsStart), + &[matches], + false, + false, + ) } /// Remove trailing characters, or whitespace if matches is None. 
- pub fn strip_chars_end(self, matches: Option) -> Expr { - self.0 - .map_private(FunctionExpr::StringExpr(StringFunction::StripCharsEnd( - matches, - ))) + pub fn strip_chars_end(self, matches: Expr) -> Expr { + self.0.map_many_private( + FunctionExpr::StringExpr(StringFunction::StripCharsEnd), + &[matches], + false, + false, + ) } /// Remove prefix. diff --git a/crates/polars-sql/src/functions.rs b/crates/polars-sql/src/functions.rs index 2158ce2874512..fe97baa448317 100644 --- a/crates/polars-sql/src/functions.rs +++ b/crates/polars-sql/src/functions.rs @@ -3,6 +3,7 @@ use polars_lazy::dsl::Expr; use polars_plan::dsl::{coalesce, count, when}; use polars_plan::logical_plan::LiteralValue; use polars_plan::prelude::lit; +use polars_plan::prelude::LiteralValue::Null; use sqlparser::ast::{ Expr as SqlExpr, Function as SQLFunction, FunctionArg, FunctionArgExpr, Value as SqlValue, WindowSpec, WindowType, @@ -626,8 +627,8 @@ impl SqlFunctionVisitor<'_> { Length => self.visit_unary(|e| e.str().n_chars()), Lower => self.visit_unary(|e| e.str().to_lowercase()), LTrim => match function.args.len() { - 1 => self.visit_unary(|e| e.str().strip_chars_start(None)), - 2 => self.visit_binary(|e, s| e.str().strip_chars_start(Some(s))), + 1 => self.visit_unary(|e| e.str().strip_chars_start(lit(Null))), + 2 => self.visit_binary(|e, s| e.str().strip_chars_start(s)), _ => polars_bail!(InvalidOperation: "Invalid number of arguments for LTrim: {}", function.args.len() @@ -652,8 +653,8 @@ impl SqlFunctionVisitor<'_> { _ => polars_bail!(InvalidOperation:"Invalid number of arguments for RegexpLike: {}",function.args.len()), }, RTrim => match function.args.len() { - 1 => self.visit_unary(|e| e.str().strip_chars_end(None)), - 2 => self.visit_binary(|e, s| e.str().strip_chars_end(Some(s))), + 1 => self.visit_unary(|e| e.str().strip_chars_end(lit(Null))), + 2 => self.visit_binary(|e, s| e.str().strip_chars_end(s)), _ => polars_bail!(InvalidOperation: "Invalid number of arguments for RTrim: {}", 
function.args.len() diff --git a/crates/polars-sql/src/sql_expr.rs b/crates/polars-sql/src/sql_expr.rs index 49352bac49849..7d7e8106803dd 100644 --- a/crates/polars-sql/src/sql_expr.rs +++ b/crates/polars-sql/src/sql_expr.rs @@ -2,6 +2,7 @@ use polars_arrow::error::to_compute_err; use polars_core::prelude::*; use polars_lazy::dsl::Expr; use polars_lazy::prelude::*; +use polars_plan::prelude::LiteralValue::Null; use polars_plan::prelude::{col, lit, when}; use sqlparser::ast::{ ArrayAgg, BinaryOperator as SQLBinaryOperator, BinaryOperator, DataType as SQLDataType, @@ -322,12 +323,12 @@ impl SqlExprVisitor<'_> { }; Ok(match (trim_where, trim_what) { - (None | Some(TrimWhereField::Both), None) => expr.str().strip_chars(None), - (None | Some(TrimWhereField::Both), Some(val)) => expr.str().strip_chars(Some(val)), - (Some(TrimWhereField::Leading), None) => expr.str().strip_chars_start(None), - (Some(TrimWhereField::Leading), Some(val)) => expr.str().strip_chars_start(Some(val)), - (Some(TrimWhereField::Trailing), None) => expr.str().strip_chars_end(None), - (Some(TrimWhereField::Trailing), Some(val)) => expr.str().strip_chars_end(Some(val)), + (None | Some(TrimWhereField::Both), None) => expr.str().strip_chars(lit(Null)), + (None | Some(TrimWhereField::Both), Some(val)) => expr.str().strip_chars(lit(val)), + (Some(TrimWhereField::Leading), None) => expr.str().strip_chars_start(lit(Null)), + (Some(TrimWhereField::Leading), Some(val)) => expr.str().strip_chars_start(lit(val)), + (Some(TrimWhereField::Trailing), None) => expr.str().strip_chars_end(lit(Null)), + (Some(TrimWhereField::Trailing), Some(val)) => expr.str().strip_chars_end(lit(val)), }) } diff --git a/crates/polars-sql/tests/functions_string.rs b/crates/polars-sql/tests/functions_string.rs index b91a375157a75..32252f0cb505f 100644 --- a/crates/polars-sql/tests/functions_string.rs +++ b/crates/polars-sql/tests/functions_string.rs @@ -1,5 +1,6 @@ use polars_core::prelude::*; use polars_lazy::prelude::*; +use 
polars_plan::prelude::LiteralValue::Null; use polars_sql::*; #[test] @@ -48,32 +49,32 @@ fn test_string_functions() { col("a").str().to_uppercase().alias("upper_a_df"), col("a").str().to_uppercase().alias("upper_a_df2"), col("a").str().to_uppercase().alias("upper_a_df3"), - col("a").str().strip_chars(Some("x".into())).alias("trim_a"), + col("a").str().strip_chars(lit("x")).alias("trim_a"), col("a") .str() - .strip_chars_start(Some("x".into())) + .strip_chars_start(lit("x")) .alias("trim_a_leading"), col("a") .str() - .strip_chars_end(Some("x".into())) + .strip_chars_end(lit("x")) .alias("trim_a_trailing"), - col("a").str().strip_chars_start(None).alias("ltrim_a"), - col("a").str().strip_chars_end(None).alias("rtrim_a"), + col("a").str().strip_chars_start(lit(Null)).alias("ltrim_a"), + col("a").str().strip_chars_end(lit(Null)).alias("rtrim_a"), col("a") .str() - .strip_chars_start(Some("-".into())) + .strip_chars_start(lit("-")) .alias("ltrim_a_dash"), col("a") .str() - .strip_chars_end(Some("-".into())) + .strip_chars_end(lit("-")) .alias("rtrim_a_dash"), col("a") .str() - .strip_chars_start(Some("xyz".into())) + .strip_chars_start(lit("xyz")) .alias("ltrim_a_xyz"), col("a") .str() - .strip_chars_end(Some("xyz".into())) + .strip_chars_end(lit("xyz")) .alias("rtrim_a_xyz"), ]) .collect() diff --git a/py-polars/polars/expr/string.py b/py-polars/polars/expr/string.py index 379227228075e..5ae5708f27bc3 100644 --- a/py-polars/polars/expr/string.py +++ b/py-polars/polars/expr/string.py @@ -20,6 +20,7 @@ from polars.type_aliases import ( Ambiguous, IntoExpr, + IntoExprColumn, PolarsDataType, PolarsTemporalType, TimeUnit, @@ -528,7 +529,7 @@ def to_titlecase(self) -> Expr: """ return wrap_expr(self._pyexpr.str_to_titlecase()) - def strip_chars(self, characters: str | None = None) -> Expr: + def strip_chars(self, characters: IntoExprColumn | None = None) -> Expr: r""" Remove leading and trailing characters. 
@@ -581,9 +582,10 @@ def strip_chars(self, characters: str | None = None) -> Expr: └───────┘ """ + characters = parse_as_expression(characters, str_as_lit=True) return wrap_expr(self._pyexpr.str_strip_chars(characters)) - def strip_chars_start(self, characters: str | None = None) -> Expr: + def strip_chars_start(self, characters: IntoExprColumn | None = None) -> Expr: r""" Remove leading characters. @@ -623,9 +625,10 @@ def strip_chars_start(self, characters: str | None = None) -> Expr: └─────────┘ """ + characters = parse_as_expression(characters, str_as_lit=True) return wrap_expr(self._pyexpr.str_strip_chars_start(characters)) - def strip_chars_end(self, characters: str | None = None) -> Expr: + def strip_chars_end(self, characters: IntoExprColumn | None = None) -> Expr: r""" Remove trailing characters. @@ -678,6 +681,7 @@ def strip_chars_end(self, characters: str | None = None) -> Expr: └───────┘ """ + characters = parse_as_expression(characters, str_as_lit=True) return wrap_expr(self._pyexpr.str_strip_chars_end(characters)) def strip_prefix(self, prefix: IntoExpr) -> Expr: diff --git a/py-polars/polars/series/string.py b/py-polars/polars/series/string.py index 56429f42dfd9b..777daa8fd92e2 100644 --- a/py-polars/polars/series/string.py +++ b/py-polars/polars/series/string.py @@ -11,6 +11,7 @@ from polars.type_aliases import ( Ambiguous, IntoExpr, + IntoExprColumn, PolarsDataType, PolarsTemporalType, TimeUnit, @@ -1102,7 +1103,7 @@ def replace_all(self, pattern: str, value: str, *, literal: bool = False) -> Ser """ - def strip_chars(self, characters: str | None = None) -> Series: + def strip_chars(self, characters: IntoExprColumn | None = None) -> Series: r""" Remove leading and trailing characters. 
@@ -1138,7 +1139,7 @@ def strip_chars(self, characters: str | None = None) -> Series: """ - def strip_chars_start(self, characters: str | None = None) -> Series: + def strip_chars_start(self, characters: IntoExprColumn | None = None) -> Series: r""" Remove leading characters. @@ -1173,7 +1174,7 @@ def strip_chars_start(self, characters: str | None = None) -> Series: """ - def strip_chars_end(self, characters: str | None = None) -> Series: + def strip_chars_end(self, characters: IntoExprColumn | None = None) -> Series: r""" Remove trailing characters. diff --git a/py-polars/src/expr/string.rs b/py-polars/src/expr/string.rs index 67db9bce336cb..4aefb2f4162b2 100644 --- a/py-polars/src/expr/string.rs +++ b/py-polars/src/expr/string.rs @@ -63,16 +63,24 @@ impl PyExpr { self.inner.clone().str().to_time(options).into() } - fn str_strip_chars(&self, matches: Option) -> Self { - self.inner.clone().str().strip_chars(matches).into() + fn str_strip_chars(&self, matches: Self) -> Self { + self.inner.clone().str().strip_chars(matches.inner).into() } - fn str_strip_chars_start(&self, matches: Option) -> Self { - self.inner.clone().str().strip_chars_start(matches).into() + fn str_strip_chars_start(&self, matches: Self) -> Self { + self.inner + .clone() + .str() + .strip_chars_start(matches.inner) + .into() } - fn str_strip_chars_end(&self, matches: Option) -> Self { - self.inner.clone().str().strip_chars_end(matches).into() + fn str_strip_chars_end(&self, matches: Self) -> Self { + self.inner + .clone() + .str() + .strip_chars_end(matches.inner) + .into() } fn str_strip_prefix(&self, prefix: Self) -> Self { diff --git a/py-polars/tests/unit/namespaces/test_string.py b/py-polars/tests/unit/namespaces/test_string.py index 37782037bf653..01fd5820297e0 100644 --- a/py-polars/tests/unit/namespaces/test_string.py +++ b/py-polars/tests/unit/namespaces/test_string.py @@ -195,6 +195,49 @@ def test_str_parse_int_df() -> None: ) +def test_str_strip_chars_expr() -> None: + df = pl.DataFrame( 
+ { + "s": [" hello ", "^^world^^", "&&hi&&", " polars ", None], + "pat": [" ", "^", "&", None, "anything"], + } + ) + + all_expr = df.select( + [ + pl.col("s").str.strip_chars(pl.col("pat")).alias("strip_chars"), + pl.col("s").str.strip_chars_start(pl.col("pat")).alias("strip_chars_start"), + pl.col("s").str.strip_chars_end(pl.col("pat")).alias("strip_chars_end"), + ] + ) + + expected = pl.DataFrame( + { + "strip_chars": ["hello", "world", "hi", "polars", None], + "strip_chars_start": ["hello ", "world^^", "hi&&", "polars ", None], + "strip_chars_end": [" hello", "^^world", "&&hi", " polars", None], + } + ) + + assert_frame_equal(all_expr, expected) + + strip_by_null = df.select( + pl.col("s").str.strip_chars(None).alias("strip_chars"), + pl.col("s").str.strip_chars_start(None).alias("strip_chars_start"), + pl.col("s").str.strip_chars_end(None).alias("strip_chars_end"), + ) + + # only whitespace is stripped. + expected = pl.DataFrame( + { + "strip_chars": ["hello", "^^world^^", "&&hi&&", "polars", None], + "strip_chars_start": ["hello ", "^^world^^", "&&hi&&", "polars ", None], + "strip_chars_end": [" hello", "^^world^^", "&&hi&&", " polars", None], + } + ) + assert_frame_equal(strip_by_null, expected) + + def test_str_strip_chars() -> None: s = pl.Series([" hello ", "world\t "]) expected = pl.Series(["hello", "world"])