From 919400e9080a0f0afb0aa2aefd4dd27fda461818 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Tue, 24 Oct 2023 14:09:30 +0200 Subject: [PATCH] Cleanup --- .../src/chunked_array/strings/mod.rs | 4 +- .../src/chunked_array/strings/namespace.rs | 16 +-- .../src/chunked_array/strings/pad.rs | 26 ++-- crates/polars-plan/src/dsl/string.rs | 26 ++-- py-polars/polars/expr/string.py | 112 ++++++++---------- py-polars/polars/series/string.py | 45 ++++--- py-polars/src/expr/string.rs | 4 +- .../tests/unit/namespaces/string/test_pad.py | 4 +- .../unit/namespaces/string/test_string.py | 1 - 9 files changed, 122 insertions(+), 116 deletions(-) diff --git a/crates/polars-ops/src/chunked_array/strings/mod.rs b/crates/polars-ops/src/chunked_array/strings/mod.rs index dfa2e20a283c..532e7f10723d 100644 --- a/crates/polars-ops/src/chunked_array/strings/mod.rs +++ b/crates/polars-ops/src/chunked_array/strings/mod.rs @@ -6,10 +6,10 @@ mod concat; mod extract; #[cfg(feature = "extract_jsonpath")] mod json_path; -#[cfg(feature = "string_pad")] -mod pad; #[cfg(feature = "strings")] mod namespace; +#[cfg(feature = "string_pad")] +mod pad; #[cfg(feature = "strings")] mod replace; #[cfg(feature = "strings")] diff --git a/crates/polars-ops/src/chunked_array/strings/namespace.rs b/crates/polars-ops/src/chunked_array/strings/namespace.rs index b28d5ecb5c9b..61bfc508d986 100644 --- a/crates/polars-ops/src/chunked_array/strings/namespace.rs +++ b/crates/polars-ops/src/chunked_array/strings/namespace.rs @@ -159,7 +159,7 @@ pub trait Utf8NameSpaceImpl: AsUtf8 { /// Pad the start of the string until it reaches the given length. /// /// Padding is done using the specified `fill_char`. - /// Strings with a length equal to or greater than the given length are + /// Strings with length equal to or greater than the given length are /// returned as-is. #[cfg(feature = "string_pad")] fn pad_start(&self, length: usize, fill_char: char) -> Utf8Chunked { @@ -170,7 +170,7 @@ pub trait Utf8NameSpaceImpl: AsUtf8 { /// Pad the end of the string until it reaches the given length. /// /// Padding is done using the specified `fill_char`. - /// Strings with a length equal to or greater than the given length are + /// Strings with length equal to or greater than the given length are /// returned as-is. #[cfg(feature = "string_pad")] fn pad_end(&self, length: usize, fill_char: char) -> Utf8Chunked { @@ -178,12 +178,12 @@ pub trait Utf8NameSpaceImpl: AsUtf8 { pad::pad_end(ca, length, fill_char) } - /// Return a copy of the string left filled with ASCII '0' digits to make a - /// string of length width. - /// - /// A leading sign prefix ('+'/'-') is handled by inserting the padding after the sign character - /// rather than before. - /// The original string is returned if width is less than or equal to `s.len()`. + /// Pad the start of the string with zeros until it reaches the given length. + /// + /// A sign prefix (`-`) is handled by inserting the padding after the sign + /// character rather than before. + /// Strings with length equal to or greater than the given length are + /// returned as-is. #[cfg(feature = "string_pad")] fn zfill(&self, length: usize) -> Utf8Chunked { let ca = self.as_utf8(); diff --git a/crates/polars-ops/src/chunked_array/strings/pad.rs b/crates/polars-ops/src/chunked_array/strings/pad.rs index 01a0ac75735f..19ed3ebc6719 100644 --- a/crates/polars-ops/src/chunked_array/strings/pad.rs +++ b/crates/polars-ops/src/chunked_array/strings/pad.rs @@ -2,18 +2,18 @@ use std::fmt::Write; use polars_core::prelude::Utf8Chunked; -pub(super) fn pad_end<'a>(ca: &'a Utf8Chunked, width: usize, fillchar: char) -> Utf8Chunked { +pub(super) fn pad_end<'a>(ca: &'a Utf8Chunked, length: usize, fill_char: char) -> Utf8Chunked { // amortize allocation let mut buf = String::new(); let f = |s: &'a str| { - let padding = width.saturating_sub(s.len()); + let padding = length.saturating_sub(s.len()); if padding == 0 { s } else { buf.clear(); buf.push_str(s); for _ in 0..padding { - buf.push(fillchar) + buf.push(fill_char) } // extend lifetime // lifetime is bound to 'a @@ -24,17 +24,17 @@ pub(super) fn pad_end<'a>(ca: &'a Utf8Chunked, width: usize, fillchar: char) -> ca.apply_mut(f) } -pub(super) fn pad_start<'a>(ca: &'a Utf8Chunked, width: usize, fillchar: char) -> Utf8Chunked { +pub(super) fn pad_start<'a>(ca: &'a Utf8Chunked, length: usize, fill_char: char) -> Utf8Chunked { // amortize allocation let mut buf = String::new(); let f = |s: &'a str| { - let padding = width.saturating_sub(s.len()); + let padding = length.saturating_sub(s.len()); if padding == 0 { s } else { buf.clear(); for _ in 0..padding { - buf.push(fillchar) + buf.push(fill_char) } buf.push_str(s); // extend lifetime @@ -46,30 +46,30 @@ pub(super) fn pad_start<'a>(ca: &'a Utf8Chunked, width: usize, fillchar: char) - ca.apply_mut(f) } -pub(super) fn zfill<'a>(ca: &'a Utf8Chunked, alignment: usize) -> Utf8Chunked { +pub(super) fn zfill<'a>(ca: &'a Utf8Chunked, length: usize) -> Utf8Chunked { // amortize allocation let mut buf = String::new(); let f = |s: &'a str| { - let alignment = alignment.saturating_sub(s.len()); - if alignment == 0 { + let length = length.saturating_sub(s.len()); + if length == 0 { return s; } buf.clear(); if let Some(stripped) = s.strip_prefix('-') { write!( &mut buf, - "-{:0alignment$}{value}", + "-{:0length$}{value}", 0, - alignment = alignment, + length = length, value = stripped ) .unwrap(); } else { write!( &mut buf, - "{:0alignment$}{value}", + "{:0length$}{value}", 0, - alignment = alignment, + length = length, value = s ) .unwrap(); diff --git a/crates/polars-plan/src/dsl/string.rs b/crates/polars-plan/src/dsl/string.rs index 2a12fedc2e70..1eab0990736b 100644 --- a/crates/polars-plan/src/dsl/string.rs +++ b/crates/polars-plan/src/dsl/string.rs @@ -93,28 +93,34 @@ impl StringNameSpace { )) } - /// Return the string right justified in a string of length width. - /// Padding is done using the specified `fillchar`, - /// The original string is returned if width is less than or equal to `s.len()`. + /// Pad the start of the string until it reaches the given length. + /// + /// Padding is done using the specified `fill_char`. + /// Strings with length equal to or greater than the given length are + /// returned as-is. #[cfg(feature = "string_pad")] pub fn pad_start(self, length: usize, fill_char: char) -> Expr { self.0 .map_private(StringFunction::PadStart { length, fill_char }.into()) } - /// Return the string left justified in a string of length width. - /// Padding is done using the specified `fillchar`, - /// The original string is returned if width is less than or equal to `s.len()`. + /// Pad the end of the string until it reaches the given length. + /// + /// Padding is done using the specified `fill_char`. + /// Strings with length equal to or greater than the given length are + /// returned as-is. #[cfg(feature = "string_pad")] pub fn pad_end(self, length: usize, fill_char: char) -> Expr { self.0 .map_private(StringFunction::PadEnd { length, fill_char }.into()) } - /// Return a copy of the string left filled with ASCII '0' digits to make a string of length width. - /// A leading sign prefix ('+'/'-') is handled by inserting the padding after the sign character - /// rather than before. - /// The original string is returned if width is less than or equal to `s.len()`. + /// Pad the start of the string with zeros until it reaches the given length. + /// + /// A sign prefix (`-`) is handled by inserting the padding after the sign + /// character rather than before. + /// Strings with length equal to or greater than the given length are + /// returned as-is. #[cfg(feature = "string_pad")] pub fn zfill(self, length: usize) -> Expr { self.0.map_private(StringFunction::ZFill(length).into()) diff --git a/py-polars/polars/expr/string.py b/py-polars/polars/expr/string.py index 95297d76314d..2501d381e81d 100644 --- a/py-polars/polars/expr/string.py +++ b/py-polars/polars/expr/string.py @@ -772,33 +772,34 @@ def pad_start(self, length: int, fill_char: str = " ") -> Expr: Parameters ---------- length - Pad the string until it reaches this length. Strings with a length equal to + Pad the string until it reaches this length. Strings with length equal to or greater than this value are returned as-is. fill_char - The ASCII character to pad the string with. + The character to pad the string with. See Also -------- pad_end + zfill Examples -------- >>> df = pl.DataFrame({"a": ["cow", "monkey", "hippopotamus", None]}) - >>> df.select(pl.col("a").str.rjust(8, "*")) - shape: (4, 1) - ┌──────────────┐ - │ a │ - │ --- │ - │ str │ - ╞══════════════╡ - │ *****cow │ - │ **monkey │ - │ null │ - │ hippopotamus │ - └──────────────┘ + >>> df.with_columns(padded=pl.col("a").str.pad_start(8, "*")) + shape: (4, 2) + ┌──────────────┬──────────────┐ + │ a ┆ padded │ + │ --- ┆ --- │ + │ str ┆ str │ + ╞══════════════╪══════════════╡ + │ cow ┆ *****cow │ + │ monkey ┆ **monkey │ + │ hippopotamus ┆ hippopotamus │ + │ null ┆ null │ + └──────────────┴──────────────┘ """ - return wrap_expr(self._pyexpr.str_rjust(length, fill_char)) + return wrap_expr(self._pyexpr.str_pad_start(length, fill_char)) def pad_end(self, length: int, fill_char: str = " ") -> Expr: """ @@ -807,10 +808,10 @@ def pad_end(self, length: int, fill_char: str = " ") -> Expr: Parameters ---------- length - Pad the string until it reaches this length. Strings with a length equal to + Pad the string until it reaches this length. Strings with length equal to or greater than this value are returned as-is. fill_char - The ASCII character to pad the string with. + The character to pad the string with. See Also -------- @@ -818,38 +819,36 @@ def pad_end(self, length: int, fill_char: str = " ") -> Expr: Examples -------- - >>> df = pl.DataFrame({"a": ["cow", "monkey", None, "hippopotamus"]}) - >>> df.select(pl.col("a").str.ljust(8, "*")) - shape: (4, 1) - ┌──────────────┐ - │ a │ - │ --- │ - │ str │ - ╞══════════════╡ - │ cow***** │ - │ monkey** │ - │ null │ - │ hippopotamus │ - └──────────────┘ + >>> df = pl.DataFrame({"a": ["cow", "monkey", "hippopotamus", None]}) + >>> df.with_columns(padded=pl.col("a").str.pad_end(8, "*")) + shape: (4, 2) + ┌──────────────┬──────────────┐ + │ a ┆ padded │ + │ --- ┆ --- │ + │ str ┆ str │ + ╞══════════════╪══════════════╡ + │ cow ┆ cow***** │ + │ monkey ┆ monkey** │ + │ hippopotamus ┆ hippopotamus │ + │ null ┆ null │ + └──────────────┴──────────────┘ + """ - return wrap_expr(self._pyexpr.str_ljust(length, fill_char)) + return wrap_expr(self._pyexpr.str_pad_end(length, fill_char)) @deprecate_renamed_parameter("alignment", "length", version="0.19.12") def zfill(self, length: int) -> Expr: """ - Fills the string with zeroes. - - Return a copy of the string left filled with ASCII '0' digits to make a string - of length width. + Pad the start of the string with zeros until it reaches the given length. - A leading sign prefix ('+'/'-') is handled by inserting the padding after the - sign character rather than before. The original string is returned if width is - less than or equal to ``len(s)``. + A sign prefix (``-``) is handled by inserting the padding after the sign + character rather than before. Parameters ---------- length - Fill the value up to this length + Pad the string until it reaches this length. Strings with length equal to + or greater than this value are returned as-is. See Also -------- @@ -857,28 +856,19 @@ def zfill(self, length: int) -> Expr: Examples -------- - >>> df = pl.DataFrame( - ... { - ... "num": [-10, -1, 0, 1, 10, 100, 1000, 10000, 100000, 1000000, None], - ... } - ... ) - >>> df.with_columns(pl.col("num").cast(str).str.zfill(5)) - shape: (11, 1) - ┌─────────┐ - │ num │ - │ --- │ - │ str │ - ╞═════════╡ - │ -0010 │ - │ -0001 │ - │ 00000 │ - │ 00001 │ - │ … │ - │ 10000 │ - │ 100000 │ - │ 1000000 │ - │ null │ - └─────────┘ + >>> df = pl.DataFrame({"a": [-1, 123, 999999, None]}) + >>> df.with_columns(zfill=pl.col("a").cast(pl.Utf8).str.zfill(4)) + shape: (4, 2) + ┌────────┬────────┐ + │ a ┆ zfill │ + │ --- ┆ --- │ + │ i64 ┆ str │ + ╞════════╪════════╡ + │ -1 ┆ -001 │ + │ 123 ┆ 0123 │ + │ 999999 ┆ 999999 │ + │ null ┆ null │ + └────────┴────────┘ """ return wrap_expr(self._pyexpr.str_zfill(length)) diff --git a/py-polars/polars/series/string.py b/py-polars/polars/series/string.py index 281dcca95af9..57e613c04c64 100644 --- a/py-polars/polars/series/string.py +++ b/py-polars/polars/series/string.py @@ -1281,26 +1281,27 @@ def pad_start(self, length: int, fill_char: str = " ") -> Series: Parameters ---------- length - Pad the string until it reaches this length. Strings with a length equal to + Pad the string until it reaches this length. Strings with length equal to or greater than this value are returned as-is. fill_char - The ASCII character to pad the string with. + The character to pad the string with. See Also -------- pad_end + zfill Examples -------- >>> s = pl.Series("a", ["cow", "monkey", "hippopotamus", None]) - >>> s.str.rjust(8, "*") + >>> s.str.pad_start(8, "*") shape: (4,) Series: 'a' [str] [ "*****cow" "**monkey" - null "hippopotamus" + null ] """ @@ -1312,10 +1313,10 @@ def pad_end(self, length: int, fill_char: str = " ") -> Series: Parameters ---------- length - Pad the string until it reaches this length. Strings with a length equal to + Pad the string until it reaches this length. Strings with length equal to or greater than this value are returned as-is. fill_char - The ASCII character to pad the string with. + The character to pad the string with. See Also -------- @@ -1330,8 +1331,8 @@ def pad_end(self, length: int, fill_char: str = " ") -> Series: [ "cow*****" "monkey**" - null "hippopotamus" + null ] """ @@ -1339,23 +1340,33 @@ def pad_end(self, length: int, fill_char: str = " ") -> Series: @deprecate_renamed_parameter("alignment", "length", version="0.19.12") def zfill(self, length: int) -> Series: """ - Fills the string with zeroes. + Pad the start of the string with zeros until it reaches the given length. - Return a copy of the string left filled with ASCII '0' digits to make a string - of the given length. + A sign prefix (``-``) is handled by inserting the padding after the sign + character rather than before. - A leading sign prefix ('+'/'-') is handled by inserting the padding after the - sign character rather than before. The original string is returned if width is - less than or equal to ``len(s)``. + Parameters + ---------- + length + Pad the string until it reaches this length. Strings with length equal to + or greater than this value are returned as-is. See Also -------- pad_start - Parameters - ---------- - length - Fill the value up to this length. + Examples + -------- + >>> s = pl.Series([-1, 123, 999999, None]) + >>> s.cast(pl.Utf8).str.zfill(4) + shape: (4,) + Series: '' [str] + [ + "-001" + "0123" + "999999" + null + ] """ diff --git a/py-polars/src/expr/string.rs b/py-polars/src/expr/string.rs index f60edec79001..5503153dffae 100644 --- a/py-polars/src/expr/string.rs +++ b/py-polars/src/expr/string.rs @@ -138,11 +138,11 @@ impl PyExpr { } fn str_pad_start(&self, length: usize, fill_char: char) -> Self { - self.inner.clone().str().rjust(length, fill_char).into() + self.inner.clone().str().pad_start(length, fill_char).into() } fn str_pad_end(&self, length: usize, fill_char: char) -> Self { - self.inner.clone().str().ljust(length, fill_char).into() + self.inner.clone().str().pad_end(length, fill_char).into() } fn str_zfill(&self, length: usize) -> Self { diff --git a/py-polars/tests/unit/namespaces/string/test_pad.py b/py-polars/tests/unit/namespaces/string/test_pad.py index 3b27f120ac21..ff28febcd7ef 100644 --- a/py-polars/tests/unit/namespaces/string/test_pad.py +++ b/py-polars/tests/unit/namespaces/string/test_pad.py @@ -19,7 +19,7 @@ def test_str_pad_start() -> None: "padded": [" foo", "longer_foo", "longest_fooooooo", " hi"], "padded_len": [10, 10, 16, 10], }, - schema_overrides={"padded_len": pl.UInt32} + schema_overrides={"padded_len": pl.UInt32}, ) assert_frame_equal(result, expected) @@ -37,7 +37,7 @@ def test_str_pad_end() -> None: "padded": ["foo ", "longer_foo", "longest_fooooooo", "hi "], "padded_len": [10, 10, 16, 10], }, - schema_overrides={"padded_len": pl.UInt32} + schema_overrides={"padded_len": pl.UInt32}, ) assert_frame_equal(result, expected) diff --git a/py-polars/tests/unit/namespaces/string/test_string.py b/py-polars/tests/unit/namespaces/string/test_string.py index 16a5a3987317..b08f82f1fbd2 100644 --- a/py-polars/tests/unit/namespaces/string/test_string.py +++ b/py-polars/tests/unit/namespaces/string/test_string.py @@ -818,7 +818,6 @@ def _named_groups_builder(pattern: str, groups: dict[str, str]) -> str: ).to_dict(False) == {"literal": [{"foo": "foo", "bar": None}]} - def test_starts_ends_with() -> None: df = pl.DataFrame( {