Skip to content

Commit

Permalink
Cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Oct 24, 2023
1 parent 6f35d5f commit 919400e
Show file tree
Hide file tree
Showing 9 changed files with 122 additions and 116 deletions.
4 changes: 2 additions & 2 deletions crates/polars-ops/src/chunked_array/strings/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ mod concat;
mod extract;
#[cfg(feature = "extract_jsonpath")]
mod json_path;
#[cfg(feature = "string_pad")]
mod pad;
#[cfg(feature = "strings")]
mod namespace;
#[cfg(feature = "string_pad")]
mod pad;
#[cfg(feature = "strings")]
mod replace;
#[cfg(feature = "strings")]
Expand Down
16 changes: 8 additions & 8 deletions crates/polars-ops/src/chunked_array/strings/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ pub trait Utf8NameSpaceImpl: AsUtf8 {
/// Pad the start of the string until it reaches the given length.
///
/// Padding is done using the specified `fill_char`.
/// Strings with a length equal to or greater than the given length are
/// Strings with length equal to or greater than the given length are
/// returned as-is.
#[cfg(feature = "string_pad")]
fn pad_start(&self, length: usize, fill_char: char) -> Utf8Chunked {
Expand All @@ -170,20 +170,20 @@ pub trait Utf8NameSpaceImpl: AsUtf8 {
/// Pad the end of the string until it reaches the given length.
///
/// Padding is done using the specified `fill_char`.
/// Strings with a length equal to or greater than the given length are
/// Strings with length equal to or greater than the given length are
/// returned as-is.
#[cfg(feature = "string_pad")]
fn pad_end(&self, length: usize, fill_char: char) -> Utf8Chunked {
    // Forward the Utf8 view of `self` straight to the `pad` module's implementation.
    pad::pad_end(self.as_utf8(), length, fill_char)
}

/// Return a copy of the string left filled with ASCII '0' digits to make a
/// string of length width.
///
/// A leading sign prefix ('+'/'-') is handled by inserting the padding after the sign character
/// rather than before.
/// The original string is returned if width is less than or equal to `s.len()`.
/// Pad the start of the string with zeros until it reaches the given length.
///
/// A sign prefix (`-`) is handled by inserting the padding after the sign
/// character rather than before.
/// Strings with length equal to or greater than the given length are
/// returned as-is.
#[cfg(feature = "string_pad")]
fn zfill(&self, length: usize) -> Utf8Chunked {
let ca = self.as_utf8();
Expand Down
26 changes: 13 additions & 13 deletions crates/polars-ops/src/chunked_array/strings/pad.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,18 @@ use std::fmt::Write;

use polars_core::prelude::Utf8Chunked;

pub(super) fn pad_end<'a>(ca: &'a Utf8Chunked, width: usize, fillchar: char) -> Utf8Chunked {
pub(super) fn pad_end<'a>(ca: &'a Utf8Chunked, length: usize, fill_char: char) -> Utf8Chunked {
// amortize allocation
let mut buf = String::new();
let f = |s: &'a str| {
let padding = width.saturating_sub(s.len());
let padding = length.saturating_sub(s.len());
if padding == 0 {
s
} else {
buf.clear();
buf.push_str(s);
for _ in 0..padding {
buf.push(fillchar)
buf.push(fill_char)
}
// extend lifetime
// lifetime is bound to 'a
Expand All @@ -24,17 +24,17 @@ pub(super) fn pad_end<'a>(ca: &'a Utf8Chunked, width: usize, fillchar: char) ->
ca.apply_mut(f)
}

pub(super) fn pad_start<'a>(ca: &'a Utf8Chunked, width: usize, fillchar: char) -> Utf8Chunked {
pub(super) fn pad_start<'a>(ca: &'a Utf8Chunked, length: usize, fill_char: char) -> Utf8Chunked {
// amortize allocation
let mut buf = String::new();
let f = |s: &'a str| {
let padding = width.saturating_sub(s.len());
let padding = length.saturating_sub(s.len());
if padding == 0 {
s
} else {
buf.clear();
for _ in 0..padding {
buf.push(fillchar)
buf.push(fill_char)
}
buf.push_str(s);
// extend lifetime
Expand All @@ -46,30 +46,30 @@ pub(super) fn pad_start<'a>(ca: &'a Utf8Chunked, width: usize, fillchar: char) -
ca.apply_mut(f)
}

pub(super) fn zfill<'a>(ca: &'a Utf8Chunked, alignment: usize) -> Utf8Chunked {
pub(super) fn zfill<'a>(ca: &'a Utf8Chunked, length: usize) -> Utf8Chunked {
// amortize allocation
let mut buf = String::new();
let f = |s: &'a str| {
let alignment = alignment.saturating_sub(s.len());
if alignment == 0 {
let length = length.saturating_sub(s.len());
if length == 0 {
return s;
}
buf.clear();
if let Some(stripped) = s.strip_prefix('-') {
write!(
&mut buf,
"-{:0alignment$}{value}",
"-{:0length$}{value}",
0,
alignment = alignment,
length = length,
value = stripped
)
.unwrap();
} else {
write!(
&mut buf,
"{:0alignment$}{value}",
"{:0length$}{value}",
0,
alignment = alignment,
length = length,
value = s
)
.unwrap();
Expand Down
26 changes: 16 additions & 10 deletions crates/polars-plan/src/dsl/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,28 +93,34 @@ impl StringNameSpace {
))
}

/// Return the string right justified in a string of length width.
/// Padding is done using the specified `fillchar`,
/// The original string is returned if width is less than or equal to `s.len()`.
/// Pad the start of the string until it reaches the given length.
///
/// Padding is done using the specified `fill_char`.
/// Strings with length equal to or greater than the given length are
/// returned as-is.
#[cfg(feature = "string_pad")]
pub fn pad_start(self, length: usize, fill_char: char) -> Expr {
    // Build the function descriptor first, then attach it to the wrapped expression.
    let function = StringFunction::PadStart { length, fill_char };
    self.0.map_private(function.into())
}

/// Return the string left justified in a string of length width.
/// Padding is done using the specified `fillchar`,
/// The original string is returned if width is less than or equal to `s.len()`.
/// Pad the end of the string until it reaches the given length.
///
/// Padding is done using the specified `fill_char`.
/// Strings with length equal to or greater than the given length are
/// returned as-is.
#[cfg(feature = "string_pad")]
pub fn pad_end(self, length: usize, fill_char: char) -> Expr {
    // Build the function descriptor first, then attach it to the wrapped expression.
    let function = StringFunction::PadEnd { length, fill_char };
    self.0.map_private(function.into())
}

/// Return a copy of the string left filled with ASCII '0' digits to make a string of length width.
/// A leading sign prefix ('+'/'-') is handled by inserting the padding after the sign character
/// rather than before.
/// The original string is returned if width is less than or equal to `s.len()`.
/// Pad the start of the string with zeros until it reaches the given length.
///
/// A sign prefix (`-`) is handled by inserting the padding after the sign
/// character rather than before.
/// Strings with length equal to or greater than the given length are
/// returned as-is.
#[cfg(feature = "string_pad")]
pub fn zfill(self, length: usize) -> Expr {
self.0.map_private(StringFunction::ZFill(length).into())
Expand Down
112 changes: 51 additions & 61 deletions py-polars/polars/expr/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,33 +772,34 @@ def pad_start(self, length: int, fill_char: str = " ") -> Expr:
Parameters
----------
length
Pad the string until it reaches this length. Strings with a length equal to
Pad the string until it reaches this length. Strings with length equal to
or greater than this value are returned as-is.
fill_char
The ASCII character to pad the string with.
The character to pad the string with.
See Also
--------
pad_end
zfill
Examples
--------
>>> df = pl.DataFrame({"a": ["cow", "monkey", "hippopotamus", None]})
>>> df.select(pl.col("a").str.rjust(8, "*"))
shape: (4, 1)
┌──────────────┐
│ a │
│ --- │
│ str │
╞══════════════╡
│ *****cow │
│ **monkey │
│ null │
│ hippopotamus │
└──────────────┘
>>> df.with_columns(padded=pl.col("a").str.pad_start(8, "*"))
shape: (4, 2)
┌──────────────┬──────────────┐
│ a ┆ padded │
│ --- ┆ --- │
│ str ┆ str │
╞══════════════╪══════════════╡
│ cow ┆ *****cow │
│ monkey ┆ **monkey │
│ hippopotamus ┆ hippopotamus │
│ null ┆ null │
└──────────────┴──────────────┘
"""
return wrap_expr(self._pyexpr.str_rjust(length, fill_char))
return wrap_expr(self._pyexpr.str_pad_start(length, fill_char))

def pad_end(self, length: int, fill_char: str = " ") -> Expr:
"""
Expand All @@ -807,78 +808,67 @@ def pad_end(self, length: int, fill_char: str = " ") -> Expr:
Parameters
----------
length
Pad the string until it reaches this length. Strings with a length equal to
Pad the string until it reaches this length. Strings with length equal to
or greater than this value are returned as-is.
fill_char
The ASCII character to pad the string with.
The character to pad the string with.
See Also
--------
pad_start
Examples
--------
>>> df = pl.DataFrame({"a": ["cow", "monkey", None, "hippopotamus"]})
>>> df.select(pl.col("a").str.ljust(8, "*"))
shape: (4, 1)
┌──────────────┐
│ a │
│ --- │
│ str │
╞══════════════╡
│ cow***** │
│ monkey** │
│ null │
│ hippopotamus │
└──────────────┘
>>> df = pl.DataFrame({"a": ["cow", "monkey", "hippopotamus", None]})
>>> df.with_columns(padded=pl.col("a").str.pad_end(8, "*"))
shape: (4, 2)
┌──────────────┬──────────────┐
│ a ┆ padded │
│ --- ┆ --- │
│ str ┆ str │
╞══════════════╪══════════════╡
│ cow ┆ cow***** │
│ monkey ┆ monkey** │
│ hippopotamus ┆ hippopotamus │
│ null ┆ null │
└──────────────┴──────────────┘
"""
return wrap_expr(self._pyexpr.str_ljust(length, fill_char))
return wrap_expr(self._pyexpr.str_pad_end(length, fill_char))

@deprecate_renamed_parameter("alignment", "length", version="0.19.12")
def zfill(self, length: int) -> Expr:
"""
Fills the string with zeroes.
Return a copy of the string left filled with ASCII '0' digits to make a string
of length width.
Pad the start of the string with zeros until it reaches the given length.
A leading sign prefix ('+'/'-') is handled by inserting the padding after the
sign character rather than before. The original string is returned if width is
less than or equal to ``len(s)``.
A sign prefix (``-``) is handled by inserting the padding after the sign
character rather than before.
Parameters
----------
length
Fill the value up to this length
Pad the string until it reaches this length. Strings with length equal to
or greater than this value are returned as-is.
See Also
--------
pad_start
Examples
--------
>>> df = pl.DataFrame(
... {
... "num": [-10, -1, 0, 1, 10, 100, 1000, 10000, 100000, 1000000, None],
... }
... )
>>> df.with_columns(pl.col("num").cast(str).str.zfill(5))
shape: (11, 1)
┌─────────┐
│ num │
│ --- │
│ str │
╞═════════╡
│ -0010 │
│ -0001 │
│ 00000 │
│ 00001 │
│ … │
│ 10000 │
│ 100000 │
│ 1000000 │
│ null │
└─────────┘
>>> df = pl.DataFrame({"a": [-1, 123, 999999, None]})
>>> df.with_columns(zfill=pl.col("a").cast(pl.Utf8).str.zfill(4))
shape: (4, 2)
┌────────┬────────┐
│ a ┆ zfill │
│ --- ┆ --- │
│ i64 ┆ str │
╞════════╪════════╡
│ -1 ┆ -001 │
│ 123 ┆ 0123 │
│ 999999 ┆ 999999 │
│ null ┆ null │
└────────┴────────┘
"""
return wrap_expr(self._pyexpr.str_zfill(length))
Expand Down
Loading

0 comments on commit 919400e

Please sign in to comment.