Skip to content

Commit

Permalink
feat: Add name.replace expression to support additional column rena…
Browse files Browse the repository at this point in the history
…me options
  • Loading branch information
alexander-beedie committed Jul 30, 2024
1 parent fae85ff commit 77a8dff
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 4 deletions.
13 changes: 13 additions & 0 deletions crates/polars-plan/src/dsl/name.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use regex::Regex;
#[cfg(feature = "dtype-struct")]
use smartstring::alias::String as SmartString;

Expand Down Expand Up @@ -48,6 +49,18 @@ impl ExprNameNameSpace {
self.map(move |name| Ok(format!("{name}{suffix}")))
}

/// Replace matching string pattern in the root column name with a new value.
///
/// When `literal` is `true`, every occurrence of `pattern` in the name is
/// replaced verbatim with `value`. Otherwise `pattern` is compiled as a regular
/// expression and every match is replaced using `Regex::replace_all`, so
/// `value` follows the `regex` crate's replacement-string rules.
///
/// # Errors
///
/// This method itself always returns an `Expr`. If `pattern` is not a valid
/// regular expression (and `literal` is `false`), the compile error is kept and
/// returned from the name-mapping closure each time that closure is invoked.
pub fn replace(self, pattern: &str, value: &str, literal: bool) -> Expr {
    let value = value.to_string();
    if literal {
        // Only the literal branch needs an owned copy of the pattern.
        let pattern = pattern.to_string();
        self.map(move |name| Ok(name.replace(&pattern, &value)))
    } else {
        // Compile once, outside the closure; the closure may run for many names.
        let rx = Regex::new(pattern);
        self.map(move |name| {
            // Borrow the compiled regex instead of cloning it per call; the
            // error is cloned only on the failure path so the closure stays `Fn`.
            let compiled = rx.as_ref().map_err(|err| err.clone())?;
            Ok(compiled.replace_all(name, &value).into_owned())
        })
    }
}

/// Update the root column name to use lowercase characters.
#[allow(clippy::wrong_self_convention)]
pub fn to_lowercase(self) -> Expr {
Expand Down
46 changes: 46 additions & 0 deletions py-polars/polars/expr/name.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,52 @@ def prefix_fields(self, prefix: str) -> Expr:
"""
return self._from_pyexpr(self._pyexpr.name_prefix_fields(prefix))

def replace(self, pattern: str, value: str, *, literal: bool = False) -> Expr:
    """
    Substitute the parts of the name that match a regex (or literal) with a new value.

    Parameters
    ----------
    pattern
        A valid regular expression pattern, compatible with the `regex crate
        <https://docs.rs/regex/latest/regex/>`_.
    value
        String that will replace the matched substring.
    literal
        Treat `pattern` as a literal string, not a regex.

    Notes
    -----
    This will undo any previous renaming operations on the expression.

    Due to implementation constraints, this method can only be called as the last
    expression in a chain. Only one name operation per expression will work.
    Consider using `.name.map` for advanced renaming.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "n_foo": [1, 2, 3],
    ...         "n_bar": ["x", "y", "z"],
    ...     }
    ... )
    >>> df.select(pl.all().name.replace("^n_", "col_"))
    shape: (3, 2)
    ┌─────────┬─────────┐
    │ col_foo ┆ col_bar │
    │ ---     ┆ ---     │
    │ i64     ┆ str     │
    ╞═════════╪═════════╡
    │ 1       ┆ x       │
    │ 2       ┆ y       │
    │ 3       ┆ z       │
    └─────────┴─────────┘
    >>> df.select(pl.all().name.replace("(a|e|i|o|u)", "@")).schema
    Schema([('n_f@@', Int64), ('n_b@r', String)])
    """
    # Delegate the rename to the underlying expression, then re-wrap the result.
    renamed = self._pyexpr.name_replace(pattern, value, literal)
    return self._from_pyexpr(renamed)

def suffix_fields(self, suffix: str) -> Expr:
"""
Add a suffix to all fields name of a struct.
Expand Down
4 changes: 2 additions & 2 deletions py-polars/polars/expr/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1881,7 +1881,7 @@ def replace(
value
String that will replace the matched substring.
literal
Treat `pattern` as a literal string.
Treat `pattern` as a literal string, not a regex.
n
Number of matches to replace.
Expand Down Expand Up @@ -1974,7 +1974,7 @@ def replace_all(
value
String that will replace the matched substring.
literal
Treat `pattern` as a literal string.
Treat `pattern` as a literal string, not a regex.
See Also
--------
Expand Down
4 changes: 2 additions & 2 deletions py-polars/polars/series/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1095,7 +1095,7 @@ def replace(
value
String that will replace the matched substring.
literal
Treat `pattern` as a literal string.
Treat `pattern` as a literal string, not a regex.
n
Number of matches to replace.
Expand Down Expand Up @@ -1169,7 +1169,7 @@ def replace_all(self, pattern: str, value: str, *, literal: bool = False) -> Ser
value
String that will replace the matched substring.
literal
Treat `pattern` as a literal string.
Treat `pattern` as a literal string, not a regex.
See Also
--------
Expand Down
8 changes: 8 additions & 0 deletions py-polars/src/expr/name.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ impl PyExpr {
self.inner.clone().name().to_uppercase().into()
}

// Forwards to the `replace` method of the inner expression's name namespace,
// working on a clone of `self.inner`, and converts the result back into `Self`.
fn name_replace(&self, pattern: &str, value: &str, literal: bool) -> Self {
    let namespace = self.inner.clone().name();
    namespace.replace(pattern, value, literal).into()
}

fn name_map_fields(&self, name_mapper: PyObject) -> Self {
let name_mapper = Arc::new(move |name: &str| {
Python::with_gil(|py| {
Expand Down
38 changes: 38 additions & 0 deletions py-polars/tests/unit/operations/namespaces/test_name.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

from collections import OrderedDict

import pytest

import polars as pl
from polars.exceptions import ComputeError


def test_name_change_case() -> None:
Expand Down Expand Up @@ -43,6 +46,41 @@ def test_name_prefix_suffix() -> None:
)


def test_name_replace() -> None:
    """Exercise `name.replace` with regex patterns, an invalid regex, and literal mode."""
    df = pl.DataFrame(
        schema={"n_foo": pl.Int32, "n_bar": pl.String, "misc?": pl.Float64},
    )

    # Anchored regex: only names beginning with "n_" are rewritten.
    prefix_swapped = df.select(pl.all().name.replace("^n_", "col_"))
    assert prefix_swapped.schema == {
        "col_foo": pl.Int32,
        "col_bar": pl.String,
        "misc?": pl.Float64,
    }

    # Alternation regex: every matching vowel in every name is replaced.
    vowels_masked = df.select(pl.all().name.replace("(a|e|i|o|u)", "#"))
    assert vowels_masked.schema == {
        "n_f##": pl.Int32,
        "n_b#r": pl.String,
        "m#sc?": pl.Float64,
    }

    # A bare "?" is not a valid regex, so the select must fail.
    with pytest.raises(ComputeError, match="repetition operator missing expression"):
        df.select(
            pl.all().name.replace("?", "!!"),
        )

    # The same pattern is accepted when it is treated as a literal string.
    literal_swapped = df.select(pl.all().name.replace("?", "!!", literal=True))
    assert literal_swapped.schema == {
        "n_foo": pl.Int32,
        "n_bar": pl.String,
        "misc!!": pl.Float64,
    }


def test_name_update_all() -> None:
df = pl.DataFrame(
schema={
Expand Down

0 comments on commit 77a8dff

Please sign in to comment.