Skip to content

Commit

Permalink
depr(python, rust!): Rename count_match to count_matches (#11028)
Browse files Browse the repository at this point in the history
Co-authored-by: Dmitrii Rudenko <[email protected]>
Co-authored-by: Stijn de Gooijer <[email protected]>
  • Loading branch information
3 people authored Sep 10, 2023
1 parent 5b14a76 commit 67c7d7d
Show file tree
Hide file tree
Showing 22 changed files with 160 additions and 48 deletions.
2 changes: 1 addition & 1 deletion crates/polars-ops/src/chunked_array/list/count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ fn count_bits_set_by_offsets(values: &Bitmap, offset: &[i64]) -> Vec<IdxSize> {
}

#[cfg(feature = "list_count")]
pub fn list_count_match(ca: &ListChunked, value: AnyValue) -> PolarsResult<Series> {
pub fn list_count_matches(ca: &ListChunked, value: AnyValue) -> PolarsResult<Series> {
let value = Series::new("", [value]);

let ca = ca.apply_to_inner(&|s| {
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-ops/src/chunked_array/strings/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ pub trait Utf8NameSpaceImpl: AsUtf8 {
}

/// Count all successive non-overlapping regex matches.
fn count_match(&self, pat: &str) -> PolarsResult<UInt32Chunked> {
fn count_matches(&self, pat: &str) -> PolarsResult<UInt32Chunked> {
let ca = self.as_utf8();
let reg = Regex::new(pat)?;

Expand All @@ -355,7 +355,7 @@ pub trait Utf8NameSpaceImpl: AsUtf8 {
}

/// Count all successive non-overlapping regex matches.
fn count_match_many(&self, pat: &Utf8Chunked) -> PolarsResult<UInt32Chunked> {
fn count_matches_many(&self, pat: &Utf8Chunked) -> PolarsResult<UInt32Chunked> {
let ca = self.as_utf8();
polars_ensure!(
ca.len() == pat.len(),
Expand Down
8 changes: 4 additions & 4 deletions crates/polars-plan/src/dsl/function_expr/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ pub enum ListFunction {
#[cfg(feature = "list_take")]
Take(bool),
#[cfg(feature = "list_count")]
CountMatch,
CountMatches,
Sum,
#[cfg(feature = "list_sets")]
SetOperation(SetOperation),
Expand All @@ -37,7 +37,7 @@ impl Display for ListFunction {
#[cfg(feature = "list_take")]
Take(_) => "take",
#[cfg(feature = "list_count")]
CountMatch => "count",
CountMatches => "count",
Sum => "sum",
#[cfg(feature = "list_sets")]
SetOperation(s) => return write!(f, "{s}"),
Expand Down Expand Up @@ -247,7 +247,7 @@ pub(super) fn take(args: &[Series], null_on_oob: bool) -> PolarsResult<Series> {
}

#[cfg(feature = "list_count")]
pub(super) fn count_match(args: &[Series]) -> PolarsResult<Series> {
pub(super) fn count_matches(args: &[Series]) -> PolarsResult<Series> {
let s = &args[0];
let element = &args[1];
polars_ensure!(
Expand All @@ -256,7 +256,7 @@ pub(super) fn count_match(args: &[Series]) -> PolarsResult<Series> {
element.len()
);
let ca = s.list()?;
list_count_match(ca, element.get(0).unwrap())
list_count_matches(ca, element.get(0).unwrap())
}

pub(super) fn sum(s: &Series) -> PolarsResult<Series> {
Expand Down
6 changes: 3 additions & 3 deletions crates/polars-plan/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,7 @@ impl From<FunctionExpr> for SpecialEq<Arc<dyn SeriesUdf>> {
#[cfg(feature = "list_take")]
Take(null_ob_oob) => map_as_slice!(list::take, null_ob_oob),
#[cfg(feature = "list_count")]
CountMatch => map_as_slice!(list::count_match),
CountMatches => map_as_slice!(list::count_matches),
Sum => map!(list::sum),
#[cfg(feature = "list_sets")]
SetOperation(s) => map_as_slice!(list::set_operation, s),
Expand Down Expand Up @@ -647,8 +647,8 @@ impl From<StringFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
match func {
#[cfg(feature = "regex")]
Contains { literal, strict } => map_as_slice!(strings::contains, literal, strict),
CountMatch => {
map_as_slice!(strings::count_match)
CountMatches => {
map_as_slice!(strings::count_matches)
},
EndsWith { .. } => map_as_slice!(strings::ends_with),
StartsWith { .. } => map_as_slice!(strings::starts_with),
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/src/dsl/function_expr/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ impl FunctionExpr {
#[cfg(feature = "list_take")]
Take(_) => mapper.with_same_dtype(),
#[cfg(feature = "list_count")]
CountMatch => mapper.with_dtype(IDX_DTYPE),
CountMatches => mapper.with_dtype(IDX_DTYPE),
Sum => mapper.nested_sum_type(),
#[cfg(feature = "list_sets")]
SetOperation(_) => mapper.with_same_dtype(),
Expand Down
12 changes: 6 additions & 6 deletions crates/polars-plan/src/dsl/function_expr/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ pub enum StringFunction {
literal: bool,
strict: bool,
},
CountMatch,
CountMatches,
EndsWith,
Explode,
Extract {
Expand Down Expand Up @@ -92,7 +92,7 @@ impl StringFunction {
ConcatVertical(_) | ConcatHorizontal(_) => mapper.with_same_dtype(),
#[cfg(feature = "regex")]
Contains { .. } => mapper.with_dtype(DataType::Boolean),
CountMatch => mapper.with_dtype(DataType::UInt32),
CountMatches => mapper.with_dtype(DataType::UInt32),
EndsWith | StartsWith => mapper.with_dtype(DataType::Boolean),
Explode => mapper.with_same_dtype(),
Extract { .. } => mapper.with_same_dtype(),
Expand Down Expand Up @@ -132,7 +132,7 @@ impl Display for StringFunction {
let s = match self {
#[cfg(feature = "regex")]
StringFunction::Contains { .. } => "contains",
StringFunction::CountMatch => "count_match",
StringFunction::CountMatches => "count_matches",
StringFunction::EndsWith { .. } => "ends_with",
StringFunction::Extract { .. } => "extract",
#[cfg(feature = "concat_str")]
Expand Down Expand Up @@ -433,20 +433,20 @@ pub(super) fn extract_all(args: &[Series]) -> PolarsResult<Series> {
}
}

pub(super) fn count_match(args: &[Series]) -> PolarsResult<Series> {
pub(super) fn count_matches(args: &[Series]) -> PolarsResult<Series> {
let s = &args[0];
let pat = &args[1];

let ca = s.utf8()?;
let pat = pat.utf8()?;
if pat.len() == 1 {
if let Some(pat) = pat.get(0) {
ca.count_match(pat).map(|ca| ca.into_series())
ca.count_matches(pat).map(|ca| ca.into_series())
} else {
Ok(Series::full_null(ca.name(), ca.len(), &DataType::UInt32))
}
} else {
ca.count_match_many(pat).map(|ca| ca.into_series())
ca.count_matches_many(pat).map(|ca| ca.into_series())
}
}

Expand Down
4 changes: 2 additions & 2 deletions crates/polars-plan/src/dsl/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,12 +310,12 @@ impl ListNameSpace {
}
#[cfg(feature = "list_count")]
/// Count how often the value produced by ``element`` occurs.
pub fn count_match<E: Into<Expr>>(self, element: E) -> Expr {
pub fn count_matches<E: Into<Expr>>(self, element: E) -> Expr {
let other = element.into();

Expr::Function {
input: vec![self.0, other],
function: FunctionExpr::ListExpr(ListFunction::CountMatch),
function: FunctionExpr::ListExpr(ListFunction::CountMatches),
options: FunctionOptions {
collect_groups: ApplyOptions::ApplyFlat,
input_wildcard_expansion: true,
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-plan/src/dsl/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,9 @@ impl StringNameSpace {
}

/// Count all successive non-overlapping regex matches.
pub fn count_match(self, pat: Expr) -> Expr {
pub fn count_matches(self, pat: Expr) -> Expr {
self.0
.map_many_private(StringFunction::CountMatch.into(), &[pat], false)
.map_many_private(StringFunction::CountMatches.into(), &[pat], false)
}

/// Convert a Utf8 column into a Date/Datetime/Time column.
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/expressions/list.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ The following methods are available under the `expr.list` attribute.
Expr.list.concat
Expr.list.contains
Expr.list.count_match
Expr.list.count_matches
Expr.list.diff
Expr.list.difference
Expr.list.eval
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/expressions/string.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ The following methods are available under the `expr.str` attribute.
Expr.str.concat
Expr.str.contains
Expr.str.count_match
Expr.str.count_matches
Expr.str.decode
Expr.str.encode
Expr.str.ends_with
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series/list.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ The following methods are available under the `Series.list` attribute.
Series.list.concat
Series.list.contains
Series.list.count_match
Series.list.count_matches
Series.list.diff
Series.list.difference
Series.list.eval
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series/string.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ The following methods are available under the `Series.str` attribute.
Series.str.concat
Series.str.contains
Series.str.count_match
Series.str.count_matches
Series.str.decode
Series.str.encode
Series.str.ends_with
Expand Down
22 changes: 19 additions & 3 deletions py-polars/polars/expr/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -738,7 +738,7 @@ def explode(self) -> Expr:
"""
return wrap_expr(self._pyexpr.explode())

def count_match(self, element: IntoExpr) -> Expr:
def count_matches(self, element: IntoExpr) -> Expr:
"""
Count how often the value produced by ``element`` occurs.
Expand All @@ -750,7 +750,7 @@ def count_match(self, element: IntoExpr) -> Expr:
Examples
--------
>>> df = pl.DataFrame({"listcol": [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]})
>>> df.select(pl.col("listcol").list.count_match(2).alias("number_of_twos"))
>>> df.select(pl.col("listcol").list.count_matches(2).alias("number_of_twos"))
shape: (5, 1)
┌────────────────┐
│ number_of_twos │
Expand All @@ -766,7 +766,7 @@ def count_match(self, element: IntoExpr) -> Expr:
"""
element = parse_as_expression(element, str_as_lit=True)
return wrap_expr(self._pyexpr.list_count_match(element))
return wrap_expr(self._pyexpr.list_count_matches(element))

def to_struct(
self,
Expand Down Expand Up @@ -1068,3 +1068,19 @@ def symmetric_difference(self, other: IntoExpr) -> Expr:
""" # noqa: W505
return self.set_symmetric_difference(other)

@deprecate_renamed_function("count_matches", version="0.19.3")
def count_match(self, element: IntoExpr) -> Expr:
    """
    Count how often the value produced by ``element`` occurs.

    .. deprecated:: 0.19.3
        This method has been renamed to :func:`count_matches`.

    Parameters
    ----------
    element
        An expression that produces a single value

    Returns
    -------
    Expr
        The expression returned by :func:`count_matches` for ``element``.

    """
    # Deprecated alias: forwards unchanged to the renamed method.
    return self.count_matches(element)
29 changes: 26 additions & 3 deletions py-polars/polars/expr/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1419,7 +1419,7 @@ def extract_groups(self, pattern: str) -> Expr:
"""
return wrap_expr(self._pyexpr.str_extract_groups(pattern))

def count_match(self, pattern: str | Expr) -> Expr:
def count_matches(self, pattern: str | Expr) -> Expr:
r"""
Count all successive non-overlapping regex matches.
Expand All @@ -1439,7 +1439,7 @@ def count_match(self, pattern: str | Expr) -> Expr:
--------
>>> df = pl.DataFrame({"foo": ["123 bla 45 asd", "xyz 678 910t", "bar", None]})
>>> df.select(
... pl.col("foo").str.count_match(r"\d").alias("count_digits"),
... pl.col("foo").str.count_matches(r"\d").alias("count_digits"),
... )
shape: (4, 1)
┌──────────────┐
Expand All @@ -1455,7 +1455,7 @@ def count_match(self, pattern: str | Expr) -> Expr:
"""
pattern = parse_as_expression(pattern, str_as_lit=True)
return wrap_expr(self._pyexpr.str_count_match(pattern))
return wrap_expr(self._pyexpr.str_count_matches(pattern))

def split(self, by: str, *, inclusive: bool = False) -> Expr:
"""
Expand Down Expand Up @@ -1938,6 +1938,29 @@ def rstrip(self, characters: str | None = None) -> Expr:
"""
return self.strip_chars_end(characters)

@deprecate_renamed_function("count_matches", version="0.19.3")
def count_match(self, pattern: str | Expr) -> Expr:
    """
    Count all successive non-overlapping regex matches.

    .. deprecated:: 0.19.3
        This method has been renamed to :func:`count_matches`.

    Parameters
    ----------
    pattern
        A valid regular expression pattern, compatible with the `regex crate
        <https://docs.rs/regex/latest/regex/>`_.

    Returns
    -------
    Expr
        Expression of data type :class:`UInt32`. Returns null if the
        original value is null.

    """
    # Deprecated alias: forwards unchanged to the renamed method.
    return self.count_matches(pattern)


def _validate_format_argument(format: str | None) -> None:
if format is not None and ".%f" in format:
Expand Down
1 change: 0 additions & 1 deletion py-polars/polars/functions/range/date_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ def date_range(
type date. All other permutations return a datetime Series.
.. deprecated:: 0.19.3
In a future version of Polars, `date_range` will always return a `Date`.
Please use :func:`datetime_range` if you want a `Datetime` instead.
Expand Down
19 changes: 18 additions & 1 deletion py-polars/polars/series/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ def explode(self) -> Series:
"""

def count_match(
def count_matches(
self, element: float | str | bool | int | date | datetime | time | Expr
) -> Expr:
"""
Expand Down Expand Up @@ -688,3 +688,20 @@ def symmetric_difference(self, other: Series) -> Series:
""" # noqa: W505
return self.set_symmetric_difference(other)

@deprecate_renamed_function("count_matches", version="0.19.3")
def count_match(
    self, element: float | str | bool | int | date | datetime | time | Expr
) -> Series:
    """
    Count how often the value produced by ``element`` occurs.

    .. deprecated:: 0.19.3
        This method has been renamed to :func:`count_matches`.

    Parameters
    ----------
    element
        An expression that produces a single value

    Returns
    -------
    Series
        NOTE(review): the result type is not visible in this hunk; the
        annotation was changed from ``Expr`` to ``Series`` to match the
        sibling methods of this Series namespace -- confirm against
        ``count_matches``.

    """
    # NOTE(review): intentionally body-less, like the other methods shown for
    # this namespace -- presumably dispatched to the expression implementation
    # elsewhere; confirm before adding any statements here.
27 changes: 25 additions & 2 deletions py-polars/polars/series/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -826,7 +826,7 @@ def extract_groups(self, pattern: str) -> Series:
"""

def count_match(self, pattern: str | Series) -> Series:
def count_matches(self, pattern: str | Series) -> Series:
r"""
Count all successive non-overlapping regex matches.
Expand All @@ -847,7 +847,7 @@ def count_match(self, pattern: str | Series) -> Series:
--------
>>> s = pl.Series("foo", ["123 bla 45 asd", "xyz 678 910t", "bar", None])
>>> # count digits
>>> s.str.count_match(r"\d")
>>> s.str.count_matches(r"\d")
shape: (4,)
Series: 'foo' [u32]
[
Expand Down Expand Up @@ -1539,3 +1539,26 @@ def rstrip(self, characters: str | None = None) -> Series:
removed instead.
"""

@deprecate_renamed_function("count_matches", version="0.19.3")
def count_match(self, pattern: str | Series) -> Series:
    """
    Count all successive non-overlapping regex matches.

    .. deprecated:: 0.19.3
        This method has been renamed to :func:`count_matches`.

    Parameters
    ----------
    pattern
        A valid regular expression pattern, compatible with the `regex crate
        <https://docs.rs/regex/latest/regex/>`_. Can also be a :class:`Series` of
        regular expressions.

    Returns
    -------
    Series
        Series of data type :class:`UInt32`. Returns null if the original
        value is null.

    """
    # NOTE(review): intentionally body-less, like the other methods shown for
    # this namespace -- presumably dispatched to the expression implementation
    # elsewhere; confirm before adding any statements here.
4 changes: 2 additions & 2 deletions py-polars/src/expr/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ impl PyExpr {
}

#[cfg(feature = "list_count")]
fn list_count_match(&self, expr: PyExpr) -> Self {
self.inner.clone().list().count_match(expr.inner).into()
fn list_count_matches(&self, expr: PyExpr) -> Self {
self.inner.clone().list().count_matches(expr.inner).into()
}

fn list_diff(&self, n: i64, null_behavior: Wrap<NullBehavior>) -> PyResult<Self> {
Expand Down
Loading

0 comments on commit 67c7d7d

Please sign in to comment.