diff --git a/crates/polars-ops/src/chunked_array/list/count.rs b/crates/polars-ops/src/chunked_array/list/count.rs index 546bebba969b..f066552eb934 100644 --- a/crates/polars-ops/src/chunked_array/list/count.rs +++ b/crates/polars-ops/src/chunked_array/list/count.rs @@ -25,7 +25,7 @@ fn count_bits_set_by_offsets(values: &Bitmap, offset: &[i64]) -> Vec { } #[cfg(feature = "list_count")] -pub fn list_count_match(ca: &ListChunked, value: AnyValue) -> PolarsResult { +pub fn list_count_matches(ca: &ListChunked, value: AnyValue) -> PolarsResult { let value = Series::new("", [value]); let ca = ca.apply_to_inner(&|s| { diff --git a/crates/polars-ops/src/chunked_array/strings/namespace.rs b/crates/polars-ops/src/chunked_array/strings/namespace.rs index 583c0fa1367e..a53db59432f5 100644 --- a/crates/polars-ops/src/chunked_array/strings/namespace.rs +++ b/crates/polars-ops/src/chunked_array/strings/namespace.rs @@ -342,7 +342,7 @@ pub trait Utf8NameSpaceImpl: AsUtf8 { } /// Count all successive non-overlapping regex matches. - fn count_match(&self, pat: &str) -> PolarsResult { + fn count_matches(&self, pat: &str) -> PolarsResult { let ca = self.as_utf8(); let reg = Regex::new(pat)?; @@ -355,7 +355,7 @@ pub trait Utf8NameSpaceImpl: AsUtf8 { } /// Count all successive non-overlapping regex matches. 
- fn count_match_many(&self, pat: &Utf8Chunked) -> PolarsResult { + fn count_matches_many(&self, pat: &Utf8Chunked) -> PolarsResult { let ca = self.as_utf8(); polars_ensure!( ca.len() == pat.len(), diff --git a/crates/polars-plan/src/dsl/function_expr/list.rs b/crates/polars-plan/src/dsl/function_expr/list.rs index 4a915aab97c2..856273fadc76 100644 --- a/crates/polars-plan/src/dsl/function_expr/list.rs +++ b/crates/polars-plan/src/dsl/function_expr/list.rs @@ -14,7 +14,7 @@ pub enum ListFunction { #[cfg(feature = "list_take")] Take(bool), #[cfg(feature = "list_count")] - CountMatch, + CountMatches, Sum, #[cfg(feature = "list_sets")] SetOperation(SetOperation), @@ -37,7 +37,7 @@ impl Display for ListFunction { #[cfg(feature = "list_take")] Take(_) => "take", #[cfg(feature = "list_count")] - CountMatch => "count", + CountMatches => "count", Sum => "sum", #[cfg(feature = "list_sets")] SetOperation(s) => return write!(f, "{s}"), @@ -247,7 +247,7 @@ pub(super) fn take(args: &[Series], null_on_oob: bool) -> PolarsResult { } #[cfg(feature = "list_count")] -pub(super) fn count_match(args: &[Series]) -> PolarsResult { +pub(super) fn count_matches(args: &[Series]) -> PolarsResult { let s = &args[0]; let element = &args[1]; polars_ensure!( @@ -256,7 +256,7 @@ pub(super) fn count_match(args: &[Series]) -> PolarsResult { element.len() ); let ca = s.list()?; - list_count_match(ca, element.get(0).unwrap()) + list_count_matches(ca, element.get(0).unwrap()) } pub(super) fn sum(s: &Series) -> PolarsResult { diff --git a/crates/polars-plan/src/dsl/function_expr/mod.rs b/crates/polars-plan/src/dsl/function_expr/mod.rs index 6dd42e1bc6ad..8e3bc4f9a0d1 100644 --- a/crates/polars-plan/src/dsl/function_expr/mod.rs +++ b/crates/polars-plan/src/dsl/function_expr/mod.rs @@ -527,7 +527,7 @@ impl From for SpecialEq> { #[cfg(feature = "list_take")] Take(null_ob_oob) => map_as_slice!(list::take, null_ob_oob), #[cfg(feature = "list_count")] - CountMatch => map_as_slice!(list::count_match), + 
CountMatches => map_as_slice!(list::count_matches), Sum => map!(list::sum), #[cfg(feature = "list_sets")] SetOperation(s) => map_as_slice!(list::set_operation, s), @@ -647,8 +647,8 @@ impl From for SpecialEq> { match func { #[cfg(feature = "regex")] Contains { literal, strict } => map_as_slice!(strings::contains, literal, strict), - CountMatch => { - map_as_slice!(strings::count_match) + CountMatches => { + map_as_slice!(strings::count_matches) }, EndsWith { .. } => map_as_slice!(strings::ends_with), StartsWith { .. } => map_as_slice!(strings::starts_with), diff --git a/crates/polars-plan/src/dsl/function_expr/schema.rs b/crates/polars-plan/src/dsl/function_expr/schema.rs index 5a397f52319a..71a4781847a7 100644 --- a/crates/polars-plan/src/dsl/function_expr/schema.rs +++ b/crates/polars-plan/src/dsl/function_expr/schema.rs @@ -107,7 +107,7 @@ impl FunctionExpr { #[cfg(feature = "list_take")] Take(_) => mapper.with_same_dtype(), #[cfg(feature = "list_count")] - CountMatch => mapper.with_dtype(IDX_DTYPE), + CountMatches => mapper.with_dtype(IDX_DTYPE), Sum => mapper.nested_sum_type(), #[cfg(feature = "list_sets")] SetOperation(_) => mapper.with_same_dtype(), diff --git a/crates/polars-plan/src/dsl/function_expr/strings.rs b/crates/polars-plan/src/dsl/function_expr/strings.rs index d14f893e8be3..b111211a9219 100644 --- a/crates/polars-plan/src/dsl/function_expr/strings.rs +++ b/crates/polars-plan/src/dsl/function_expr/strings.rs @@ -26,7 +26,7 @@ pub enum StringFunction { literal: bool, strict: bool, }, - CountMatch, + CountMatches, EndsWith, Explode, Extract { @@ -92,7 +92,7 @@ impl StringFunction { ConcatVertical(_) | ConcatHorizontal(_) => mapper.with_same_dtype(), #[cfg(feature = "regex")] Contains { .. } => mapper.with_dtype(DataType::Boolean), - CountMatch => mapper.with_dtype(DataType::UInt32), + CountMatches => mapper.with_dtype(DataType::UInt32), EndsWith | StartsWith => mapper.with_dtype(DataType::Boolean), Explode => mapper.with_same_dtype(), Extract { .. 
} => mapper.with_same_dtype(), @@ -132,7 +132,7 @@ impl Display for StringFunction { let s = match self { #[cfg(feature = "regex")] StringFunction::Contains { .. } => "contains", - StringFunction::CountMatch => "count_match", + StringFunction::CountMatches => "count_matches", StringFunction::EndsWith { .. } => "ends_with", StringFunction::Extract { .. } => "extract", #[cfg(feature = "concat_str")] @@ -433,7 +433,7 @@ pub(super) fn extract_all(args: &[Series]) -> PolarsResult { } } -pub(super) fn count_match(args: &[Series]) -> PolarsResult { +pub(super) fn count_matches(args: &[Series]) -> PolarsResult { let s = &args[0]; let pat = &args[1]; @@ -441,12 +441,12 @@ pub(super) fn count_match(args: &[Series]) -> PolarsResult { let pat = pat.utf8()?; if pat.len() == 1 { if let Some(pat) = pat.get(0) { - ca.count_match(pat).map(|ca| ca.into_series()) + ca.count_matches(pat).map(|ca| ca.into_series()) } else { Ok(Series::full_null(ca.name(), ca.len(), &DataType::UInt32)) } } else { - ca.count_match_many(pat).map(|ca| ca.into_series()) + ca.count_matches_many(pat).map(|ca| ca.into_series()) } } diff --git a/crates/polars-plan/src/dsl/list.rs b/crates/polars-plan/src/dsl/list.rs index 8665c76c03d5..427d99d5ca6c 100644 --- a/crates/polars-plan/src/dsl/list.rs +++ b/crates/polars-plan/src/dsl/list.rs @@ -310,12 +310,12 @@ impl ListNameSpace { } #[cfg(feature = "list_count")] /// Count how often the value produced by ``element`` occurs. 
- pub fn count_match>(self, element: E) -> Expr { + pub fn count_matches>(self, element: E) -> Expr { let other = element.into(); Expr::Function { input: vec![self.0, other], - function: FunctionExpr::ListExpr(ListFunction::CountMatch), + function: FunctionExpr::ListExpr(ListFunction::CountMatches), options: FunctionOptions { collect_groups: ApplyOptions::ApplyFlat, input_wildcard_expansion: true, diff --git a/crates/polars-plan/src/dsl/string.rs b/crates/polars-plan/src/dsl/string.rs index 5be838b4d529..bd8e715dd5b4 100644 --- a/crates/polars-plan/src/dsl/string.rs +++ b/crates/polars-plan/src/dsl/string.rs @@ -128,9 +128,9 @@ impl StringNameSpace { } /// Count all successive non-overlapping regex matches. - pub fn count_match(self, pat: Expr) -> Expr { + pub fn count_matches(self, pat: Expr) -> Expr { self.0 - .map_many_private(StringFunction::CountMatch.into(), &[pat], false) + .map_many_private(StringFunction::CountMatches.into(), &[pat], false) } /// Convert a Utf8 column into a Date/Datetime/Time column. diff --git a/py-polars/docs/source/reference/expressions/list.rst b/py-polars/docs/source/reference/expressions/list.rst index 2710b8d56c80..989d3740aa99 100644 --- a/py-polars/docs/source/reference/expressions/list.rst +++ b/py-polars/docs/source/reference/expressions/list.rst @@ -16,6 +16,7 @@ The following methods are available under the `expr.list` attribute. Expr.list.concat Expr.list.contains Expr.list.count_match + Expr.list.count_matches Expr.list.diff Expr.list.difference Expr.list.eval diff --git a/py-polars/docs/source/reference/expressions/string.rst b/py-polars/docs/source/reference/expressions/string.rst index 355801bd84bf..ff4e73d7cc4a 100644 --- a/py-polars/docs/source/reference/expressions/string.rst +++ b/py-polars/docs/source/reference/expressions/string.rst @@ -12,6 +12,7 @@ The following methods are available under the `expr.str` attribute. 
Expr.str.concat Expr.str.contains Expr.str.count_match + Expr.str.count_matches Expr.str.decode Expr.str.encode Expr.str.ends_with diff --git a/py-polars/docs/source/reference/series/list.rst b/py-polars/docs/source/reference/series/list.rst index 46942ab076b9..2b5128610192 100644 --- a/py-polars/docs/source/reference/series/list.rst +++ b/py-polars/docs/source/reference/series/list.rst @@ -16,6 +16,7 @@ The following methods are available under the `Series.list` attribute. Series.list.concat Series.list.contains Series.list.count_match + Series.list.count_matches Series.list.diff Series.list.difference Series.list.eval diff --git a/py-polars/docs/source/reference/series/string.rst b/py-polars/docs/source/reference/series/string.rst index 910b589bf86a..a47113010c5b 100644 --- a/py-polars/docs/source/reference/series/string.rst +++ b/py-polars/docs/source/reference/series/string.rst @@ -12,6 +12,7 @@ The following methods are available under the `Series.str` attribute. Series.str.concat Series.str.contains Series.str.count_match + Series.str.count_matches Series.str.decode Series.str.encode Series.str.ends_with diff --git a/py-polars/polars/expr/list.py b/py-polars/polars/expr/list.py index dffc2b028537..642ca1afcabc 100644 --- a/py-polars/polars/expr/list.py +++ b/py-polars/polars/expr/list.py @@ -738,7 +738,7 @@ def explode(self) -> Expr: """ return wrap_expr(self._pyexpr.explode()) - def count_match(self, element: IntoExpr) -> Expr: + def count_matches(self, element: IntoExpr) -> Expr: """ Count how often the value produced by ``element`` occurs. 
@@ -750,7 +750,7 @@ def count_match(self, element: IntoExpr) -> Expr: Examples -------- >>> df = pl.DataFrame({"listcol": [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]}) - >>> df.select(pl.col("listcol").list.count_match(2).alias("number_of_twos")) + >>> df.select(pl.col("listcol").list.count_matches(2).alias("number_of_twos")) shape: (5, 1) ┌────────────────┐ │ number_of_twos │ @@ -766,7 +766,7 @@ def count_match(self, element: IntoExpr) -> Expr: """ element = parse_as_expression(element, str_as_lit=True) - return wrap_expr(self._pyexpr.list_count_match(element)) + return wrap_expr(self._pyexpr.list_count_matches(element)) def to_struct( self, @@ -1068,3 +1068,19 @@ def symmetric_difference(self, other: IntoExpr) -> Expr: """ # noqa: W505 return self.set_symmetric_difference(other) + + @deprecate_renamed_function("count_matches", version="0.19.3") + def count_match(self, element: IntoExpr) -> Expr: + """ + Count how often the value produced by ``element`` occurs. + + .. deprecated:: 0.19.3 + This method has been renamed to :func:`count_matches`. + + Parameters + ---------- + element + An expression that produces a single value + + """ + return self.count_matches(element) diff --git a/py-polars/polars/expr/string.py b/py-polars/polars/expr/string.py index 73762a9e68aa..6036fac84d71 100644 --- a/py-polars/polars/expr/string.py +++ b/py-polars/polars/expr/string.py @@ -1419,7 +1419,7 @@ def extract_groups(self, pattern: str) -> Expr: """ return wrap_expr(self._pyexpr.str_extract_groups(pattern)) - def count_match(self, pattern: str | Expr) -> Expr: + def count_matches(self, pattern: str | Expr) -> Expr: r""" Count all successive non-overlapping regex matches. @@ -1439,7 +1439,7 @@ def count_match(self, pattern: str | Expr) -> Expr: -------- >>> df = pl.DataFrame({"foo": ["123 bla 45 asd", "xyz 678 910t", "bar", None]}) >>> df.select( - ... pl.col("foo").str.count_match(r"\d").alias("count_digits"), + ... pl.col("foo").str.count_matches(r"\d").alias("count_digits"), ... 
) shape: (4, 1) ┌──────────────┐ @@ -1455,7 +1455,7 @@ def count_match(self, pattern: str | Expr) -> Expr: """ pattern = parse_as_expression(pattern, str_as_lit=True) - return wrap_expr(self._pyexpr.str_count_match(pattern)) + return wrap_expr(self._pyexpr.str_count_matches(pattern)) def split(self, by: str, *, inclusive: bool = False) -> Expr: """ @@ -1938,6 +1938,29 @@ def rstrip(self, characters: str | None = None) -> Expr: """ return self.strip_chars_end(characters) + @deprecate_renamed_function("count_matches", version="0.19.3") + def count_match(self, pattern: str | Expr) -> Expr: + """ + Count all successive non-overlapping regex matches. + + .. deprecated:: 0.19.3 + This method has been renamed to :func:`count_matches`. + + Parameters + ---------- + pattern + A valid regular expression pattern, compatible with the `regex crate + <https://docs.rs/regex/latest/regex/>`_. + + Returns + ------- + Expr + Expression of data type :class:`UInt32`. Returns null if the + original value is null. + + """ + return self.count_matches(pattern) + def _validate_format_argument(format: str | None) -> None: if format is not None and ".%f" in format: diff --git a/py-polars/polars/functions/range/date_range.py b/py-polars/polars/functions/range/date_range.py index c2f1d039185c..a9c0f6e2628d 100644 --- a/py-polars/polars/functions/range/date_range.py +++ b/py-polars/polars/functions/range/date_range.py @@ -126,7 +126,6 @@ def date_range( type date. All other permutations return a datetime Series. .. deprecated:: 0.19.3 - In a future version of Polars, `date_range` will always return a `Date`. Please use :func:`datetime_range` if you want a `Datetime` instead. 
diff --git a/py-polars/polars/series/list.py b/py-polars/polars/series/list.py index 3156d693d45e..403958404c1c 100644 --- a/py-polars/polars/series/list.py +++ b/py-polars/polars/series/list.py @@ -434,7 +434,7 @@ def explode(self) -> Series: """ - def count_match( + def count_matches( self, element: float | str | bool | int | date | datetime | time | Expr ) -> Expr: """ @@ -688,3 +688,20 @@ def symmetric_difference(self, other: Series) -> Series: """ # noqa: W505 return self.set_symmetric_difference(other) + + @deprecate_renamed_function("count_matches", version="0.19.3") + def count_match( + self, element: float | str | bool | int | date | datetime | time | Expr + ) -> Expr: + """ + Count how often the value produced by ``element`` occurs. + + .. deprecated:: 0.19.3 + This method has been renamed to :func:`count_matches`. + + Parameters + ---------- + element + An expression that produces a single value + + """ diff --git a/py-polars/polars/series/string.py b/py-polars/polars/series/string.py index 97b2e14c133c..44de32c904fa 100644 --- a/py-polars/polars/series/string.py +++ b/py-polars/polars/series/string.py @@ -826,7 +826,7 @@ def extract_groups(self, pattern: str) -> Series: """ - def count_match(self, pattern: str | Series) -> Series: + def count_matches(self, pattern: str | Series) -> Series: r""" Count all successive non-overlapping regex matches. @@ -847,7 +847,7 @@ def count_match(self, pattern: str | Series) -> Series: -------- >>> s = pl.Series("foo", ["123 bla 45 asd", "xyz 678 910t", "bar", None]) >>> # count digits - >>> s.str.count_match(r"\d") + >>> s.str.count_matches(r"\d") shape: (4,) Series: 'foo' [u32] [ @@ -1539,3 +1539,26 @@ def rstrip(self, characters: str | None = None) -> Series: removed instead. """ + + @deprecate_renamed_function("count_matches", version="0.19.3") + def count_match(self, pattern: str | Series) -> Series: + """ + Count all successive non-overlapping regex matches. + + .. 
deprecated:: 0.19.3 + This method has been renamed to :func:`count_matches`. + + Parameters + ---------- + pattern + A valid regular expression pattern, compatible with the `regex crate + <https://docs.rs/regex/latest/regex/>`_. Can also be a :class:`Series` of + regular expressions. + + Returns + ------- + Series + Series of data type :class:`UInt32`. Returns null if the original + value is null. + + """ diff --git a/py-polars/src/expr/list.rs b/py-polars/src/expr/list.rs index 1838bd291903..3495718d817d 100644 --- a/py-polars/src/expr/list.rs +++ b/py-polars/src/expr/list.rs @@ -33,8 +33,8 @@ impl PyExpr { } #[cfg(feature = "list_count")] - fn list_count_match(&self, expr: PyExpr) -> Self { - self.inner.clone().list().count_match(expr.inner).into() + fn list_count_matches(&self, expr: PyExpr) -> Self { + self.inner.clone().list().count_matches(expr.inner).into() } fn list_diff(&self, n: i64, null_behavior: Wrap) -> PyResult { diff --git a/py-polars/src/expr/string.rs b/py-polars/src/expr/string.rs index 2b84e83fe776..ec39afce27d8 100644 --- a/py-polars/src/expr/string.rs +++ b/py-polars/src/expr/string.rs @@ -267,8 +267,8 @@ impl PyExpr { .into()) } - fn str_count_match(&self, pat: Self) -> Self { - self.inner.clone().str().count_match(pat.inner).into() + fn str_count_matches(&self, pat: Self) -> Self { + self.inner.clone().str().count_matches(pat.inner).into() } fn str_split(&self, by: &str) -> Self { diff --git a/py-polars/tests/unit/datatypes/test_list.py b/py-polars/tests/unit/datatypes/test_list.py index 8cb515bc4dfd..d6bbcdf4a278 100644 --- a/py-polars/tests/unit/datatypes/test_list.py +++ b/py-polars/tests/unit/datatypes/test_list.py @@ -3,6 +3,7 @@ from datetime import date, datetime, time import pandas as pd +import pytest import polars as pl from polars.testing import assert_series_equal @@ -275,12 +276,24 @@ def test_flat_aggregation_to_list_conversion_6918() -> None: ).to_dict(False) == {"a": [1, 2], "b": [[[0.0, 1.0]], [[3.0, 4.0]]]} -def test_list_count_match() -> None: +def 
test_list_count_matches_deprecated() -> None: + with pytest.deprecated_call(): + # The deprecated `count_match` alias must warn and still return the correct counts + assert pl.DataFrame( + {"listcol": [[], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]} + ).select(pl.col("listcol").list.count_match(2).alias("number_of_twos")).to_dict( + False + ) == { + "number_of_twos": [0, 0, 2, 1, 0] + } + + +def test_list_count_matches() -> None: assert pl.DataFrame({"listcol": [[], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]}).select( - pl.col("listcol").list.count_match(2).alias("number_of_twos") + pl.col("listcol").list.count_matches(2).alias("number_of_twos") ).to_dict(False) == {"number_of_twos": [0, 0, 2, 1, 0]} assert pl.DataFrame({"listcol": [[], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]}).select( - pl.col("listcol").list.count_match(2).alias("number_of_twos") + pl.col("listcol").list.count_matches(2).alias("number_of_twos") ).to_dict(False) == {"number_of_twos": [0, 0, 2, 1, 0]} diff --git a/py-polars/tests/unit/namespaces/test_list.py b/py-polars/tests/unit/namespaces/test_list.py index 11f6b3d3fc1e..d84184fdf6d1 100644 --- a/py-polars/tests/unit/namespaces/test_list.py +++ b/py-polars/tests/unit/namespaces/test_list.py @@ -450,8 +450,13 @@ def test_list_tail_underflow_9087() -> None: def test_list_count_match_boolean_nulls_9141() -> None: a = pl.DataFrame({"a": [[True, None, False]]}) + assert a.select(pl.col("a").list.count_matches(True))["a"].to_list() == [1] - assert a.select(pl.col("a").list.count_match(True))["a"].to_list() == [1] + +def test_list_count_matches_boolean_nulls_9141() -> None: + a = pl.DataFrame({"a": [[True, None, False]]}) + + assert a.select(pl.col("a").list.count_matches(True))["a"].to_list() == [1] def test_list_set_operations() -> None: diff --git a/py-polars/tests/unit/namespaces/test_string.py b/py-polars/tests/unit/namespaces/test_string.py index a4309bb6c683..a9d20da1a6f3 100644 --- a/py-polars/tests/unit/namespaces/test_string.py +++ b/py-polars/tests/unit/namespaces/test_string.py @@ -550,15 +550,27 @@ def 
test_extract_all_count() -> None: assert ( df.select( pl.col("foo").str.extract_all(r"a").alias("extract"), - pl.col("foo").str.count_match(r"a").alias("count"), + pl.col("foo").str.count_matches(r"a").alias("count"), ).to_dict(False) ) == {"extract": [["a", "a"], ["a"], [], None], "count": [2, 1, 0, None]} assert df["foo"].str.extract_all(r"a").dtype == pl.List - assert df["foo"].str.count_match(r"a").dtype == pl.UInt32 + assert df["foo"].str.count_matches(r"a").dtype == pl.UInt32 -def test_count_match_many() -> None: +def test_count_matches_deprecated_count() -> None: + df = pl.DataFrame({"foo": ["123 bla 45 asd", "xaz 678 910t", "boo", None]}) + + with pytest.deprecated_call(): + expr = pl.col("foo").str.count_match(r"a") + + result = df.select(expr) + + expected = pl.Series("foo", [2, 1, 0, None], dtype=pl.UInt32).to_frame() + assert_frame_equal(result, expected) + + +def test_count_matches_many() -> None: df = pl.DataFrame( { "foo": ["123 bla 45 asd", "xyz 678 910t", None, "boo"], @@ -566,17 +578,17 @@ def test_count_match_many() -> None: } ) assert ( - df.select(pl.col("foo").str.count_match(pl.col("bar")).alias("count")).to_dict( - False - ) + df.select( + pl.col("foo").str.count_matches(pl.col("bar")).alias("count") + ).to_dict(False) ) == {"count": [5, 4, None, None]} - assert df["foo"].str.count_match(df["bar"]).dtype == pl.UInt32 + assert df["foo"].str.count_matches(df["bar"]).dtype == pl.UInt32 # Test broadcast. broad = df.select( - pl.col("foo").str.count_match(pl.col("bar").first()).alias("count"), - pl.col("foo").str.count_match(pl.col("bar").last()).alias("count_null"), + pl.col("foo").str.count_matches(pl.col("bar").first()).alias("count"), + pl.col("foo").str.count_matches(pl.col("bar").last()).alias("count_null"), ) assert broad.to_dict(False) == { "count": [5, 6, None, 0],