Skip to content

Commit

Permalink
depr(python,rust!): renaming approx_unique as approx_n_unique (#1…
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie authored Aug 4, 2023
1 parent a566346 commit 6874e2f
Show file tree
Hide file tree
Showing 16 changed files with 59 additions and 45 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! # HyperLogLogPlus
//!
//! The `hyperloglogplus` module contains an implementation of the HyperLogLogPlus
//! algorithm for cardinality estimation so that [`crate::series::approx_unique`] function can
//! algorithm for cardinality estimation so that the [`crate::series::approx_n_unique`] function can
//! be efficiently implemented.
//!
//! This module borrows code from [arrow-datafusion](https://github.com/apache/arrow-datafusion/blob/93771052c5ac31f2cf22b8c25bf938656afe1047/datafusion/physical-expr/src/aggregate/hyperloglog.rs).
Expand Down
20 changes: 10 additions & 10 deletions crates/polars-ops/src/series/ops/approx_unique.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use polars_core::with_match_physical_integer_polars_type;
#[cfg(feature = "approx_unique")]
use crate::series::HyperLogLog;

fn approx_unique_ca<'a, T>(ca: &'a ChunkedArray<T>) -> PolarsResult<Series>
fn approx_n_unique_ca<'a, T>(ca: &'a ChunkedArray<T>) -> PolarsResult<Series>
where
T: PolarsDataType,
&'a ChunkedArray<T>: IntoIterator,
Expand All @@ -23,22 +23,22 @@ fn dispatcher(s: &Series) -> PolarsResult<Series> {
let s = s.to_physical_repr();
use DataType::*;
match s.dtype() {
Boolean => s.bool().and_then(approx_unique_ca),
Binary => s.binary().and_then(approx_unique_ca),
Boolean => s.bool().and_then(approx_n_unique_ca),
Binary => s.binary().and_then(approx_n_unique_ca),
Utf8 => {
let s = s.cast(&Binary).unwrap();
let ca = s.binary().unwrap();
approx_unique_ca(ca)
approx_n_unique_ca(ca)
}
Float32 => approx_unique_ca(&s.bit_repr_small()),
Float64 => approx_unique_ca(&s.bit_repr_large()),
Float32 => approx_n_unique_ca(&s.bit_repr_small()),
Float64 => approx_n_unique_ca(&s.bit_repr_large()),
dt if dt.is_numeric() => {
with_match_physical_integer_polars_type!(s.dtype(), |$T| {
let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
approx_unique_ca(ca)
approx_n_unique_ca(ca)
})
}
dt => polars_bail!(opq = approx_unique, dt),
dt => polars_bail!(opq = approx_n_unique, dt),
}
}

Expand All @@ -57,7 +57,7 @@ fn dispatcher(s: &Series) -> PolarsResult<Series> {
///
/// let s = Series::new("s", [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]);
///
/// let approx_count = approx_unique(&s).unwrap();
/// let approx_count = approx_n_unique(&s).unwrap();
/// dbg!(approx_count);
/// # }
/// ```
Expand All @@ -69,6 +69,6 @@ fn dispatcher(s: &Series) -> PolarsResult<Series> {
/// 3
/// ]
/// ```
pub fn approx_unique(s: &Series) -> PolarsResult<Series> {
pub fn approx_n_unique(s: &Series) -> PolarsResult<Series> {
dispatcher(s)
}
4 changes: 2 additions & 2 deletions crates/polars-plan/src/dsl/function_expr/dispatch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ pub(super) fn reverse(s: &Series) -> PolarsResult<Series> {
}

#[cfg(feature = "approx_unique")]
pub(super) fn approx_unique(s: &Series) -> PolarsResult<Series> {
polars_ops::prelude::approx_unique(s)
pub(super) fn approx_n_unique(s: &Series) -> PolarsResult<Series> {
polars_ops::prelude::approx_n_unique(s)
}

#[cfg(feature = "diff")]
Expand Down
6 changes: 3 additions & 3 deletions crates/polars-plan/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ pub enum FunctionExpr {
Reverse,
Boolean(BooleanFunction),
#[cfg(feature = "approx_unique")]
ApproxUnique,
ApproxNUnique,
#[cfg(feature = "dtype-categorical")]
Categorical(CategoricalFunction),
Coalesce,
Expand Down Expand Up @@ -292,7 +292,7 @@ impl Display for FunctionExpr {
Reverse => "reverse",
Boolean(func) => return write!(f, "{func}"),
#[cfg(feature = "approx_unique")]
ApproxUnique => "approx_unique",
ApproxNUnique => "approx_n_unique",
#[cfg(feature = "dtype-categorical")]
Categorical(func) => return write!(f, "{func}"),
Coalesce => "coalesce",
Expand Down Expand Up @@ -544,7 +544,7 @@ impl From<FunctionExpr> for SpecialEq<Arc<dyn SeriesUdf>> {
Reverse => map!(dispatch::reverse),
Boolean(func) => func.into(),
#[cfg(feature = "approx_unique")]
ApproxUnique => map!(dispatch::approx_unique),
ApproxNUnique => map!(dispatch::approx_n_unique),
#[cfg(feature = "dtype-categorical")]
Categorical(func) => func.into(),
Coalesce => map_as_slice!(fill_null::coalesce),
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/src/dsl/function_expr/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ impl FunctionExpr {
Cummin { .. } => mapper.with_same_dtype(),
Cummax { .. } => mapper.with_same_dtype(),
#[cfg(feature = "approx_unique")]
ApproxUnique => mapper.with_dtype(IDX_DTYPE),
ApproxNUnique => mapper.with_dtype(IDX_DTYPE),
#[cfg(feature = "diff")]
Diff(_, _) => mapper.map_dtype(|dt| match dt {
#[cfg(feature = "dtype-datetime")]
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-plan/src/dsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1003,8 +1003,8 @@ impl Expr {

/// Get the approximate count of unique values.
#[cfg(feature = "approx_unique")]
pub fn approx_unique(self) -> Self {
self.apply_private(FunctionExpr::ApproxUnique)
pub fn approx_n_unique(self) -> Self {
self.apply_private(FunctionExpr::ApproxNUnique)
.with_function_options(|mut options| {
options.auto_explode = true;
options
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Computation
:toctree: api/

Expr.abs
Expr.approx_unique
Expr.approx_n_unique
Expr.arccos
Expr.arccosh
Expr.arcsin
Expand Down
4 changes: 2 additions & 2 deletions py-polars/docs/source/reference/expressions/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ These functions are available from the polars module root and can be used as exp
any
any_horizontal
apply
approx_unique
approx_n_unique
arange
arctan2
arctan2d
Expand Down Expand Up @@ -89,7 +89,7 @@ These functions are available from the polars module root and can be used as exp
Expr.all
Expr.any
Expr.apply
Expr.approx_unique
Expr.approx_n_unique
Expr.count
Expr.cumsum
Expr.exclude
Expand Down
4 changes: 2 additions & 2 deletions py-polars/polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
any,
any_horizontal,
apply,
approx_unique,
approx_n_unique,
arange,
arctan2,
arctan2d,
Expand Down Expand Up @@ -308,6 +308,7 @@
"sum_horizontal",
# polars.functions.lazy
"apply",
"approx_n_unique",
"arange",
"arctan2",
"arctan2d",
Expand Down Expand Up @@ -342,7 +343,6 @@
"mean",
"median",
"n_unique",
"approx_unique",
"quantile",
"reduce",
"rolling_corr",
Expand Down
5 changes: 5 additions & 0 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
from polars.utils._wrap import wrap_expr, wrap_ldf, wrap_s
from polars.utils.convert import _timedelta_to_pl_duration
from polars.utils.deprecation import (
deprecate_renamed_methods,
deprecate_renamed_parameter,
issue_deprecation_warning,
)
Expand Down Expand Up @@ -181,6 +182,10 @@
P = ParamSpec("P")


@deprecate_renamed_methods(
mapping={"approx_unique": "approx_n_unique"},
versions={"approx_unique": "0.18.12"},
)
class DataFrame:
"""
Two-dimensional data structure representing data as a table with rows and columns.
Expand Down
8 changes: 4 additions & 4 deletions py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2879,16 +2879,16 @@ def n_unique(self) -> Self:
"""
return self._from_pyexpr(self._pyexpr.n_unique())

def approx_unique(self) -> Self:
def approx_n_unique(self) -> Self:
"""
Approx count unique values.
Approximate count of unique values.
This is done using the HyperLogLog++ algorithm for cardinality estimation.
Examples
--------
>>> df = pl.DataFrame({"a": [1, 1, 2]})
>>> df.select(pl.col("a").approx_unique())
>>> df.select(pl.col("a").approx_n_unique())
shape: (1, 1)
┌─────┐
│ a │
Expand All @@ -2899,7 +2899,7 @@ def approx_unique(self) -> Self:
└─────┘
"""
return self._from_pyexpr(self._pyexpr.approx_unique())
return self._from_pyexpr(self._pyexpr.approx_n_unique())

def null_count(self) -> Self:
"""
Expand Down
4 changes: 2 additions & 2 deletions py-polars/polars/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from polars.functions.eager import align_frames, concat
from polars.functions.lazy import (
apply,
approx_unique,
approx_n_unique,
arctan2,
arctan2d,
arg_sort_by,
Expand Down Expand Up @@ -91,7 +91,7 @@
"sum_horizontal",
# polars.functions.eager
"align_frames",
"approx_unique",
"approx_n_unique",
"arg_where",
"concat",
"date_range",
Expand Down
10 changes: 5 additions & 5 deletions py-polars/polars/functions/lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,9 +590,9 @@ def n_unique(column: str | Series) -> Expr | int:
return col(column).n_unique()


def approx_unique(column: str | Expr) -> Expr:
def approx_n_unique(column: str | Expr) -> Expr:
"""
Approx count unique values.
Approximate count of unique values.
This is done using the HyperLogLog++ algorithm for cardinality estimation.
Expand All @@ -604,7 +604,7 @@ def approx_unique(column: str | Expr) -> Expr:
Examples
--------
>>> df = pl.DataFrame({"a": [1, 8, 1], "b": [4, 5, 2], "c": ["foo", "bar", "foo"]})
>>> df.select(pl.approx_unique("a"))
>>> df.select(pl.approx_n_unique("a"))
shape: (1, 1)
┌─────┐
│ a │
Expand All @@ -616,8 +616,8 @@ def approx_unique(column: str | Expr) -> Expr:
"""
if isinstance(column, pl.Expr):
return column.approx_unique()
return col(column).approx_unique()
return column.approx_n_unique()
return col(column).approx_n_unique()


@overload
Expand Down
13 changes: 9 additions & 4 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
from polars.utils._wrap import wrap_df, wrap_expr
from polars.utils.convert import _timedelta_to_pl_duration
from polars.utils.deprecation import (
deprecate_renamed_methods,
deprecate_renamed_parameter,
issue_deprecation_warning,
)
Expand Down Expand Up @@ -110,6 +111,10 @@
P = ParamSpec("P")


@deprecate_renamed_methods(
mapping={"approx_unique": "approx_n_unique"},
versions={"approx_unique": "0.18.12"},
)
class LazyFrame:
"""
Representation of a Lazy computation graph/query against a DataFrame.
Expand Down Expand Up @@ -3798,9 +3803,9 @@ def first(self) -> Self:
"""
return self.slice(0, 1)

def approx_unique(self) -> Self:
def approx_n_unique(self) -> Self:
"""
Approx count unique values.
Approximate count of unique values.
This is done using the HyperLogLog++ algorithm for cardinality estimation.
Expand All @@ -3812,7 +3817,7 @@ def approx_unique(self) -> Self:
... "b": [1, 2, 1, 1],
... }
... )
>>> lf.approx_unique().collect()
>>> lf.approx_n_unique().collect()
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
Expand All @@ -3823,7 +3828,7 @@ def approx_unique(self) -> Self:
└─────┴─────┘
"""
return self.select(F.all().approx_unique())
return self.select(F.all().approx_n_unique())

def with_row_count(self, name: str = "row_nr", offset: int = 0) -> Self:
"""
Expand Down
8 changes: 6 additions & 2 deletions py-polars/src/expr/general.rs
Original file line number Diff line number Diff line change
Expand Up @@ -372,21 +372,25 @@ impl PyExpr {
// Calls `filter` on a clone of the wrapped expression, passing the expression
// wrapped by `predicate`, and converts the result back into `Self`.
fn filter(&self, predicate: Self) -> Self {
    let expr = self.clone().inner;
    let mask = predicate.inner;
    expr.filter(mask).into()
}

// Calls `reverse` on a clone of the wrapped expression and converts the
// result back into `Self`; `self` is left untouched.
fn reverse(&self) -> Self {
    let expr = self.clone().inner;
    expr.reverse().into()
}

// Calls `std` on a clone of the wrapped expression, forwarding `ddof`
// unchanged, and converts the result back into `Self`.
// NOTE(review): `ddof` is presumably the delta degrees of freedom — confirm
// against the underlying expression API.
fn std(&self, ddof: u8) -> Self {
    let expr = self.clone().inner;
    expr.std(ddof).into()
}

// Calls `var` on a clone of the wrapped expression, forwarding `ddof`
// unchanged, and converts the result back into `Self`.
// NOTE(review): `ddof` is presumably the delta degrees of freedom — confirm
// against the underlying expression API.
fn var(&self, ddof: u8) -> Self {
    let expr = self.clone().inner;
    expr.var(ddof).into()
}

// Calls `is_unique` on a clone of the wrapped expression and converts the
// result back into `Self`; `self` is left untouched.
fn is_unique(&self) -> Self {
    let expr = self.clone().inner;
    expr.is_unique().into()
}

fn approx_unique(&self) -> Self {
self.clone().inner.approx_unique().into()
fn approx_n_unique(&self) -> Self {
self.clone().inner.approx_n_unique().into()
}

fn is_first(&self) -> Self {
Expand Down
8 changes: 4 additions & 4 deletions py-polars/tests/unit/functions/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,21 +332,21 @@ def test_abs_logical_type() -> None:
assert s.abs().to_list() == [timedelta(hours=1), timedelta(hours=1)]


def test_approx_unique() -> None:
def test_approx_n_unique() -> None:
df1 = pl.DataFrame({"a": [None, 1, 2], "b": [None, 2, 1]})

assert_frame_equal(
df1.select(pl.approx_unique("b")),
df1.select(pl.approx_n_unique("b")),
pl.DataFrame({"b": pl.Series(values=[3], dtype=pl.UInt32)}),
)

assert_frame_equal(
df1.select(pl.approx_unique(pl.col("b"))),
df1.select(pl.approx_n_unique(pl.col("b"))),
pl.DataFrame({"b": pl.Series(values=[3], dtype=pl.UInt32)}),
)

assert_frame_equal(
df1.select(pl.col("b").approx_unique()),
df1.select(pl.col("b").approx_n_unique()),
pl.DataFrame({"b": pl.Series(values=[3], dtype=pl.UInt32)}),
)

Expand Down

0 comments on commit 6874e2f

Please sign in to comment.