diff --git a/polars/polars-core/src/chunked_array/ops/unique/mod.rs b/polars/polars-core/src/chunked_array/ops/unique/mod.rs index 8f34e149a04a..b4c3224e687e 100644 --- a/polars/polars-core/src/chunked_array/ops/unique/mod.rs +++ b/polars/polars-core/src/chunked_array/ops/unique/mod.rs @@ -78,7 +78,38 @@ where } #[cfg(feature = "mode")] -#[allow(clippy::needless_collect)] +fn mode_indices(groups: GroupsProxy) -> Vec { + match groups { + GroupsProxy::Idx(groups) => { + let mut groups = groups.into_iter().collect_trusted::>(); + groups.sort_unstable_by_key(|k| k.1.len()); + let last = &groups.last().unwrap(); + let max_occur = last.1.len(); + groups + .iter() + .rev() + .take_while(|v| v.1.len() == max_occur) + .map(|v| v.0) + .collect() + } + GroupsProxy::Slice { groups, .. } => { + let last = groups.last().unwrap(); + let max_occur = last[1]; + + groups + .iter() + .rev() + .take_while(|v| { + let len = v[1]; + len == max_occur + }) + .map(|v| v[0]) + .collect() + } + } +} + +#[cfg(feature = "mode")] fn mode(ca: &ChunkedArray) -> ChunkedArray where ChunkedArray: IntoGroupsProxy + ChunkTake, @@ -86,28 +117,12 @@ where if ca.is_empty() { return ca.clone(); } - let mut groups = ca - .group_tuples(true, false) - .unwrap() - .into_idx() - .into_iter() - .collect_trusted::>(); - groups.sort_unstable_by_key(|k| k.1.len()); - let last = &groups.last().unwrap(); - - let max_occur = last.1.len(); - - // collect until we don't take with trusted len anymore - // TODO! take directly from iter, but first remove standard trusted-length collect. - let idx = groups - .iter() - .rev() - .take_while(|v| v.1.len() == max_occur) - .map(|v| v.0) - .collect::>(); + let groups = ca.group_tuples(true, false).unwrap(); + let idx = mode_indices(groups); + // Safety: // group indices are in bounds - unsafe { ca.take_unchecked(idx.into_iter().map(|i| i as usize).into()) } + unsafe { ca.take_unchecked(idx.as_slice().into()) } } macro_rules! arg_unique_ca { diff --git a/polars/polars-core/src/frame/groupby/proxy.rs b/polars/polars-core/src/frame/groupby/proxy.rs index 3b30c47327c4..3ddf64691c04 100644 --- a/polars/polars-core/src/frame/groupby/proxy.rs +++ b/polars/polars-core/src/frame/groupby/proxy.rs @@ -312,7 +312,7 @@ impl GroupsProxy { match self { GroupsProxy::Idx(groups) => groups, GroupsProxy::Slice { groups, .. } => { - eprintln!("Had to reallocate groups, missed an optimization opportunity. Please open an issue."); + polars_warn!("Had to reallocate groups, missed an optimization opportunity. Please open an issue."); groups .iter() .map(|&[first, len]| (first, (first..first + len).collect_trusted::>())) diff --git a/py-polars/tests/unit/series/test_series.py b/py-polars/tests/unit/series/test_series.py index 618402ca41d8..52d718084c15 100644 --- a/py-polars/tests/unit/series/test_series.py +++ b/py-polars/tests/unit/series/test_series.py @@ -1262,6 +1262,9 @@ def test_mode() -> None: ) assert pl.Series([1.0, 2.0, 3.0, 2.0]).mode().item() == 2.0 + # sorted data + assert pl.int_range(0, 3, eager=True).mode().to_list() == [2, 1, 0] + def test_rank() -> None: s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])