Skip to content

Commit

Permalink
feat(rust, python): is_first also supports numeric list type.
Browse files Browse the repository at this point in the history
  • Loading branch information
reswqa committed Aug 25, 2023
1 parent ecb819a commit 46ef41b
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 1 deletion.
1 change: 1 addition & 0 deletions crates/polars-ops/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ string_from_radix = ["polars-core/strings"]
extract_jsonpath = ["serde_json", "jsonpath_lib", "polars-json"]
log = []
hash = []
group_by_list = ["polars-core/group_by_list"]
rolling_window = ["polars-core/rolling_window"]
moment = ["polars-core/moment"]
search_sorted = []
Expand Down
20 changes: 20 additions & 0 deletions crates/polars-ops/src/series/ops/is_first.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,21 @@ fn is_first_struct(s: &Series) -> PolarsResult<BooleanChunked> {
Ok(BooleanChunked::with_chunk(s.name(), arr))
}

/// Builds a boolean mask over the rows of a list column that is `true` only at
/// the row indices yielded by `take_group_firsts` on the column's group tuples.
///
/// The mask has `ca.len()` entries, carries no validity bitmap, and the
/// resulting `BooleanChunked` takes its name from `ca`.
///
/// # Errors
///
/// Propagates any error returned by `group_tuples`.
fn is_first_list(ca: &ListChunked) -> PolarsResult<BooleanChunked> {
    // Group the rows and keep one leading row index per group.
    let first_indices = ca.group_tuples(true, false)?.take_group_firsts();

    // Start from an all-`false` mask spanning every row of the column.
    let len = ca.len();
    let mut mask = MutableBitmap::with_capacity(len);
    mask.extend_constant(len, false);

    first_indices.into_iter().for_each(|row| {
        // SAFETY: group tuples are always in bounds, and `mask` holds `ca.len()` bits.
        unsafe { mask.set_unchecked(row as usize, true) }
    });

    let values = BooleanArray::new(ArrowDataType::Boolean, mask.into(), None);
    Ok(BooleanChunked::with_chunk(ca.name(), values))
}

pub fn is_first(s: &Series) -> PolarsResult<BooleanChunked> {
let s = s.to_physical_repr();

Expand Down Expand Up @@ -97,6 +112,11 @@ pub fn is_first(s: &Series) -> PolarsResult<BooleanChunked> {
},
#[cfg(feature = "dtype-struct")]
Struct(_) => return is_first_struct(&s),
#[cfg(feature = "group_by_list")]
List(inner) if inner.is_numeric() => {
let ca = s.list().unwrap();
return is_first_list(ca);
},
dt => polars_bail!(opq = is_first, dt),
};
Ok(out)
Expand Down
2 changes: 1 addition & 1 deletion crates/polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ extract_jsonpath = [
]
string_encoding = ["polars-ops/string_encoding", "polars-core/strings"]
binary_encoding = ["polars-ops/binary_encoding"]
group_by_list = ["polars-core/group_by_list"]
group_by_list = ["polars-core/group_by_list", "polars-ops/group_by_list"]
lazy_regex = ["polars-lazy/regex"]
cum_agg = ["polars-core/cum_agg", "polars-core/cum_agg"]
rolling_window = ["polars-core/rolling_window", "polars-lazy/rolling_window", "polars-time/rolling_window"]
Expand Down
5 changes: 5 additions & 0 deletions py-polars/tests/unit/test_lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,11 @@ def test_is_first() -> None:
"a": [True, True, True, False, True, False, False]
}

ldf = pl.LazyFrame({"a": [[1, 2], [3], [1, 2], [4, 5], [4, 5]]})
assert ldf.select(pl.col("a").is_first()).collect().to_dict(False) == {
"a": [True, True, False, True, False]
}


def test_is_duplicated() -> None:
ldf = pl.LazyFrame({"a": [4, 1, 4]}).select(pl.col("a").is_duplicated())
Expand Down

0 comments on commit 46ef41b

Please sign in to comment.