Skip to content

Commit

Permalink
Fix tests
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Sep 15, 2023
1 parent 4ddf7f1 commit 2927081
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 81 deletions.
2 changes: 2 additions & 0 deletions py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9262,6 +9262,7 @@ def rolling_apply(
function, window_size, weights, min_periods, center=center
)

@deprecate_renamed_function("is_first_distinct", version="0.19.3")
def is_first(self) -> Self:
"""
Return a boolean mask indicating the first occurrence of each distinct value.
Expand All @@ -9277,6 +9278,7 @@ def is_first(self) -> Self:
"""
return self.is_first_distinct()

@deprecate_renamed_function("is_last_distinct", version="0.19.3")
def is_last(self) -> Self:
"""
Return a boolean mask indicating the last occurrence of each distinct value.
Expand Down
2 changes: 2 additions & 0 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -6732,6 +6732,7 @@ def rolling_apply(
"""

@deprecate_renamed_function("is_first_distinct", version="0.19.3")
def is_first(self) -> Series:
"""
Return a boolean mask indicating the first occurrence of each distinct value.
Expand All @@ -6746,6 +6747,7 @@ def is_first(self) -> Series:
"""

@deprecate_renamed_function("is_last_distinct", version="0.19.3")
def is_last(self) -> Series:
"""
Return a boolean mask indicating the last occurrence of each distinct value.
Expand Down
105 changes: 105 additions & 0 deletions py-polars/tests/unit/operations/test_is_first_last_distinct.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import pytest

import polars as pl
from polars.testing import assert_frame_equal, assert_series_equal


def test_is_first_distinct() -> None:
    """Check ``is_first_distinct`` on an integer column selected lazily."""
    frame = pl.LazyFrame({"a": [4, 1, 4]})
    collected = frame.select(pl.col("a").is_first_distinct()).collect()
    # The second 4 repeats the first row, so only it is flagged False.
    assert_series_equal(collected["a"], pl.Series("a", [True, True, False]))


def test_is_first_distinct_struct() -> None:
    """Check ``is_first_distinct`` on a struct of columns ``a`` and ``b``."""
    data = {"a": [1, 2, 3, 2, None, 2, 1], "b": [0, 2, 3, 2, None, 2, 0]}
    actual = pl.LazyFrame(data).select(pl.struct("a", "b").is_first_distinct())
    # Both sides are left lazy; the helper is handed two LazyFrames.
    assert_frame_equal(
        actual,
        pl.LazyFrame({"a": [True, True, True, False, True, False, False]}),
    )


def test_is_first_distinct_list() -> None:
    """Check ``is_first_distinct`` on a column whose values are lists."""
    nested = pl.LazyFrame({"a": [[1, 2], [3], [1, 2], [4, 5], [4, 5]]})
    actual = nested.select(pl.col("a").is_first_distinct())
    # Repeated lists ([1, 2] and [4, 5]) are flagged False on their 2nd occurrence.
    assert_frame_equal(
        actual,
        pl.LazyFrame({"a": [True, True, False, True, False]}),
    )


def test_is_first_distinct_various() -> None:
    """Check ``Series.is_first_distinct`` across dtypes, with nulls mixed in."""
    # Mask shared by every case except the boolean one.
    common = [True, False, True, True, False, True, False]
    cases = [
        # numeric
        ([1, 1, None, 2, None, 3, 3], common),
        # str
        (["x", "x", None, "y", None, "z", "z"], common),
        # boolean: only two distinct non-null values, so the tail is all False
        (
            [True, True, None, False, None, False, False],
            [True, False, True, True, False, False, False],
        ),
        # struct
        (
            [
                {"x": 1, "y": 2},
                {"x": 1, "y": 2},
                None,
                {"x": 2, "y": 1},
                None,
                {"x": 3, "y": 2},
                {"x": 3, "y": 2},
            ],
            common,
        ),
        # list
        ([[1, 2], [1, 2], None, [2, 3], None, [3, 4], [3, 4]], common),
    ]
    for values, expected in cases:
        assert pl.Series(values).is_first_distinct().to_list() == expected


def test_is_last_distinct() -> None:
    """Check ``Series.is_last_distinct`` across dtypes, with nulls mixed in."""
    # Mask shared by every case except the boolean one.
    common = [False, True, False, True, True, False, True]
    cases = [
        # numeric
        ([1, 1, None, 2, None, 3, 3], common),
        # str
        (["x", "x", None, "y", None, "z", "z"], common),
        # boolean: False recurs until the final row, so only that one is last
        (
            [True, True, None, False, None, False, False],
            [False, True, False, False, True, False, True],
        ),
        # struct
        (
            [
                {"x": 1, "y": 2},
                {"x": 1, "y": 2},
                None,
                {"x": 2, "y": 1},
                None,
                {"x": 3, "y": 2},
                {"x": 3, "y": 2},
            ],
            common,
        ),
        # list
        ([[1, 2], [1, 2], None, [2, 3], None, [3, 4], [3, 4]], common),
    ]
    for values, expected in cases:
        assert pl.Series(values).is_last_distinct().to_list() == expected


@pytest.mark.parametrize("dtypes", [pl.Int32, pl.Utf8, pl.Boolean, pl.List(pl.Int32)])
def test_is_first_last_distinct_all_null(dtypes: pl.PolarsDataType) -> None:
    """For an all-null Series, only the first (resp. last) row is flagged."""
    all_null = pl.Series([None] * 3, dtype=dtypes)

    first_mask = all_null.is_first_distinct().to_list()
    assert first_mask == [True, False, False]

    last_mask = all_null.is_last_distinct().to_list()
    assert last_mask == [False, False, True]


def test_is_first_last_deprecated() -> None:
    """Calling ``is_first`` / ``is_last`` on an expression must warn."""
    for legacy_name in ("is_first", "is_last"):
        # Each call gets its own context so both methods are checked separately.
        with pytest.deprecated_call():
            getattr(pl.col("a"), legacy_name)()
63 changes: 0 additions & 63 deletions py-polars/tests/unit/series/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1220,69 +1220,6 @@ def test_apply_list_out() -> None:
assert out[2].to_list() == [2, 2]


def test_is_first() -> None:
    """Check ``Series.is_first`` across dtypes, with nulls mixed in."""
    # Mask shared by every case except the boolean one.
    common = [True, False, True, True, False, True, False]
    cases = [
        # numeric
        ([1, 1, None, 2, None, 3, 3], common),
        # str
        (["x", "x", None, "y", None, "z", "z"], common),
        # boolean
        (
            [True, True, None, False, None, False, False],
            [True, False, True, True, False, False, False],
        ),
        # struct
        (
            [
                {"x": 1, "y": 2},
                {"x": 1, "y": 2},
                None,
                {"x": 2, "y": 1},
                None,
                {"x": 3, "y": 2},
                {"x": 3, "y": 2},
            ],
            common,
        ),
        # list
        ([[1, 2], [1, 2], None, [2, 3], None, [3, 4], [3, 4]], common),
    ]
    for values, expected in cases:
        assert pl.Series(values).is_first().to_list() == expected


def test_is_last() -> None:
    """Check ``Series.is_last`` across dtypes, with nulls mixed in."""
    # Mask shared by every case except the boolean one.
    common = [False, True, False, True, True, False, True]
    cases = [
        # numeric
        ([1, 1, None, 2, None, 3, 3], common),
        # str
        (["x", "x", None, "y", None, "z", "z"], common),
        # boolean
        (
            [True, True, None, False, None, False, False],
            [False, True, False, False, True, False, True],
        ),
        # struct
        (
            [
                {"x": 1, "y": 2},
                {"x": 1, "y": 2},
                None,
                {"x": 2, "y": 1},
                None,
                {"x": 3, "y": 2},
                {"x": 3, "y": 2},
            ],
            common,
        ),
        # list
        ([[1, 2], [1, 2], None, [2, 3], None, [3, 4], [3, 4]], common),
    ]
    for values, expected in cases:
        assert pl.Series(values).is_last().to_list() == expected


@pytest.mark.parametrize("dtypes", [pl.Int32, pl.Utf8, pl.Boolean, pl.List(pl.Int32)])
def test_is_first_last_all_null(dtypes: pl.PolarsDataType) -> None:
    """For an all-null Series, only the first (resp. last) row is flagged."""
    all_null = pl.Series([None] * 3, dtype=dtypes)

    first_mask = all_null.is_first().to_list()
    assert first_mask == [True, False, False]

    last_mask = all_null.is_last().to_list()
    assert last_mask == [False, False, True]


def test_reinterpret() -> None:
s = pl.Series("a", [1, 1, 2], dtype=pl.UInt64)
assert s.reinterpret(signed=True).dtype == pl.Int64
Expand Down
18 changes: 0 additions & 18 deletions py-polars/tests/unit/test_lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,24 +287,6 @@ def test_is_unique() -> None:
assert_series_equal(result, pl.Series("a", [False, True, False]))


def test_is_first() -> None:
    """Check ``is_first`` via LazyFrame on integer, struct and list columns."""
    ints = pl.LazyFrame({"a": [4, 1, 4]})
    int_mask = ints.select(pl.col("a").is_first()).collect()["a"]
    assert_series_equal(int_mask, pl.Series("a", [True, True, False]))

    # struct
    structs = pl.LazyFrame(
        {"a": [1, 2, 3, 2, None, 2, 1], "b": [0, 2, 3, 2, None, 2, 0]}
    )
    struct_out = structs.select(pl.struct(["a", "b"]).is_first()).collect()
    assert struct_out.to_dict(False) == {
        "a": [True, True, True, False, True, False, False]
    }

    # list
    lists = pl.LazyFrame({"a": [[1, 2], [3], [1, 2], [4, 5], [4, 5]]})
    list_out = lists.select(pl.col("a").is_first()).collect()
    assert list_out.to_dict(False) == {"a": [True, True, False, True, False]}


def test_is_duplicated() -> None:
ldf = pl.LazyFrame({"a": [4, 1, 4]}).select(pl.col("a").is_duplicated())
assert_series_equal(ldf.collect()["a"], pl.Series("a", [True, False, True]))
Expand Down

0 comments on commit 2927081

Please sign in to comment.