Skip to content

Commit

Permalink
Add test module and new test
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Aug 26, 2023
1 parent 0855d34 commit 210dd17
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 52 deletions.
42 changes: 0 additions & 42 deletions py-polars/tests/unit/datatypes/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,48 +161,6 @@ def test_struct_function_expansion() -> None:
assert pl.Struct(struct_schema) == s.to_frame().schema["a"]


def test_value_counts_expr() -> None:
    """Check ``Expr.value_counts`` in a plain select and in a group_by aggregation."""
    df = pl.DataFrame(
        {
            "id": ["a", "b", "b", "c", "c", "c", "d", "d"],
        }
    )
    out = (
        df.select(
            [
                pl.col("id").value_counts(sort=True),
            ]
        )
        .to_series()
        .to_list()
    )
    # With sort=True the structs are expected in descending order of "counts".
    assert out == [
        {"id": "c", "counts": 3},
        {"id": "b", "counts": 2},
        {"id": "d", "counts": 2},
        {"id": "a", "counts": 1},
    ]

    # Nested value counts: here the series needs the name (refs #6200).
    df = pl.DataFrame({"session": [1, 1, 1], "id": [2, 2, 3]})
    result = df.group_by("session").agg(
        [pl.col("id").value_counts(sort=True).first()]
    )
    # Pass `as_series` by keyword: a bare positional boolean is opaque to the
    # reader and breaks where the parameter is keyword-only.
    assert result.to_dict(as_series=False) == {
        "session": [1],
        "id": [{"id": 2, "counts": 2}],
    }


def test_value_counts_logical_type() -> None:
    """Check the dtype of the value field produced by ``value_counts`` per column."""
    base = pl.DataFrame({"a": ["b", "c"]})
    # "ac" is the same data as "a", cast to the Categorical logical type.
    frame = base.with_columns(pl.col("a").cast(pl.Categorical).alias("ac"))
    counted = frame.select([pl.all().value_counts()])

    categorical_field = counted["ac"].struct.field("ac")
    assert categorical_field.dtype == pl.Categorical

    string_field = counted["a"].struct.field("a")
    assert string_field.dtype == pl.Utf8


def test_nested_struct() -> None:
df = pl.DataFrame({"d": [1, 2, 3], "e": ["foo", "bar", "biz"]})
# Nest the dataframe
Expand Down
57 changes: 57 additions & 0 deletions py-polars/tests/unit/operations/test_value_counts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from __future__ import annotations

import pytest

import polars as pl
from polars.testing import assert_frame_equal


def test_value_counts() -> None:
    """Check ``Series.value_counts`` against a hand-built frame of values and counts."""
    values = pl.Series("a", [1, 2, 2, 3])
    counted = values.value_counts()
    want = pl.DataFrame(
        {"a": [1, 2, 3], "counts": [1, 2, 1]},
        schema_overrides={"counts": pl.UInt32},
    )
    # Sort on the value column before comparing so row order does not matter.
    assert_frame_equal(counted.sort("a"), want)


def test_value_counts_logical_type() -> None:
    """Check the dtype of the value field produced by ``value_counts`` per column."""
    base = pl.DataFrame({"a": ["b", "c"]})
    # "ac" is the same data as "a", cast to the Categorical logical type.
    frame = base.with_columns(pl.col("a").cast(pl.Categorical).alias("ac"))
    counted = frame.select(pl.all().value_counts())

    categorical_field = counted["ac"].struct.field("ac")
    assert categorical_field.dtype == pl.Categorical

    string_field = counted["a"].struct.field("a")
    assert string_field.dtype == pl.Utf8


def test_value_counts_expr() -> None:
    """Check ``Expr.value_counts`` in a plain select and in a group_by aggregation."""
    df = pl.DataFrame(
        {
            "id": ["a", "b", "b", "c", "c", "c", "d", "d"],
        }
    )
    out = df.select(pl.col("id").value_counts(sort=True)).to_series().to_list()
    # With sort=True the structs are expected in descending order of "counts".
    assert out == [
        {"id": "c", "counts": 3},
        {"id": "b", "counts": 2},
        {"id": "d", "counts": 2},
        {"id": "a", "counts": 1},
    ]

    # Nested value counts: here the series needs the name (refs #6200).
    df = pl.DataFrame({"session": [1, 1, 1], "id": [2, 2, 3]})
    result = df.group_by("session").agg(
        pl.col("id").value_counts(sort=True).first()
    )
    # Pass `as_series` by keyword: a bare positional boolean is opaque to the
    # reader and breaks where the parameter is keyword-only.
    assert result.to_dict(as_series=False) == {
        "session": [1],
        "id": [{"id": 2, "counts": 2}],
    }


def test_value_counts_duplicate_name() -> None:
    """``value_counts`` on a Series named "counts" must raise ``DuplicateError``."""
    clashing = pl.Series("counts", [1])

    # The error message is expected to mention the offending name.
    with pytest.raises(pl.DuplicateError, match="counts"):
        clashing.value_counts()
10 changes: 0 additions & 10 deletions py-polars/tests/unit/series/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1695,16 +1695,6 @@ def test_to_dummies() -> None:
assert_frame_equal(result, expected)


def test_value_counts() -> None:
    """Check ``Series.value_counts`` against a hand-built frame of values and counts."""
    values = pl.Series("a", [1, 2, 2, 3])
    counted = values.value_counts()
    want = pl.DataFrame(
        {"a": [1, 2, 3], "counts": [1, 2, 1]},
        schema_overrides={"counts": pl.UInt32},
    )
    # Sort on the value column before comparing so row order does not matter.
    assert_frame_equal(counted.sort("a"), want)


def test_chunk_lengths() -> None:
s = pl.Series("a", [1, 2, 2, 3])
# this is a Series with one chunk, of length 4
Expand Down

0 comments on commit 210dd17

Please sign in to comment.