From b5c3c51d400dc929fdf5e09fc246466aa125a6eb Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Mon, 1 Jul 2024 14:36:05 +0100 Subject: [PATCH] chore: add `constructor` fixture for tests (#373) --- narwhals/_pandas_like/series.py | 37 +++++++++---- narwhals/_pandas_like/utils.py | 8 +++ narwhals/dataframe.py | 10 ++-- narwhals/expression.py | 2 +- narwhals/series.py | 2 +- tests/conftest.py | 50 +++++++++++++++++ tests/expr/any_all_test.py | 22 ++++++++ tests/expr/cat/get_categories_test.py | 5 -- tests/expr/cum_sum_test.py | 10 +--- tests/expr/diff_test.py | 5 -- tests/expr/fill_null_test.py | 5 -- tests/expr/filter_test.py | 5 -- tests/expr/is_between_test.py | 3 -- tests/expr/is_duplicated_test.py | 5 -- tests/expr/is_first_distinct_test.py | 5 -- tests/expr/is_last_distinct_test.py | 5 -- tests/expr/is_unique_test.py | 5 -- tests/expr/len_test.py | 5 -- tests/expr/n_unique_test.py | 5 -- tests/expr/null_count_test.py | 5 -- tests/expr/over_test.py | 8 +-- tests/expr/round_test.py | 3 -- tests/expr/sample_test.py | 5 -- tests/expr/shift_test.py | 5 -- tests/expr/str/head_test.py | 5 -- tests/expr/str/slice_test.py | 3 -- tests/expr/str/tail_test.py | 5 -- tests/expr/test_dt.py | 66 ++++++++++++++++------- tests/frame/drop_nulls_test.py | 7 +-- tests/frame/len_test.py | 10 +--- tests/frame/pipe_test.py | 9 ++-- tests/frame/schema_test.py | 1 - tests/frame/shape_test.py | 10 +--- tests/frame/slice_test.py | 24 ++++----- tests/frame/test_common.py | 13 ----- tests/frame/with_columns_sequence_test.py | 4 -- tests/frame/with_row_index_test.py | 7 +-- tests/frame/write_parquet_test.py | 9 ++-- tests/series/arithmetic_test.py | 4 +- tests/test_group_by.py | 2 - tests/test_selectors.py | 11 ++-- 41 files changed, 192 insertions(+), 218 deletions(-) create mode 100644 tests/expr/any_all_test.py diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 9fbbb12f1..cd94b03f8 100644 --- a/narwhals/_pandas_like/series.py +++ 
b/narwhals/_pandas_like/series.py @@ -6,6 +6,7 @@ from typing import Literal from typing import Sequence +from narwhals._pandas_like.utils import int_dtype_mapper from narwhals._pandas_like.utils import native_series_from_iterable from narwhals._pandas_like.utils import reverse_translate_dtype from narwhals._pandas_like.utils import to_datetime @@ -609,14 +610,23 @@ def second(self) -> PandasSeries: ) def millisecond(self) -> PandasSeries: + if "pyarrow" in str(self._series._series.dtype): + msg = ".dt.millisecond not implemented for pyarrow-backed pandas" + raise NotImplementedError(msg) return self._series._from_series( self._series._series.dt.microsecond // 1000, ) def microsecond(self) -> PandasSeries: + if "pyarrow" in str(self._series._series.dtype): + msg = ".dt.microsecond not implemented for pyarrow-backed pandas" + raise NotImplementedError(msg) return self._series._from_series(self._series._series.dt.microsecond) def nanosecond(self) -> PandasSeries: + if "pyarrow" in str(self._series._series.dtype): + msg = ".dt.nanosecond not implemented for pyarrow-backed pandas" + raise NotImplementedError(msg) return self._series._from_series( ( (self._series._series.dt.microsecond * 1_000) @@ -639,54 +649,59 @@ def ordinal_day(self) -> PandasSeries: def total_minutes(self) -> PandasSeries: s = self._series._series.dt.total_seconds() s_sign = ( - 2 * (s > 0).astype(int) - 1 + 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 ) # this calculates the sign of each series element s_abs = s.abs() // 60 if ~s.isna().any(): - s_abs = s_abs.astype(int) + s_abs = s_abs.astype(int_dtype_mapper(s.dtype)) return self._series._from_series(s_abs * s_sign) def total_seconds(self) -> PandasSeries: s = self._series._series.dt.total_seconds() s_sign = ( - 2 * (s > 0).astype(int) - 1 + 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 ) # this calculates the sign of each series element s_abs = s.abs() // 1 if ~s.isna().any(): - s_abs = s_abs.astype(int) + s_abs = 
s_abs.astype(int_dtype_mapper(s.dtype)) return self._series._from_series(s_abs * s_sign) def total_milliseconds(self) -> PandasSeries: s = self._series._series.dt.total_seconds() * 1e3 s_sign = ( - 2 * (s > 0).astype(int) - 1 + 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 ) # this calculates the sign of each series element s_abs = s.abs() // 1 if ~s.isna().any(): - s_abs = s_abs.astype(int) + s_abs = s_abs.astype(int_dtype_mapper(s.dtype)) return self._series._from_series(s_abs * s_sign) def total_microseconds(self) -> PandasSeries: s = self._series._series.dt.total_seconds() * 1e6 s_sign = ( - 2 * (s > 0).astype(int) - 1 + 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 ) # this calculates the sign of each series element s_abs = s.abs() // 1 if ~s.isna().any(): - s_abs = s_abs.astype(int) + s_abs = s_abs.astype(int_dtype_mapper(s.dtype)) return self._series._from_series(s_abs * s_sign) def total_nanoseconds(self) -> PandasSeries: s = self._series._series.dt.total_seconds() * 1e9 s_sign = ( - 2 * (s > 0).astype(int) - 1 + 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1 ) # this calculates the sign of each series element s_abs = s.abs() // 1 if ~s.isna().any(): - s_abs = s_abs.astype(int) + s_abs = s_abs.astype(int_dtype_mapper(s.dtype)) return self._series._from_series(s_abs * s_sign) def to_string(self, format: str) -> PandasSeries: # noqa: A002 # Polars' parser treats `'%.f'` as pandas does `'.%f'` - format = format.replace("%.f", ".%f") + # PyArrow interprets `'%S'` as "seconds, plus fractional seconds" + # and doesn't support `%f` + if "pyarrow" not in str(self._series._series.dtype): + format = format.replace("%S%.f", "%S.%f") + else: + format = format.replace("%S.%f", "%S").replace("%S%.f", "%S") return self._series._from_series(self._series._series.dt.strftime(format)) diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 8862d0917..1ff880e57 100644 --- a/narwhals/_pandas_like/utils.py +++ 
b/narwhals/_pandas_like/utils.py @@ -600,3 +600,11 @@ def to_datetime(implementation: str) -> Any: if implementation == "cudf": return get_cudf().to_datetime raise AssertionError + + +def int_dtype_mapper(dtype: Any) -> str: + if "pyarrow" in str(dtype): + return "Int64[pyarrow]" + if str(dtype).lower() != str(dtype): # pragma: no cover + return "Int64" + return "int64" diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 3a9b3740b..6e366dd54 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -501,7 +501,7 @@ def drop_nulls(self) -> Self: >>> import polars as pl >>> import pandas as pd >>> import narwhals as nw - >>> data = {"a": [1.0, 2.0, None], "ba": [1, None, 2.0]} + >>> data = {"a": [1.0, 2.0, None], "ba": [1.0, None, 2.0]} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) @@ -1828,9 +1828,7 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se >>> def func(df_any): ... df = nw.from_native(df_any) - ... df = df.pipe( - ... lambda _df: _df.select([x for x in _df.columns if len(x) == 1]) - ... ) + ... df = df.pipe(lambda _df: _df.select("a")) ... return nw.to_native(df) We can then pass either pandas or Polars: @@ -1866,7 +1864,7 @@ def drop_nulls(self) -> Self: >>> import polars as pl >>> import pandas as pd >>> import narwhals as nw - >>> data = {"a": [1.0, 2.0, None], "ba": [1, None, 2.0]} + >>> data = {"a": [1.0, 2.0, None], "ba": [1.0, None, 2.0]} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.LazyFrame(data) @@ -1972,7 +1970,7 @@ def columns(self) -> list[str]: ... } ... 
).select("foo", "bar") >>> lf = nw.from_native(lf_pl) - >>> lf.columns + >>> lf.columns # doctest: +SKIP ['foo', 'bar'] """ return super().columns diff --git a/narwhals/expression.py b/narwhals/expression.py index 96548d8a3..658dbfcc9 100644 --- a/narwhals/expression.py +++ b/narwhals/expression.py @@ -2787,7 +2787,7 @@ def to_string(self, format: str) -> Expr: # noqa: A002 Therefore, we make the following adjustments: - - for pandas-like libraries, we replace `".%f"` with `"%.f"`. + - for pandas-like libraries, we replace `"%S%.f"` with `"%S.%f"`. - for PyArrow, we replace `"%S.%f"` with `"%S"`. Workarounds like these don't make us happy, and we try to avoid them as diff --git a/narwhals/series.py b/narwhals/series.py index e6e3869a6..cc0941b3f 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -2775,7 +2775,7 @@ def to_string(self, format: str) -> Series: # noqa: A002 Therefore, we make the following adjustments: - - for pandas-like libraries, we replace `".%f"` with `"%.f"`. + - for pandas-like libraries, we replace `"%S%.f"` with `"%S.%f"`. - for PyArrow, we replace `"%S.%f"` with `"%S"`.
Workarounds like these don't make us happy, and we try to avoid them as diff --git a/tests/conftest.py b/tests/conftest.py index f0316826a..1f2bcbe41 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,16 @@ +import os from typing import Any +from typing import Callable +import pandas as pd +import polars as pl +import pyarrow as pa import pytest +from narwhals.dependencies import get_modin +from narwhals.typing import IntoDataFrame +from narwhals.utils import parse_version + def pytest_addoption(parser: Any) -> None: parser.addoption( @@ -21,3 +30,44 @@ def pytest_collection_modifyitems(config: Any, items: Any) -> Any: # pragma: no for item in items: if "slow" in item.keywords: item.add_marker(skip_slow) + + +def pandas_constructor(obj: Any) -> IntoDataFrame: + return pd.DataFrame(obj) # type: ignore[no-any-return] + + +def pandas_nullable_constructor(obj: Any) -> IntoDataFrame: + return pd.DataFrame(obj).convert_dtypes() # type: ignore[no-any-return] + + +def pandas_pyarrow_constructor(obj: Any) -> IntoDataFrame: + return pd.DataFrame(obj).convert_dtypes(dtype_backend="pyarrow") # type: ignore[no-any-return] + + +def modin_constructor(obj: Any) -> IntoDataFrame: # pragma: no cover + return pd.DataFrame(obj).convert_dtypes(dtype_backend="pyarrow") # type: ignore[no-any-return] + + +def polars_constructor(obj: Any) -> IntoDataFrame: + return pl.DataFrame(obj) + + +if parse_version(pd.__version__) >= parse_version("1.5.0"): + params = [pandas_constructor, pandas_nullable_constructor, pandas_pyarrow_constructor] +else: # pragma: no cover + params = [pandas_constructor] +params.append(polars_constructor) +if os.environ.get("CI") and get_modin() is not None: # pragma: no cover + params.append(modin_constructor) + + +@pytest.fixture(params=params) +def constructor(request: Any) -> Callable[[Any], IntoDataFrame]: + return request.param # type: ignore[no-any-return] + + +# TODO: once pyarrow has complete coverage, we can remove this one, +# and just put 
`pa.table` into `constructor` +@pytest.fixture(params=[*params, pa.table]) +def constructor_with_pyarrow(request: Any) -> Callable[[Any], IntoDataFrame]: + return request.param # type: ignore[no-any-return] diff --git a/tests/expr/any_all_test.py b/tests/expr/any_all_test.py new file mode 100644 index 000000000..05aef9728 --- /dev/null +++ b/tests/expr/any_all_test.py @@ -0,0 +1,22 @@ +from typing import Any + +import narwhals as nw +from tests.utils import compare_dicts + + +def test_any_all(constructor: Any) -> None: + df = nw.from_native( + constructor( + { + "a": [True, False, True], + "b": [True, True, True], + "c": [False, False, False], + } + ) + ) + result = nw.to_native(df.select(nw.all().all())) + expected = {"a": [False], "b": [True], "c": [False]} + compare_dicts(result, expected) + result = nw.to_native(df.select(nw.all().any())) + expected = {"a": [True], "b": [True], "c": [False]} + compare_dicts(result, expected) diff --git a/tests/expr/cat/get_categories_test.py b/tests/expr/cat/get_categories_test.py index eb2824f39..fabce934c 100644 --- a/tests/expr/cat/get_categories_test.py +++ b/tests/expr/cat/get_categories_test.py @@ -2,17 +2,12 @@ from typing import Any -import pandas as pd -import polars as pl -import pytest - import narwhals as nw from tests.utils import compare_dicts data = {"a": ["one", "two", "two"]} -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_get_categories(constructor: Any) -> None: df = nw.from_native(constructor(data), eager_only=True) df = df.select(nw.col("a").cast(nw.Categorical)) diff --git a/tests/expr/cum_sum_test.py b/tests/expr/cum_sum_test.py index 400f0f7b5..25e3597ed 100644 --- a/tests/expr/cum_sum_test.py +++ b/tests/expr/cum_sum_test.py @@ -1,10 +1,5 @@ from typing import Any -import pandas as pd -import polars as pl -import pyarrow as pa -import pytest - import narwhals as nw from tests.utils import compare_dicts @@ -15,9 +10,8 @@ } -@pytest.mark.parametrize("constructor", 
[pd.DataFrame, pl.DataFrame, pa.table]) -def test_cum_sum_simple(constructor: Any) -> None: - df = nw.from_native(constructor(data), eager_only=True) +def test_cum_sum_simple(constructor_with_pyarrow: Any) -> None: + df = nw.from_native(constructor_with_pyarrow(data), eager_only=True) result = df.select(nw.all().cum_sum()) expected = { "a": [0, 1, 3, 6, 10], diff --git a/tests/expr/diff_test.py b/tests/expr/diff_test.py index 76f389639..e84d90da0 100644 --- a/tests/expr/diff_test.py +++ b/tests/expr/diff_test.py @@ -1,9 +1,5 @@ from typing import Any -import pandas as pd -import polars as pl -import pytest - import narwhals as nw from tests.utils import compare_dicts @@ -14,7 +10,6 @@ } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_over_single(constructor: Any) -> None: df = nw.from_native(constructor(data), eager_only=True) result = df.with_columns(c_diff=nw.col("c").diff()).filter(nw.col("i") > 0) diff --git a/tests/expr/fill_null_test.py b/tests/expr/fill_null_test.py index f48815495..26dddc2e8 100644 --- a/tests/expr/fill_null_test.py +++ b/tests/expr/fill_null_test.py @@ -1,9 +1,5 @@ from typing import Any -import pandas as pd -import polars as pl -import pytest - import narwhals as nw from tests.utils import compare_dicts @@ -14,7 +10,6 @@ } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_over_single(constructor: Any) -> None: df = nw.from_native(constructor(data), eager_only=True) result = df.with_columns(nw.all().fill_null(99)) diff --git a/tests/expr/filter_test.py b/tests/expr/filter_test.py index ea15a94f1..b7a3eac8e 100644 --- a/tests/expr/filter_test.py +++ b/tests/expr/filter_test.py @@ -1,9 +1,5 @@ from typing import Any -import pandas as pd -import polars as pl -import pytest - import narwhals as nw from tests.utils import compare_dicts @@ -15,7 +11,6 @@ } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_filter(constructor: Any) -> None: df = 
nw.from_native(constructor(data), eager_only=True) result = df.select(nw.col("a").filter(nw.col("i") < 2, nw.col("c") == 5)) diff --git a/tests/expr/is_between_test.py b/tests/expr/is_between_test.py index b2832663a..180e3da49 100644 --- a/tests/expr/is_between_test.py +++ b/tests/expr/is_between_test.py @@ -2,8 +2,6 @@ from typing import Any -import pandas as pd -import polars as pl import pytest import narwhals as nw @@ -14,7 +12,6 @@ } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) @pytest.mark.parametrize( ("closed", "expected"), [ diff --git a/tests/expr/is_duplicated_test.py b/tests/expr/is_duplicated_test.py index 52e18f08a..01bbb62e2 100644 --- a/tests/expr/is_duplicated_test.py +++ b/tests/expr/is_duplicated_test.py @@ -1,9 +1,5 @@ from typing import Any -import pandas as pd -import polars as pl -import pytest - import narwhals as nw from tests.utils import compare_dicts @@ -13,7 +9,6 @@ } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_is_duplicated(constructor: Any) -> None: df = nw.from_native(constructor(data), eager_only=True) result = df.select(nw.all().is_duplicated()) diff --git a/tests/expr/is_first_distinct_test.py b/tests/expr/is_first_distinct_test.py index 22208c402..a62717fe2 100644 --- a/tests/expr/is_first_distinct_test.py +++ b/tests/expr/is_first_distinct_test.py @@ -1,9 +1,5 @@ from typing import Any -import pandas as pd -import polars as pl -import pytest - import narwhals as nw from tests.utils import compare_dicts @@ -13,7 +9,6 @@ } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_is_first_distinct(constructor: Any) -> None: df = nw.from_native(constructor(data), eager_only=True) result = df.select(nw.all().is_first_distinct()) diff --git a/tests/expr/is_last_distinct_test.py b/tests/expr/is_last_distinct_test.py index 984e2ee00..ef128d8dd 100644 --- a/tests/expr/is_last_distinct_test.py +++ b/tests/expr/is_last_distinct_test.py @@ -1,9 +1,5 @@ from 
typing import Any -import pandas as pd -import polars as pl -import pytest - import narwhals as nw from tests.utils import compare_dicts @@ -13,7 +9,6 @@ } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_is_last_distinct(constructor: Any) -> None: df = nw.from_native(constructor(data), eager_only=True) result = df.select(nw.all().is_last_distinct()) diff --git a/tests/expr/is_unique_test.py b/tests/expr/is_unique_test.py index 7ba842add..8271a9bbe 100644 --- a/tests/expr/is_unique_test.py +++ b/tests/expr/is_unique_test.py @@ -1,9 +1,5 @@ from typing import Any -import pandas as pd -import polars as pl -import pytest - import narwhals as nw from tests.utils import compare_dicts @@ -13,7 +9,6 @@ } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_is_unique(constructor: Any) -> None: df = nw.from_native(constructor(data), eager_only=True) result = df.select(nw.all().is_unique()) diff --git a/tests/expr/len_test.py b/tests/expr/len_test.py index 54389fbe5..5f5de569b 100644 --- a/tests/expr/len_test.py +++ b/tests/expr/len_test.py @@ -1,9 +1,5 @@ from typing import Any -import pandas as pd -import polars as pl -import pytest - import narwhals as nw from tests.utils import compare_dicts @@ -11,7 +7,6 @@ expected = {"a1": [2], "a2": [1]} -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_len(constructor: Any) -> None: df_raw = constructor(data) df = nw.from_native(df_raw).select( diff --git a/tests/expr/n_unique_test.py b/tests/expr/n_unique_test.py index 5a9f7ff23..49dc46a09 100644 --- a/tests/expr/n_unique_test.py +++ b/tests/expr/n_unique_test.py @@ -1,9 +1,5 @@ from typing import Any -import pandas as pd -import polars as pl -import pytest - import narwhals as nw from tests.utils import compare_dicts @@ -13,7 +9,6 @@ } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_over_single(constructor: Any) -> None: df = nw.from_native(constructor(data), 
eager_only=True) result = df.select(nw.all().n_unique()) diff --git a/tests/expr/null_count_test.py b/tests/expr/null_count_test.py index 68b615585..8b7fc0a58 100644 --- a/tests/expr/null_count_test.py +++ b/tests/expr/null_count_test.py @@ -1,9 +1,5 @@ from typing import Any -import pandas as pd -import polars as pl -import pytest - import narwhals as nw from tests.utils import compare_dicts @@ -13,7 +9,6 @@ } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_null_count(constructor: Any) -> None: df = nw.from_native(constructor(data), eager_only=True) result = df.select(nw.all().null_count()) diff --git a/tests/expr/over_test.py b/tests/expr/over_test.py index daeb47a49..872ac0a1e 100644 --- a/tests/expr/over_test.py +++ b/tests/expr/over_test.py @@ -1,7 +1,6 @@ from typing import Any import pandas as pd -import polars as pl import pytest import narwhals as nw @@ -14,7 +13,6 @@ } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_over_single(constructor: Any) -> None: df = nw.from_native(constructor(data)) result = df.with_columns(c_max=nw.col("c").max().over("a")) @@ -27,7 +25,6 @@ def test_over_single(constructor: Any) -> None: compare_dicts(result, expected) -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_over_multiple(constructor: Any) -> None: df = nw.from_native(constructor(data)) result = df.with_columns(c_min=nw.col("c").min().over("a", "b")) @@ -40,8 +37,7 @@ def test_over_multiple(constructor: Any) -> None: compare_dicts(result, expected) -@pytest.mark.parametrize("constructor", [pd.DataFrame]) -def test_over_invalid(constructor: Any) -> None: - df = nw.from_native(constructor(data)) +def test_over_invalid() -> None: + df = nw.from_native(pd.DataFrame(data)) with pytest.raises(ValueError, match="Anonymous expressions"): df.with_columns(c_min=nw.all().min().over("a", "b")) diff --git a/tests/expr/round_test.py b/tests/expr/round_test.py index a1681e437..c52ea6761 
100644 --- a/tests/expr/round_test.py +++ b/tests/expr/round_test.py @@ -2,15 +2,12 @@ from typing import Any -import pandas as pd -import polars as pl import pytest import narwhals as nw from tests.utils import compare_dicts -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) @pytest.mark.parametrize("decimals", [0, 1, 2]) def test_round(constructor: Any, decimals: int) -> None: data = {"a": [1.12345, 2.56789, 3.901234]} diff --git a/tests/expr/sample_test.py b/tests/expr/sample_test.py index b1fc686f4..cb45555b8 100644 --- a/tests/expr/sample_test.py +++ b/tests/expr/sample_test.py @@ -1,13 +1,8 @@ from typing import Any -import pandas as pd -import polars as pl -import pytest - import narwhals as nw -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_expr_sample(constructor: Any) -> None: df = nw.from_native(constructor({"a": [1, 2, 3], "b": [4, 5, 6]})).lazy() result_shape = nw.to_native(df.select(nw.col("a").sample(n=2)).collect()).shape diff --git a/tests/expr/shift_test.py b/tests/expr/shift_test.py index 05ad14027..067770982 100644 --- a/tests/expr/shift_test.py +++ b/tests/expr/shift_test.py @@ -1,9 +1,5 @@ from typing import Any -import pandas as pd -import polars as pl -import pytest - import narwhals as nw from tests.utils import compare_dicts @@ -15,7 +11,6 @@ } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_over_single(constructor: Any) -> None: df = nw.from_native(constructor(data), eager_only=True) result = df.with_columns(nw.col("a", "b", "c").shift(2)).filter(nw.col("i") > 1) diff --git a/tests/expr/str/head_test.py b/tests/expr/str/head_test.py index 2e420673e..31ae94995 100644 --- a/tests/expr/str/head_test.py +++ b/tests/expr/str/head_test.py @@ -1,9 +1,5 @@ from typing import Any -import pandas as pd -import polars as pl -import pytest - import narwhals as nw from tests.utils import compare_dicts @@ -12,7 +8,6 @@ } -@pytest.mark.parametrize("constructor", 
[pd.DataFrame, pl.DataFrame]) def test_str_head(constructor: Any) -> None: df = nw.from_native(constructor(data), eager_only=True) result = df.select(nw.col("a").str.head(3)) diff --git a/tests/expr/str/slice_test.py b/tests/expr/str/slice_test.py index a97cfc815..d2c5861c4 100644 --- a/tests/expr/str/slice_test.py +++ b/tests/expr/str/slice_test.py @@ -2,8 +2,6 @@ from typing import Any -import pandas as pd -import polars as pl import pytest import narwhals as nw @@ -12,7 +10,6 @@ data = {"a": ["fdas", "edfas"]} -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) @pytest.mark.parametrize( ("offset", "length", "expected"), [(1, 2, {"a": ["da", "df"]}), (-2, None, {"a": ["as", "as"]})], diff --git a/tests/expr/str/tail_test.py b/tests/expr/str/tail_test.py index 1ab9d59ed..e9e67e5a5 100644 --- a/tests/expr/str/tail_test.py +++ b/tests/expr/str/tail_test.py @@ -1,9 +1,5 @@ from typing import Any -import pandas as pd -import polars as pl -import pytest - import narwhals as nw from tests.utils import compare_dicts @@ -12,7 +8,6 @@ } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_str_tail(constructor: Any) -> None: df = nw.from_native(constructor(data), eager_only=True) expected = { diff --git a/tests/expr/test_dt.py b/tests/expr/test_dt.py index efec288c4..4aa189c5d 100644 --- a/tests/expr/test_dt.py +++ b/tests/expr/test_dt.py @@ -1,5 +1,6 @@ from __future__ import annotations +import contextlib from datetime import datetime from datetime import timedelta from typing import Any @@ -36,7 +37,6 @@ } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) @pytest.mark.parametrize( ("attribute", "expected"), [ @@ -53,16 +53,27 @@ ], ) def test_datetime_attributes( - attribute: str, expected: list[int], constructor: Any + attribute: str, + expected: list[int], + constructor: Any, ) -> None: + if "pyarrow" in str(constructor) and attribute in { + "millisecond", + "microsecond", + "nanosecond", + }: + ctx: 
Any = pytest.raises(NotImplementedError, match="pyarrow") + else: + ctx = contextlib.nullcontext() df = nw.from_native(constructor(data), eager_only=True) - result = nw.to_native(df.select(getattr(nw.col("a").dt, attribute)())) - compare_dicts(result, {"a": expected}) - result = nw.to_native(df.select(getattr(df["a"].dt, attribute)())) - compare_dicts(result, {"a": expected}) + with ctx: + result = nw.to_native(df.select(getattr(nw.col("a").dt, attribute)())) + compare_dicts(result, {"a": expected}) + with ctx: + result = nw.to_native(df.select(getattr(df["a"].dt, attribute)())) + compare_dicts(result, {"a": expected}) -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) @pytest.mark.parametrize( ("attribute", "expected_a", "expected_b"), [ @@ -76,7 +87,14 @@ def test_duration_attributes( expected_a: list[int], expected_b: list[int], constructor: Any, + request: Any, ) -> None: + if ( + parse_version(pd.__version__) == parse_version("2.0.3") + and "pyarrow" in str(constructor) + and attribute in ("total_minutes", "total_seconds", "total_milliseconds") + ): # pragma: no cover + request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data_timedelta), eager_only=True) result_a = nw.to_native(df.select(getattr(nw.col("a").dt, attribute)().fill_null(0))) compare_dicts(result_a, {"a": expected_a}) @@ -88,7 +106,6 @@ def test_duration_attributes( compare_dicts(result_b, {"b": expected_b}) -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) @pytest.mark.parametrize( ("attribute", "expected_b", "expected_c"), [ @@ -101,7 +118,21 @@ def test_duration_micro_nano( expected_b: list[int], expected_c: list[int], constructor: Any, + request: Any, ) -> None: + if ( + parse_version(pd.__version__) == parse_version("2.0.3") + and "pyarrow" in str(constructor) + and attribute + in ( + "total_minutes", + "total_seconds", + "total_milliseconds", + "total_microseconds", + "total_nanoseconds", + ) + ): # pragma: no cover + 
request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data_timedelta), eager_only=True) result_b = nw.to_native(df.select(getattr(nw.col("b").dt, attribute)().fill_null(0))) compare_dicts(result_b, {"b": expected_b}) @@ -170,32 +201,30 @@ def test_total_minutes(timedeltas: timedelta) -> None: assert result_pdns == result_pl -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame, pa.table]) @pytest.mark.parametrize( "fmt", ["%Y-%m-%d", "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S", "%G-W%V-%u", "%G-W%V"] ) @pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows") -def test_dt_to_string(constructor: Any, fmt: str) -> None: - input_frame = nw.from_native(constructor(data), eager_only=True) +def test_dt_to_string(constructor_with_pyarrow: Any, fmt: str) -> None: + input_frame = nw.from_native(constructor_with_pyarrow(data), eager_only=True) input_series = input_frame["a"] expected_col = [datetime.strftime(d, fmt) for d in data["a"]] result = input_series.dt.to_string(fmt).to_list() - if constructor is pa.table: + if constructor_with_pyarrow is pa.table or "pyarrow" in str(constructor_with_pyarrow): # PyArrow differs from other libraries, in that %S also shows # the fraction of a second. result = [x[: x.find(".")] if "." in x else x for x in result] assert result == expected_col result = input_frame.select(nw.col("a").dt.to_string(fmt))["a"].to_list() - if constructor is pa.table: + if constructor_with_pyarrow is pa.table or "pyarrow" in str(constructor_with_pyarrow): # PyArrow differs from other libraries, in that %S also shows # the fraction of a second. result = [x[: x.find(".")] if "." 
in x else x for x in result] assert result == expected_col -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame, pa.table]) @pytest.mark.parametrize( ("data", "expected"), [ @@ -207,7 +236,7 @@ def test_dt_to_string(constructor: Any, fmt: str) -> None: ) @pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows") def test_dt_to_string_iso_local_datetime( - constructor: Any, data: datetime, expected: str + constructor_with_pyarrow: Any, data: datetime, expected: str ) -> None: def _clean_string(result: str) -> str: # rstrip '0' to remove trailing zeros, as different libraries handle this differently @@ -216,7 +245,7 @@ def _clean_string(result: str) -> str: result = result.rstrip("0").rstrip(".") return result - df = constructor({"a": [data]}) + df = constructor_with_pyarrow({"a": [data]}) result = ( nw.from_native(df, eager_only=True)["a"] .dt.to_string("%Y-%m-%dT%H:%M:%S.%f") @@ -246,7 +275,6 @@ def _clean_string(result: str) -> str: assert _clean_string(result) == _clean_string(expected) -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame, pa.table]) @pytest.mark.parametrize( ("data", "expected"), [ @@ -255,9 +283,9 @@ def _clean_string(result: str) -> str: ) @pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows") def test_dt_to_string_iso_local_date( - constructor: Any, data: datetime, expected: str + constructor_with_pyarrow: Any, data: datetime, expected: str ) -> None: - df = constructor({"a": [data]}) + df = constructor_with_pyarrow({"a": [data]}) result = ( nw.from_native(df, eager_only=True)["a"].dt.to_string("%Y-%m-%d").to_list()[0] ) diff --git a/tests/frame/drop_nulls_test.py b/tests/frame/drop_nulls_test.py index 8d8acfb97..c63e0029a 100644 --- a/tests/frame/drop_nulls_test.py +++ b/tests/frame/drop_nulls_test.py @@ -1,9 +1,5 @@ from typing import Any -import pandas as pd -import polars as pl -import pytest - import narwhals as nw from tests.utils import compare_dicts @@ -13,7 +9,6 @@ } 
-@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.LazyFrame]) def test_drop_nulls(constructor: Any) -> None: result = nw.from_native(constructor(data)).drop_nulls() expected = { @@ -21,3 +16,5 @@ def test_drop_nulls(constructor: Any) -> None: "b": [3.0, 5.0], } compare_dicts(result, expected) + result = nw.from_native(constructor(data)).lazy().drop_nulls() + compare_dicts(result, expected) diff --git a/tests/frame/len_test.py b/tests/frame/len_test.py index 2cd85269c..65f8d3831 100644 --- a/tests/frame/len_test.py +++ b/tests/frame/len_test.py @@ -1,10 +1,5 @@ from typing import Any -import pandas as pd -import polars as pl -import pyarrow as pa -import pytest - import narwhals as nw data = { @@ -13,7 +8,6 @@ } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame, pa.table]) -def test_drop_nulls(constructor: Any) -> None: - result = len(nw.from_native(constructor(data))) +def test_len(constructor_with_pyarrow: Any) -> None: + result = len(nw.from_native(constructor_with_pyarrow(data))) assert result == 4 diff --git a/tests/frame/pipe_test.py b/tests/frame/pipe_test.py index 47eead1be..5fc9bd6d3 100644 --- a/tests/frame/pipe_test.py +++ b/tests/frame/pipe_test.py @@ -1,9 +1,5 @@ from typing import Any -import pandas as pd -import polars as pl -import pytest - import narwhals as nw from tests.utils import compare_dicts @@ -13,10 +9,11 @@ } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.LazyFrame]) def test_pipe(constructor: Any) -> None: df = nw.from_native(constructor(data)) - columns = df.columns + columns = df.lazy().collect().columns result = df.pipe(lambda _df: _df.select([x for x in columns if len(x) == 2])) expected = {"ab": ["foo", "bars"]} compare_dicts(result, expected) + result = df.lazy().pipe(lambda _df: _df.select([x for x in columns if len(x) == 2])) + compare_dicts(result, expected) diff --git a/tests/frame/schema_test.py b/tests/frame/schema_test.py index 932bc269c..79dadc6c2 100644 --- 
a/tests/frame/schema_test.py +++ b/tests/frame/schema_test.py @@ -34,7 +34,6 @@ def test_string_disguised_as_object() -> None: assert result["a"] == nw.String -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_actual_object(constructor: Any) -> None: class Foo: ... diff --git a/tests/frame/shape_test.py b/tests/frame/shape_test.py index b2ad5c29b..674967197 100644 --- a/tests/frame/shape_test.py +++ b/tests/frame/shape_test.py @@ -1,17 +1,11 @@ from typing import Any -import pandas as pd -import polars as pl -import pyarrow as pa -import pytest - import narwhals as nw -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame, pa.table]) -def test_shape(constructor: Any) -> None: +def test_shape(constructor_with_pyarrow: Any) -> None: result = nw.from_native( - constructor({"a": [1, 2], "b": [4, 5], "c": [7, 8]}), eager_only=True + constructor_with_pyarrow({"a": [1, 2], "b": [4, 5], "c": [7, 8]}), eager_only=True ).shape expected = (2, 3) assert result == expected diff --git a/tests/frame/slice_test.py b/tests/frame/slice_test.py index e58e71f01..5c47f2872 100644 --- a/tests/frame/slice_test.py +++ b/tests/frame/slice_test.py @@ -1,6 +1,5 @@ from typing import Any -import pandas as pd import polars as pl import pyarrow as pa import pytest @@ -14,23 +13,20 @@ } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame, pa.table]) -def test_slice_column(constructor: Any) -> None: - result = nw.from_native(constructor(data))["a"] +def test_slice_column(constructor_with_pyarrow: Any) -> None: + result = nw.from_native(constructor_with_pyarrow(data))["a"] assert isinstance(result, nw.Series) assert result.to_numpy().tolist() == [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame, pa.table]) -def test_slice_rows(constructor: Any) -> None: - result = nw.from_native(constructor(data))[1:] +def test_slice_rows(constructor_with_pyarrow: Any) -> None: + result = 
nw.from_native(constructor_with_pyarrow(data))[1:] compare_dicts(result, {"a": [2.0, 3.0, 4.0, 5.0, 6.0], "b": [12, 13, 14, 15, 16]}) - result = nw.from_native(constructor(data))[2:4] + result = nw.from_native(constructor_with_pyarrow(data))[2:4] compare_dicts(result, {"a": [3.0, 4.0], "b": [13, 14]}) -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_slice_rows_with_step(constructor: Any) -> None: result = nw.from_native(constructor(data))[1::2] compare_dicts(result, {"a": [2.0, 4.0, 6.0], "b": [12, 14, 16]}) @@ -43,13 +39,11 @@ def test_slice_rows_with_step_pyarrow() -> None: nw.from_native(pa.table(data))[1::2] -@pytest.mark.parametrize("constructor", [pl.LazyFrame]) -def test_slice_lazy_fails(constructor: Any) -> None: +def test_slice_lazy_fails() -> None: with pytest.raises(TypeError, match="Slicing is not supported on LazyFrame"): - _ = nw.from_native(constructor(data))[1:] + _ = nw.from_native(pl.LazyFrame(data))[1:] -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame, pa.table]) -def test_slice_int_fails(constructor: Any) -> None: +def test_slice_int_fails(constructor_with_pyarrow: Any) -> None: with pytest.raises(TypeError, match="Expected str or slice, got: "): - _ = nw.from_native(constructor(data))[1] # type: ignore[call-overload,index] + _ = nw.from_native(constructor_with_pyarrow(data))[1] # type: ignore[call-overload,index] diff --git a/tests/frame/test_common.py b/tests/frame/test_common.py index 126574b46..ddebf6f8f 100644 --- a/tests/frame/test_common.py +++ b/tests/frame/test_common.py @@ -635,19 +635,6 @@ def test_to_dict() -> None: pl_assert_series_equal(nw.to_native(result[key]), expected[key]) -@pytest.mark.parametrize( - "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] -) -def test_any_all(df_raw: Any) -> None: - df = nw.from_native(df_raw) - result = nw.to_native(df.select((nw.all() > 1).all())) - expected = {"a": [False], "b": [True], "z": [True]} - compare_dicts(result, 
expected) - result = nw.to_native(df.select((nw.all() > 1).any())) - expected = {"a": [True], "b": [True], "z": [True]} - compare_dicts(result, expected) - - def test_invalid() -> None: df = nw.from_native(df_pandas) with pytest.raises(ValueError, match="Multi-output"): diff --git a/tests/frame/with_columns_sequence_test.py b/tests/frame/with_columns_sequence_test.py index 82fb0eec1..335e15125 100644 --- a/tests/frame/with_columns_sequence_test.py +++ b/tests/frame/with_columns_sequence_test.py @@ -1,9 +1,6 @@ from typing import Any import numpy as np -import pandas as pd -import polars as pl -import pytest import narwhals as nw from tests.utils import compare_dicts @@ -14,7 +11,6 @@ } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_with_columns(constructor: Any) -> None: result = nw.from_native(constructor(data)).with_columns(d=np.array([4, 5])) expected = {"a": ["foo", "bars"], "ab": ["foo", "bars"], "d": [4, 5]} diff --git a/tests/frame/with_row_index_test.py b/tests/frame/with_row_index_test.py index f04862d86..5d705e4a6 100644 --- a/tests/frame/with_row_index_test.py +++ b/tests/frame/with_row_index_test.py @@ -1,9 +1,5 @@ from typing import Any -import pandas as pd -import polars as pl -import pytest - import narwhals as nw from tests.utils import compare_dicts @@ -13,8 +9,9 @@ } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.LazyFrame]) def test_with_row_index(constructor: Any) -> None: result = nw.from_native(constructor(data)).with_row_index() expected = {"a": ["foo", "bars"], "ab": ["foo", "bars"], "index": [0, 1]} compare_dicts(result, expected) + result = nw.from_native(constructor(data)).lazy().with_row_index() + compare_dicts(result, expected) diff --git a/tests/frame/write_parquet_test.py b/tests/frame/write_parquet_test.py index 7d4a71f3b..c154011bf 100644 --- a/tests/frame/write_parquet_test.py +++ b/tests/frame/write_parquet_test.py @@ -1,18 +1,17 @@ from __future__ import annotations import os +from 
typing import TYPE_CHECKING from typing import Any -import pandas as pd -import polars as pl -import pytest - import narwhals as nw data = {"a": [1, 2, 3]} +if TYPE_CHECKING: + import pytest + -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_write_parquet(constructor: Any, tmpdir: pytest.TempdirFactory) -> None: path = str(tmpdir / "foo.parquet") # type: ignore[operator] nw.from_native(constructor(data), eager_only=True).write_parquet(path) diff --git a/tests/series/arithmetic_test.py b/tests/series/arithmetic_test.py index 83199dc7d..9e72e791f 100644 --- a/tests/series/arithmetic_test.py +++ b/tests/series/arithmetic_test.py @@ -11,7 +11,6 @@ data = [1, 2, 3] -@pytest.mark.parametrize("constructor", [pd.Series, pl.Series]) @pytest.mark.parametrize( ("attr", "rhs", "expected"), [ @@ -24,13 +23,13 @@ ("__pow__", 2, [1, 4, 9]), ], ) +@pytest.mark.parametrize("constructor", [pd.Series, pl.Series]) def test_arithmetic(attr: str, rhs: Any, expected: list[Any], constructor: Any) -> None: s = nw.from_native(constructor(data), series_only=True) result = getattr(s, attr)(rhs) assert result.to_numpy().tolist() == expected -@pytest.mark.parametrize("constructor", [pd.Series, pl.Series]) @pytest.mark.parametrize( ("attr", "rhs", "expected"), [ @@ -42,6 +41,7 @@ def test_arithmetic(attr: str, rhs: Any, expected: list[Any], constructor: Any) ("__rpow__", 2, [2, 4, 8]), ], ) +@pytest.mark.parametrize("constructor", [pd.Series, pl.Series]) def test_rarithmetic(attr: str, rhs: Any, expected: list[Any], constructor: Any) -> None: s = nw.from_native(constructor(data), series_only=True) result = getattr(s, attr)(rhs) diff --git a/tests/test_group_by.py b/tests/test_group_by.py index 0c5fa013f..12fbaa42e 100644 --- a/tests/test_group_by.py +++ b/tests/test_group_by.py @@ -42,7 +42,6 @@ def test_invalid_group_by() -> None: df.group_by("a").agg(nw.all().mean()) -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def 
test_group_by_iter(constructor: Any) -> None: df = nw.from_native(constructor(data), eager_only=True) expected_keys = [(1,), (3,)] @@ -65,7 +64,6 @@ def test_group_by_iter(constructor: Any) -> None: assert sorted(keys) == sorted(expected_keys) -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_group_by_len(constructor: Any) -> None: result = ( nw.from_native(constructor(data)).group_by("a").agg(nw.col("b").len()).sort("a") diff --git a/tests/test_selectors.py b/tests/test_selectors.py index 286944028..d73bab0ff 100644 --- a/tests/test_selectors.py +++ b/tests/test_selectors.py @@ -18,28 +18,25 @@ data = { "a": [1, 1, 2], "b": ["a", "b", "c"], - "c": [4.0, 5.0, 6.0], + "c": [4.1, 5.0, 6.0], "d": [True, False, True], } -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_selecctors(constructor: Any) -> None: df = nw.from_native(constructor(data)) result = nw.to_native(df.select(by_dtype([nw.Int64, nw.Float64]) + 1)) - expected = {"a": [2, 2, 3], "c": [5.0, 6.0, 7.0]} + expected = {"a": [2, 2, 3], "c": [5.1, 6.0, 7.0]} compare_dicts(result, expected) -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_numeric(constructor: Any) -> None: df = nw.from_native(constructor(data)) result = nw.to_native(df.select(numeric() + 1)) - expected = {"a": [2, 2, 3], "c": [5.0, 6.0, 7.0]} + expected = {"a": [2, 2, 3], "c": [5.1, 6.0, 7.0]} compare_dicts(result, expected) -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_boolean(constructor: Any) -> None: df = nw.from_native(constructor(data)) result = nw.to_native(df.select(boolean())) @@ -47,7 +44,6 @@ def test_boolean(constructor: Any) -> None: compare_dicts(result, expected) -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) def test_string(constructor: Any) -> None: df = nw.from_native(constructor(data)) result = nw.to_native(df.select(string())) @@ -66,7 +62,6 @@ def test_categorical() -> None: 
compare_dicts(result, expected) -@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) @pytest.mark.parametrize( ("selector", "expected"), [