diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py
index dffa5500a..7bfa3c7a9 100644
--- a/narwhals/_arrow/dataframe.py
+++ b/narwhals/_arrow/dataframe.py
@@ -12,6 +12,7 @@
 from narwhals._expression_parsing import evaluate_into_exprs
 from narwhals.dependencies import get_numpy
 from narwhals.dependencies import get_pyarrow
+from narwhals.dependencies import get_pyarrow_parquet
 from narwhals.utils import flatten
 
 if TYPE_CHECKING:
@@ -340,3 +341,50 @@ def collect(self) -> ArrowDataFrame:
 
     def clone(self) -> Self:
         raise NotImplementedError("clone is not yet supported on PyArrow tables")
+
+    def is_empty(self: Self) -> bool:
+        """Return whether the frame has zero rows (`self.shape[0] == 0`)."""
+        return self.shape[0] == 0
+
+    def item(self: Self, row: int | None = None, column: int | str | None = None) -> Any:
+        """Return one cell of the table as a Python object (via `.as_py()`).
+
+        With no arguments the frame must have shape (1, 1) and its single
+        value is returned. Otherwise both `row` and `column` must be given;
+        a string `column` is looked up in `self.columns`, an integer is used
+        as the column position directly.
+
+        Raises:
+            ValueError: if no arguments are given and the shape is not (1, 1),
+                or if only one of `row` / `column` is given.
+        """
+        if row is None and column is None:
+            if self.shape != (1, 1):
+                msg = (
+                    "can only call `.item()` if the dataframe is of shape (1, 1),"
+                    " or if explicit row/col values are provided;"
+                    f" frame has shape {self.shape!r}"
+                )
+                raise ValueError(msg)
+            return self._native_dataframe[0][0].as_py()
+
+        elif row is None or column is None:
+            msg = "cannot call `.item()` with only one of `row` or `column`"
+            raise ValueError(msg)
+
+        _col = self.columns.index(column) if isinstance(column, str) else column
+        return self._native_dataframe[_col][row].as_py()
+
+    def rename(self, mapping: dict[str, str]) -> Self:
+        """Rename columns according to `mapping` and wrap the resulting table.
+
+        Columns that are not keys of `mapping` keep their current name.
+        """
+        df = self._native_dataframe
+        new_cols = [mapping.get(c, c) for c in df.column_names]
+        return self._from_native_dataframe(df.rename_columns(new_cols))
+
+    def write_parquet(self, file: Any) -> Any:
+        """Write the native table to `file` using `pyarrow.parquet.write_table`."""
+        pp = get_pyarrow_parquet()
+        pp.write_table(self._native_dataframe, file)
diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py
index 8c49bb39a..dc29de96d 100644
--- a/narwhals/_arrow/series.py
+++ b/narwhals/_arrow/series.py
@@ -248,6 +248,25 @@ def tail(self, n: int) -> Self:
         else:
             return self._from_native_series(ser.slice(abs(n)))
 
+    def item(self: Self, index: int | None = None) -> Any:
+        """Return one element of the series as a Python object (via `.as_py()`).
+
+        With `index=None` the series must have length 1 and its only element
+        is returned; otherwise the element at `index` is returned.
+
+        Raises:
+            ValueError: if `index` is None and the length is not 1.
+        """
+        if index is None:
+            if len(self) != 1:
+                msg = (
+                    "can only call '.item()' if the Series is of length 1,"
+                    f" or an explicit index is provided (Series is of length {len(self)})"
+                )
+                raise ValueError(msg)
+            return self._native_series[0].as_py()
+        return self._native_series[index].as_py()
+
     @property
     def shape(self) -> tuple[int]:
         return (len(self._native_series),)
diff --git a/narwhals/dependencies.py b/narwhals/dependencies.py
index 902609032..cbb5eca3c 100644
--- a/narwhals/dependencies.py
+++ b/narwhals/dependencies.py
@@ -51,6 +51,15 @@ def get_pyarrow_compute() -> Any:  # pragma: no cover
     return None
 
 
+def get_pyarrow_parquet() -> Any:  # pragma: no cover
+    """Get pyarrow.parquet module (if pyarrow has already been imported - else return None)."""
+    if "pyarrow" in sys.modules:
+        import pyarrow.parquet as pp
+
+        return pp
+    return None
+
+
 def get_numpy() -> Any:
     """Get numpy module (if already imported - else return None)."""
     return sys.modules.get("numpy", None)
diff --git a/tests/frame/is_empty_test.py b/tests/frame/is_empty_test.py
index 06defe73b..b49fbb11a 100644
--- a/tests/frame/is_empty_test.py
+++ b/tests/frame/is_empty_test.py
@@ -8,9 +8,9 @@
 
 
 @pytest.mark.parametrize(("threshold", "expected"), [(0, False), (10, True)])
-def test_is_empty(constructor: Any, threshold: Any, expected: Any) -> None:
+def test_is_empty(constructor_with_pyarrow: Any, threshold: Any, expected: Any) -> None:
     data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
-    df_raw = constructor(data)
+    df_raw = constructor_with_pyarrow(data)
     df = nw.from_native(df_raw, eager_only=True)
     result = df.filter(nw.col("a") > threshold).is_empty()
     assert result == expected
diff --git a/tests/frame/rename_test.py b/tests/frame/rename_test.py
index ab9018036..c857a92e2 100644
--- a/tests/frame/rename_test.py
+++ b/tests/frame/rename_test.py
@@ -4,10 +4,9 @@
 from tests.utils import compare_dicts
 
 
-def test_rename(constructor: Any) -> None:
+def test_rename(constructor_with_pyarrow: Any) -> None:
     data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
-    df = nw.from_native(constructor(data), eager_only=True)
+    df = nw.from_native(constructor_with_pyarrow(data), eager_only=True)
     result = df.rename({"a": "x", "b": "y"})
-    result_native = nw.to_native(result)
     expected = {"x": [1, 3, 2], "y": [4, 4, 6], "z": [7.0, 8, 9]}
-    compare_dicts(result_native, expected)
+    compare_dicts(result, expected)
diff --git a/tests/frame/rows_test.py b/tests/frame/rows_test.py
index 371ff35d5..9d60f48ae 100644
--- a/tests/frame/rows_test.py
+++ b/tests/frame/rows_test.py
@@ -97,6 +97,7 @@ def test_rows(
         ):
             df.rows(named=named)
         return
+
     result = df.rows(named=named)
     assert result == expected
 
diff --git a/tests/frame/test_common.py b/tests/frame/test_common.py
index fe811d80e..49f419163 100644
--- a/tests/frame/test_common.py
+++ b/tests/frame/test_common.py
@@ -334,7 +334,7 @@ def test_library(df_raw: Any, df_raw_right: Any) -> None:
         df_left.join(df_right, left_on=["a"], right_on=["a"], how="inner")
 
 
-@pytest.mark.parametrize("df_raw", [df_pandas, df_polars])
+@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_pa])
 @pytest.mark.parametrize(
     ("row", "column", "expected"),
     [(0, 2, 7), (1, "z", 8)],
@@ -350,7 +350,7 @@ def test_item(
     assert df.select("a").head(1).item() == 1
 
 
-@pytest.mark.parametrize("df_raw", [df_pandas, df_polars])
+@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_pa])
 @pytest.mark.parametrize(
     ("row", "column", "err_msg"),
     [
diff --git a/tests/frame/write_parquet_test.py b/tests/frame/write_parquet_test.py
index 88e35b3fa..a00c02f75 100644
--- a/tests/frame/write_parquet_test.py
+++ b/tests/frame/write_parquet_test.py
@@ -15,7 +15,9 @@
 @pytest.mark.skipif(
     parse_version(pd.__version__) < parse_version("2.0.0"), reason="too old for pyarrow"
 )
-def test_write_parquet(constructor: Any, tmpdir: pytest.TempdirFactory) -> None:
+def test_write_parquet(
+    constructor_with_pyarrow: Any, tmpdir: pytest.TempdirFactory
+) -> None:
     path = str(tmpdir / "foo.parquet")  # type: ignore[operator]
-    nw.from_native(constructor(data), eager_only=True).write_parquet(path)
+    nw.from_native(constructor_with_pyarrow(data), eager_only=True).write_parquet(path)
     assert os.path.exists(path)
diff --git a/tests/series/test_common.py b/tests/series/test_common.py
index a92b2f90f..842883b73 100644
--- a/tests/series/test_common.py
+++ b/tests/series/test_common.py
@@ -7,6 +7,7 @@
 import numpy as np
 import pandas as pd
 import polars as pl
+import pyarrow as pa
 import pytest
 from numpy.testing import assert_array_equal
 from pandas.testing import assert_series_equal
@@ -40,6 +41,7 @@
     df_pandas_nullable = df_pandas
 df_polars = pl.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
 df_lazy = pl.LazyFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
+df_pa = pa.table({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
 
 
 @pytest.mark.parametrize(
@@ -255,13 +257,13 @@ def test_cast_string() -> None:
 df_pandas = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
 
 
-@pytest.mark.parametrize("df_raw", [df_pandas, df_polars])
+@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_pa])
 @pytest.mark.parametrize(("index", "expected"), [(0, 1), (1, 3)])
 def test_item(df_raw: Any, index: int, expected: int) -> None:
     s = nw.from_native(df_raw["a"], series_only=True)
     result = s.item(index)
     assert result == expected
-    assert nw.from_native(df_raw["a"].head(1), series_only=True).item() == 1
+    assert s.head(1).item() == 1
 
     with pytest.raises(
         ValueError,
diff --git a/utils/check_backend_completeness.py b/utils/check_backend_completeness.py
index cb9a6f39b..a9b6ae73c 100644
--- a/utils/check_backend_completeness.py
+++ b/utils/check_backend_completeness.py
@@ -13,14 +13,10 @@
 
 MISSING = [
     "DataFrame.is_duplicated",
-    "DataFrame.is_empty",
     "DataFrame.is_unique",
-    "DataFrame.item",
     "DataFrame.iter_rows",
     "DataFrame.pipe",
-    "DataFrame.rename",
     "DataFrame.unique",
-    "DataFrame.write_parquet",
     "Series.drop_nulls",
     "Series.fill_null",
     "Series.from_iterable",
@@ -32,7 +28,6 @@
     "Series.is_null",
     "Series.is_sorted",
     "Series.is_unique",
-    "Series.item",
     "Series.len",
     "Series.n_unique",
     "Series.quantile",