Skip to content

Commit

Permalink
feat: to_numpy,to_dict,pipe,with_row_index
Browse files Browse the repository at this point in the history
  • Loading branch information
FBruzzesi committed Jul 11, 2024
1 parent bb99986 commit 78020d3
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 15 deletions.
30 changes: 30 additions & 0 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Iterable
from typing import Sequence
from typing import overload
Expand Down Expand Up @@ -195,6 +196,35 @@ def sort(
def to_pandas(self) -> Any:
    """Convert the wrapped native frame by delegating to its ``to_pandas()``."""
    native_frame = self._native_dataframe
    return native_frame.to_pandas()

def to_numpy(self) -> Any:
    """Return the frame as a 2-D NumPy array.

    Each native column is converted with its own ``to_numpy()`` and the
    results are stacked side by side, so native column ``i`` becomes
    array column ``i``.
    """
    import numpy as np

    per_column = [column.to_numpy() for column in self._native_dataframe.columns]
    return np.column_stack(per_column)

def to_dict(self, *, as_series: bool) -> Any:
    """Build a ``{column_name: column}`` mapping from the native frame.

    Args:
        as_series: When true, each value is an ``ArrowSeries`` wrapping the
            native column; otherwise each value is the column's
            ``to_pylist()`` result.

    Returns:
        A dict keyed by column name, in the native frame's column order.
    """
    native_frame = self._native_dataframe
    column_names = native_frame.column_names
    column_values = native_frame.columns

    if not as_series:
        return {
            key: values.to_pylist()
            for key, values in zip(column_names, column_values)
        }

    # Imported lazily, only on the path that actually needs the series wrapper.
    from narwhals._arrow.series import ArrowSeries

    return {
        key: ArrowSeries(values, name=key, backend_version=self._backend_version)
        for key, values in zip(column_names, column_values)
    }

def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Self:
    """Call ``function`` with this frame as its first positional argument.

    Any extra positional and keyword arguments are forwarded unchanged, and
    whatever ``function`` returns is returned as-is.
    """
    outcome = function(self, *args, **kwargs)
    return outcome

def with_row_index(self, name: str) -> Self:
    """Return a new frame with a 0-based row-number column called ``name``.

    The index column is inserted as the *first* column, which is where
    polars' ``with_row_index`` places it.

    Args:
        name: Name given to the new row-index column.

    Returns:
        The result of ``_from_native_dataframe`` on the native frame with
        the index column added at position 0.
    """
    pa = get_pyarrow()
    df = self._native_dataframe

    row_indices = pa.array(range(df.num_rows))
    # ``append_column`` would put the index last; ``add_column(0, ...)``
    # inserts it in front of the existing columns instead.
    return self._from_native_dataframe(df.add_column(0, name, row_indices))

def lazy(self) -> Self:
    """Return this very object; no new frame is constructed here."""
    same_frame = self
    return same_frame

Expand Down
4 changes: 2 additions & 2 deletions tests/frame/pipe_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
}


def test_pipe(constructor: Any) -> None:
df = nw.from_native(constructor(data))
def test_pipe(constructor_with_pyarrow: Any) -> None:
df = nw.from_native(constructor_with_pyarrow(data))
columns = df.lazy().collect().columns
result = df.pipe(lambda _df: _df.select([x for x in columns if len(x) == 2]))
expected = {"ab": ["foo", "bars"]}
Expand Down
8 changes: 4 additions & 4 deletions tests/frame/to_dict_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@
import narwhals.stable.v1 as nw


def test_to_dict(constructor: Any) -> None:
def test_to_dict(constructor_with_pyarrow: Any) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "c": [7.0, 8, 9]}
df = nw.from_native(constructor(data), eager_only=True)
df = nw.from_native(constructor_with_pyarrow(data), eager_only=True)
result = df.to_dict(as_series=False)
assert result == data


def test_to_dict_as_series(constructor: Any) -> None:
def test_to_dict_as_series(constructor_with_pyarrow: Any) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "c": [7.0, 8, 9]}
df = nw.from_native(constructor(data), eager_only=True)
df = nw.from_native(constructor_with_pyarrow(data), eager_only=True)
result = df.to_dict(as_series=True)
assert isinstance(result["a"], nw.Series)
assert isinstance(result["b"], nw.Series)
Expand Down
4 changes: 2 additions & 2 deletions tests/frame/to_numpy_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
import narwhals.stable.v1 as nw


def test_convert_numpy(constructor: Any) -> None:
def test_convert_numpy(constructor_with_pyarrow: Any) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8, 9]}
df_raw = constructor(data)
df_raw = constructor_with_pyarrow(data)
result = nw.from_native(df_raw, eager_only=True).to_numpy()

expected = np.array([[1, 3, 2], [4, 4, 6], [7.1, 8, 9]]).T
Expand Down
6 changes: 3 additions & 3 deletions tests/frame/with_row_index_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
}


def test_with_row_index(constructor: Any) -> None:
result = nw.from_native(constructor(data)).with_row_index()
def test_with_row_index(constructor_with_pyarrow: Any) -> None:
result = nw.from_native(constructor_with_pyarrow(data)).with_row_index()
expected = {"a": ["foo", "bars"], "ab": ["foo", "bars"], "index": [0, 1]}
compare_dicts(result, expected)
result = nw.from_native(constructor(data)).lazy().with_row_index()
result = nw.from_native(constructor_with_pyarrow(data)).lazy().with_row_index()
compare_dicts(result, expected)
4 changes: 0 additions & 4 deletions utils/check_backend_completeness.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,9 @@
"DataFrame.iter_rows",
"DataFrame.join",
"DataFrame.null_count",
"DataFrame.pipe",
"DataFrame.rename",
"DataFrame.tail",
"DataFrame.to_dict",
"DataFrame.to_numpy",
"DataFrame.unique",
"DataFrame.with_row_index",
"DataFrame.write_parquet",
"Series.drop_nulls",
"Series.fill_null",
Expand Down

0 comments on commit 78020d3

Please sign in to comment.