Skip to content

Commit

Permalink
feat: add clone for dataframes (#406)
Browse files Browse the repository at this point in the history
* add clone

* add to lazy frame too
  • Loading branch information
EdAbati authored Jul 4, 2024
1 parent 88f387e commit e90afe4
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/api-reference/dataframe.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
handler: python
options:
members:
- clone
- columns
- drop
- drop_nulls
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/lazyframe.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
handler: python
options:
members:
- clone
- collect
- columns
- drop
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,6 @@ def lazy(self) -> Self:

def collect(self) -> ArrowDataFrame:
    """Return this frame's data wrapped in an ``ArrowDataFrame``.

    A new ``ArrowDataFrame`` is built around the same ``self._dataframe``
    object; the wrapped object itself is passed through unchanged.
    """
    collected = ArrowDataFrame(self._dataframe)
    return collected

def clone(self) -> Self:
    """Cloning is not available for this backend.

    Raises:
        NotImplementedError: Always, since clone is not yet supported on
            PyArrow tables.
    """
    msg = "clone is not yet supported on PyArrow tables"
    raise NotImplementedError(msg)
3 changes: 3 additions & 0 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,3 +452,6 @@ def item(self: Self, row: int | None = None, column: int | str | None = None) ->

_col = self.columns.index(column) if isinstance(column, str) else column
return self._dataframe.iat[row, _col]

def clone(self: Self) -> Self:
    """Create a copy of this dataframe.

    The wrapped dataframe's ``copy()`` method is called and the result is
    re-wrapped through ``self._from_dataframe``.
    """
    native_copy = self._dataframe.copy()
    return self._from_dataframe(native_copy)
74 changes: 74 additions & 0 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,9 @@ def join(
)
)

def clone(self) -> Self:
    """Return a copy of this frame.

    Delegates to the wrapped frame's ``clone()`` and re-wraps the result
    through ``self._from_dataframe``.
    """
    cloned_frame = self._dataframe.clone()
    return self._from_dataframe(cloned_frame)


class DataFrame(BaseFrame):
"""
Expand Down Expand Up @@ -1732,6 +1735,42 @@ def item(self: Self, row: int | None = None, column: int | str | None = None) ->
"""
return self._dataframe.item(row=row, column=column)

def clone(self) -> Self:
    r"""
    Create a copy of this DataFrame.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> data = {"a": [1, 2], "b": [3, 4]}
        >>> df_pd = pd.DataFrame(data)
        >>> df_pl = pl.DataFrame(data)

        Let's define a dataframe-agnostic function in which we clone the DataFrame:

        >>> @nw.narwhalify
        ... def func(df):
        ...     return df.clone()

        >>> func(df_pd)
           a  b
        0  1  3
        1  2  4

        >>> func(df_pl)
        shape: (2, 2)
        ┌─────┬─────┐
        │ a   ┆ b   │
        │ --- ┆ --- │
        │ i64 ┆ i64 │
        ╞═════╪═════╡
        │ 1   ┆ 3   │
        │ 2   ┆ 4   │
        └─────┴─────┘
    """
    # The copying itself is implemented once on the parent class.
    cloned = super().clone()
    return cloned


class LazyFrame(BaseFrame):
"""
Expand Down Expand Up @@ -2889,3 +2928,38 @@ def join(
└─────┴─────┴─────┴───────┘
"""
return super().join(other, how=how, left_on=left_on, right_on=right_on)

def clone(self) -> Self:
    r"""
    Create a copy of this LazyFrame.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> data = {"a": [1, 2], "b": [3, 4]}
        >>> df_pd = pd.DataFrame(data)
        >>> df_pl = pl.LazyFrame(data)

        Let's define a dataframe-agnostic function in which we clone the frame:

        >>> @nw.narwhalify
        ... def func(df):
        ...     return df.clone()

        >>> func(df_pd)
           a  b
        0  1  3
        1  2  4

        >>> func(df_pl).collect()
        shape: (2, 2)
        ┌─────┬─────┐
        │ a   ┆ b   │
        │ --- ┆ --- │
        │ i64 ┆ i64 │
        ╞═════╪═════╡
        │ 1   ┆ 3   │
        │ 2   ┆ 4   │
        └─────┴─────┘
    """
    # The copying itself is implemented once on the parent class.
    return super().clone()
9 changes: 9 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ def polars_constructor(obj: Any) -> IntoDataFrame:
return pl.DataFrame(obj)


def polars_lazy_constructor(obj: Any) -> pl.LazyFrame:
    """Build a Polars ``LazyFrame`` from ``obj``."""
    lazy_frame = pl.LazyFrame(obj)
    return lazy_frame


if parse_version(pd.__version__) >= parse_version("2.0.0"):
params = [pandas_constructor, pandas_nullable_constructor, pandas_pyarrow_constructor]
else: # pragma: no cover
Expand All @@ -66,6 +70,11 @@ def constructor(request: Any) -> Callable[[Any], IntoDataFrame]:
return request.param # type: ignore[no-any-return]


@pytest.fixture(params=[*params, polars_lazy_constructor])
def constructor_with_lazy(request: Any) -> Callable[[Any], Any]:
    """Parametrized fixture yielding each constructor in ``params`` plus the Polars lazy one.

    The fixture value is the constructor callable itself (``request.param``),
    so a test using it runs once per constructor.
    """
    chosen_constructor = request.param
    return chosen_constructor  # type: ignore[no-any-return]


# TODO: once pyarrow has complete coverage, we can remove this one,
# and just put `pa.table` into `constructor`
@pytest.fixture(params=[*params, pa.table])
Expand Down
13 changes: 13 additions & 0 deletions tests/frame/clone_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from typing import Any

import narwhals as nw
from tests.utils import compare_dicts


def test_clone(constructor_with_lazy: Any) -> None:
    """A clone is a distinct wrapper around a distinct inner frame holding equal data."""
    data = {"a": [1, 2], "b": [3, 4]}
    original = nw.from_native(constructor_with_lazy(data))
    duplicate = original.clone()

    # Neither the narwhals wrapper nor the frame it wraps may be shared.
    assert original is not duplicate
    assert original._dataframe is not duplicate._dataframe
    # The cloned frame must hold the same columns and values as the input.
    compare_dicts(duplicate, data)

0 comments on commit e90afe4

Please sign in to comment.