From 52b427c76ade884361030c2606cda4922317d6d4 Mon Sep 17 00:00:00 2001 From: Rik van der Vlist Date: Sun, 3 Nov 2024 18:04:23 +0100 Subject: [PATCH 01/14] update maybe_set_index to support directly adding indices --- narwhals/utils.py | 16 +++++++++++----- tests/utils_test.py | 42 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 45 insertions(+), 13 deletions(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index 66c2badee..c8093b6c3 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -274,10 +274,9 @@ def maybe_get_index(obj: T) -> Any | None: return None -def maybe_set_index(df: T, column_names: str | list[str]) -> T: +def maybe_set_index(df: T, keys: str | Series | list[Series | str]) -> T: """ Set columns `columns` to be the index of `df`, if `df` is pandas-like. - Notes: This is only really intended for backwards-compatibility purposes, for example if your library already aligns indices for users. @@ -297,14 +296,21 @@ def maybe_set_index(df: T, column_names: str | list[str]) -> T: 4 1 5 2 """ + from narwhals.series import Series + df_any = cast(Any, df) native_frame = to_native(df_any) + if is_pandas_like_dataframe(native_frame): + if _is_iterable(keys): + keys = [key.to_native() if isinstance(key, Series) else key for key in keys] + if isinstance(keys, Series): + keys = keys.to_native() + return df_any._from_compliant_dataframe( # type: ignore[no-any-return] - df_any._compliant_frame._from_native_frame( - native_frame.set_index(column_names) - ) + df_any._compliant_frame._from_native_frame(native_frame.set_index(keys)) ) + return df_any # type: ignore[no-any-return] diff --git a/tests/utils_test.py b/tests/utils_test.py index fb668b4d2..ba76d9d5d 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -1,6 +1,7 @@ from __future__ import annotations import string +from typing import TYPE_CHECKING import hypothesis.strategies as st import pandas as pd @@ -15,6 +16,9 @@ from tests.utils import PANDAS_VERSION from tests.utils import 
get_module_version_as_tuple +if TYPE_CHECKING: + from narwhals.series import Series + def test_maybe_align_index_pandas() -> None: df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]}, index=[1, 2, 0])) @@ -58,18 +62,40 @@ def test_maybe_align_index_polars() -> None: nw.maybe_align_index(df, s[1:]) -def test_maybe_set_index_pandas() -> None: - df = nw.from_native(pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=[1, 2, 0])) - result = nw.maybe_set_index(df, "b") - expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=[1, 2, 0]).set_index( - "b" - ) +@pytest.mark.parametrize( + ("pandas_keys", "narwhals_keys"), + [ + ("b", "b"), + (pd.Series([1, 2, 0]), nw.from_native(pd.Series([1, 2, 0]), series_only=True)), + (["a", "b"], ["a", "b"]), + ( + [pd.Series([0, 1, 2]), "b"], + [nw.from_native(pd.Series([0, 1, 2]), series_only=True), "b"], + ), + ], +) +def test_maybe_set_index_pandas( + pandas_keys: str | Series | list[Series | str], + narwhals_keys: str | Series | list[Series | str], +) -> None: + df = nw.from_native(pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})) + result = nw.maybe_set_index(df, narwhals_keys) + expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}).set_index(pandas_keys) assert_frame_equal(nw.to_native(result), expected) -def test_maybe_set_index_polars() -> None: +@pytest.mark.parametrize( + "narwhals_keys", + [ + "b", + nw.from_native(pd.Series([1, 2, 0]), series_only=True), + ["a", "b"], + [nw.from_native(pd.Series([0, 1, 2]), series_only=True), "b"], + ], +) +def test_maybe_set_index_polars(narwhals_keys: str | Series | list[Series | str]) -> None: df = nw.from_native(pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})) - result = nw.maybe_set_index(df, "b") + result = nw.maybe_set_index(df, narwhals_keys) assert result is df From ec888deee413845de276f3d30e078708f51ead4d Mon Sep 17 00:00:00 2001 From: Rik van der Vlist Date: Mon, 4 Nov 2024 10:21:31 +0100 Subject: [PATCH 02/14] update docstring --- narwhals/utils.py | 6 ++++-- 1 file 
changed, 4 insertions(+), 2 deletions(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index c8093b6c3..2789d5242 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -276,7 +276,8 @@ def maybe_get_index(obj: T) -> Any | None: def maybe_set_index(df: T, keys: str | Series | list[Series | str]) -> T: """ - Set columns `columns` to be the index of `df`, if `df` is pandas-like. + Set columns `keys` to be the index of `df`, if `df` is pandas-like. 'keys' should be + a name of an existing column, a Series, or a list of column names and/or Series. Notes: This is only really intended for backwards-compatibility purposes, for example if your library already aligns indices for users. @@ -296,12 +297,13 @@ def maybe_set_index(df: T, keys: str | Series | list[Series | str]) -> T: 4 1 5 2 """ - from narwhals.series import Series df_any = cast(Any, df) native_frame = to_native(df_any) if is_pandas_like_dataframe(native_frame): + from narwhals.series import Series + if _is_iterable(keys): keys = [key.to_native() if isinstance(key, Series) else key for key in keys] if isinstance(keys, Series): From 90a048d91d86cda4ab53a95e036cfc27af3df753 Mon Sep 17 00:00:00 2001 From: Rik van der Vlist Date: Sat, 9 Nov 2024 15:30:41 +0100 Subject: [PATCH 03/14] change to separate and parameters --- narwhals/utils.py | 45 ++++++++++++++++++------ tests/utils_test.py | 83 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 100 insertions(+), 28 deletions(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index 2789d5242..d01f77bd4 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -274,10 +274,15 @@ def maybe_get_index(obj: T) -> Any | None: return None -def maybe_set_index(df: T, keys: str | Series | list[Series | str]) -> T: +def maybe_set_index( + df: T, + column_names: str | list[str] | None = None, + *, + index: Series | list[Series] | None = None, +) -> T: """ - Set columns `keys` to be the index of `df`, if `df` is pandas-like. 
'keys' should be - a name of an existing column, a Series, or a list of column names and/or Series. + Set the index of `df`, if `df` is pandas-like. The index can either be specified as + a existing column name or list of column names with `column_names`, or set directly with a Series or list of Series with `index`. Notes: This is only really intended for backwards-compatibility purposes, for example if your library already aligns indices for users. @@ -301,17 +306,35 @@ def maybe_set_index(df: T, keys: str | Series | list[Series | str]) -> T: df_any = cast(Any, df) native_frame = to_native(df_any) + if column_names is not None and index is not None: + msg = "Only one of `column_names` or `keys` should be provided" + raise ValueError(msg) + + if not column_names and not index: + msg = "Either `column_names` or `keys` should be provided" + raise ValueError(msg) + if is_pandas_like_dataframe(native_frame): - from narwhals.series import Series + if column_names is not None: + return df_any._from_compliant_dataframe( # type: ignore[no-any-return] + df_any._compliant_frame._from_native_frame( + native_frame.set_index(column_names) + ) + ) - if _is_iterable(keys): - keys = [key.to_native() if isinstance(key, Series) else key for key in keys] - if isinstance(keys, Series): - keys = keys.to_native() + if index is not None: + from narwhals.series import Series - return df_any._from_compliant_dataframe( # type: ignore[no-any-return] - df_any._compliant_frame._from_native_frame(native_frame.set_index(keys)) - ) + if _is_iterable(index): + index = [ + key.to_native() if isinstance(key, Series) else key for key in index + ] + if isinstance(index, Series): + index = index.to_native() + + return df_any._from_compliant_dataframe( # type: ignore[no-any-return] + df_any._compliant_frame._from_native_frame(native_frame.set_index(index)) + ) return df_any # type: ignore[no-any-return] diff --git a/tests/utils_test.py b/tests/utils_test.py index ba76d9d5d..bdbd32566 100644 --- 
a/tests/utils_test.py +++ b/tests/utils_test.py @@ -63,42 +63,91 @@ def test_maybe_align_index_polars() -> None: @pytest.mark.parametrize( - ("pandas_keys", "narwhals_keys"), + "column_names", + ["b", ["a", "b"]], +) +def test_maybe_set_index_pandas_column_names( + column_names: str | list[str] | None, +) -> None: + df = nw.from_native(pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})) + result = nw.maybe_set_index(df, column_names) + expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}).set_index(column_names) + assert_frame_equal(nw.to_native(result), expected) + + +@pytest.mark.parametrize( + "column_names", + [ + "b", + ["a", "b"], + ], +) +def test_maybe_set_index_polars_column_names( + column_names: str | list[str] | None, +) -> None: + df = nw.from_native(pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})) + result = nw.maybe_set_index(df, column_names) + assert result is df + + +@pytest.mark.parametrize( + ("narwhals_index", "pandas_index"), [ - ("b", "b"), - (pd.Series([1, 2, 0]), nw.from_native(pd.Series([1, 2, 0]), series_only=True)), - (["a", "b"], ["a", "b"]), + (nw.from_native(pd.Series([1, 2, 0]), series_only=True), pd.Series([1, 2, 0])), ( - [pd.Series([0, 1, 2]), "b"], - [nw.from_native(pd.Series([0, 1, 2]), series_only=True), "b"], + [ + nw.from_native(pd.Series([0, 1, 2]), series_only=True), + nw.from_native(pd.Series([1, 2, 0]), series_only=True), + ], + [ + pd.Series([0, 1, 2]), + pd.Series([1, 2, 0]), + ], ), ], ) -def test_maybe_set_index_pandas( - pandas_keys: str | Series | list[Series | str], - narwhals_keys: str | Series | list[Series | str], +def test_maybe_set_index_pandas_direct_index( + narwhals_index: Series | list[Series] | None, + pandas_index: pd.Series | list[pd.Series] | None, ) -> None: df = nw.from_native(pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})) - result = nw.maybe_set_index(df, narwhals_keys) - expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}).set_index(pandas_keys) + result = nw.maybe_set_index(df, 
index=narwhals_index) + expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}).set_index(pandas_index) assert_frame_equal(nw.to_native(result), expected) @pytest.mark.parametrize( - "narwhals_keys", + "index", [ - "b", nw.from_native(pd.Series([1, 2, 0]), series_only=True), - ["a", "b"], - [nw.from_native(pd.Series([0, 1, 2]), series_only=True), "b"], + [ + nw.from_native(pd.Series([0, 1, 2]), series_only=True), + nw.from_native(pd.Series([1, 2, 0]), series_only=True), + ], ], ) -def test_maybe_set_index_polars(narwhals_keys: str | Series | list[Series | str]) -> None: +def test_maybe_set_index_polars_direct_index( + index: Series | list[Series] | None, +) -> None: df = nw.from_native(pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})) - result = nw.maybe_set_index(df, narwhals_keys) + result = nw.maybe_set_index(df, index=index) assert result is df +def test_maybe_set_index_pandas_either_index_or_column_names() -> None: + df = nw.from_native(pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})) + column_names = ["a", "b"] + index = nw.from_native(pd.Series([0, 1, 2]), series_only=True) + with pytest.raises( + ValueError, match="Only one of `column_names` or `keys` should be provided" + ): + nw.maybe_set_index(df, column_names=column_names, index=index) + with pytest.raises( + ValueError, match="Either `column_names` or `keys` should be provided" + ): + nw.maybe_set_index(df) + + def test_maybe_get_index_pandas() -> None: pandas_df = pd.DataFrame({"a": [1, 2, 3]}, index=[1, 2, 0]) result = nw.maybe_get_index(nw.from_native(pandas_df)) From 540532a1db2da45d428d6267499ac6ddbd9d9f23 Mon Sep 17 00:00:00 2001 From: Rik van der Vlist Date: Sat, 9 Nov 2024 21:42:24 +0100 Subject: [PATCH 04/14] fix issue with coverage --- narwhals/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index d01f77bd4..539deeebf 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -322,7 +322,7 @@ def maybe_set_index( ) ) - if index is 
not None: + if index is not None: # pragma: no cover from narwhals.series import Series if _is_iterable(index): From 968da4fabaa18724947444134aeba507cc1d036e Mon Sep 17 00:00:00 2001 From: Rik van der Vlist Date: Sat, 9 Nov 2024 21:50:07 +0100 Subject: [PATCH 05/14] rename keys to index --- narwhals/utils.py | 6 +++--- tests/utils_test.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index 539deeebf..51887e203 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -307,11 +307,11 @@ def maybe_set_index( native_frame = to_native(df_any) if column_names is not None and index is not None: - msg = "Only one of `column_names` or `keys` should be provided" + msg = "Only one of `column_names` or `index` should be provided" raise ValueError(msg) if not column_names and not index: - msg = "Either `column_names` or `keys` should be provided" + msg = "Either `column_names` or `index` should be provided" raise ValueError(msg) if is_pandas_like_dataframe(native_frame): @@ -327,7 +327,7 @@ def maybe_set_index( if _is_iterable(index): index = [ - key.to_native() if isinstance(key, Series) else key for key in index + idx.to_native() if isinstance(idx, Series) else idx for idx in index ] if isinstance(index, Series): index = index.to_native() diff --git a/tests/utils_test.py b/tests/utils_test.py index bdbd32566..912a8a405 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -139,11 +139,11 @@ def test_maybe_set_index_pandas_either_index_or_column_names() -> None: column_names = ["a", "b"] index = nw.from_native(pd.Series([0, 1, 2]), series_only=True) with pytest.raises( - ValueError, match="Only one of `column_names` or `keys` should be provided" + ValueError, match="Only one of `column_names` or `index` should be provided" ): nw.maybe_set_index(df, column_names=column_names, index=index) with pytest.raises( - ValueError, match="Either `column_names` or `keys` should be provided" + ValueError, match="Either 
`column_names` or `index` should be provided" ): nw.maybe_set_index(df) From 0edf51954b096ddc335541d17613c19812e44023 Mon Sep 17 00:00:00 2001 From: Rik van der Vlist Date: Sat, 9 Nov 2024 22:17:44 +0100 Subject: [PATCH 06/14] add support for setting index on series --- narwhals/utils.py | 5 +++++ tests/utils_test.py | 15 ++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index 51887e203..0a6f58bf8 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -332,6 +332,11 @@ def maybe_set_index( if isinstance(index, Series): index = index.to_native() + if is_pandas_like_series(df_any): + native_frame.index = index + else: + native_frame = native_frame.set_index(index) + return df_any._from_compliant_dataframe( # type: ignore[no-any-return] df_any._compliant_frame._from_native_frame(native_frame.set_index(index)) ) diff --git a/tests/utils_test.py b/tests/utils_test.py index 912a8a405..e2b8e4523 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -90,6 +90,10 @@ def test_maybe_set_index_polars_column_names( assert result is df +@pytest.mark.parametrize( + "native_df_or_series", + [pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}), pd.Series([0, 1, 2])], +) @pytest.mark.parametrize( ("narwhals_index", "pandas_index"), [ @@ -109,11 +113,16 @@ def test_maybe_set_index_polars_column_names( def test_maybe_set_index_pandas_direct_index( narwhals_index: Series | list[Series] | None, pandas_index: pd.Series | list[pd.Series] | None, + native_df_or_series: pd.DataFrame | pd.Series, ) -> None: - df = nw.from_native(pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})) + df = nw.from_native(native_df_or_series, allow_series=True) result = nw.maybe_set_index(df, index=narwhals_index) - expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}).set_index(pandas_index) - assert_frame_equal(nw.to_native(result), expected) + if isinstance(native_df_or_series, pd.Series): + native_df_or_series.index = pandas_index + 
assert_series_equal(nw.to_native(result), native_df_or_series) + else: + expected = native_df_or_series.set_index(pandas_index) + assert_frame_equal(nw.to_native(result), expected) @pytest.mark.parametrize( From e4e43bbfbc57b591ad090c9969fe62e3067b2db7 Mon Sep 17 00:00:00 2001 From: Rik van der Vlist Date: Sat, 9 Nov 2024 22:37:24 +0100 Subject: [PATCH 07/14] add check for column_names-series combination and refactor logic --- narwhals/utils.py | 33 ++++++++++++++++++--------------- tests/utils_test.py | 8 ++++++++ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index 0a6f58bf8..a2cda86d8 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -314,33 +314,36 @@ def maybe_set_index( msg = "Either `column_names` or `index` should be provided" raise ValueError(msg) - if is_pandas_like_dataframe(native_frame): - if column_names is not None: + if column_names is not None: + if is_pandas_like_dataframe(native_frame): return df_any._from_compliant_dataframe( # type: ignore[no-any-return] df_any._compliant_frame._from_native_frame( native_frame.set_index(column_names) ) ) + elif is_pandas_like_series(native_frame): + msg = "Cannot set index using column names on a Series" + raise ValueError(msg) - if index is not None: # pragma: no cover - from narwhals.series import Series - - if _is_iterable(index): - index = [ - idx.to_native() if isinstance(idx, Series) else idx for idx in index - ] - if isinstance(index, Series): - index = index.to_native() + if index is not None: # pragma: no cover + from narwhals.series import Series - if is_pandas_like_series(df_any): - native_frame.index = index - else: - native_frame = native_frame.set_index(index) + if _is_iterable(index): + index = [idx.to_native() if isinstance(idx, Series) else idx for idx in index] + if isinstance(index, Series): + index = index.to_native() + if is_pandas_like_dataframe(native_frame): return df_any._from_compliant_dataframe( # type: 
ignore[no-any-return] df_any._compliant_frame._from_native_frame(native_frame.set_index(index)) ) + elif is_pandas_like_series(native_frame): + native_frame.index = index + return df_any._from_compliant_series( # type: ignore[no-any-return] + df_any._compliant_series._from_native_series(native_frame) + ) + return df_any # type: ignore[no-any-return] diff --git a/tests/utils_test.py b/tests/utils_test.py index e2b8e4523..dc2415c8d 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -143,6 +143,14 @@ def test_maybe_set_index_polars_direct_index( assert result is df +def test_maybe_set_index_pandas_series_column_names() -> None: + df = nw.from_native(pd.Series([0, 1, 2]), allow_series=True) + with pytest.raises( + ValueError, match="Cannot set index using column names on a Series" + ): + nw.maybe_set_index(df, column_names=["a"]) + + def test_maybe_set_index_pandas_either_index_or_column_names() -> None: df = nw.from_native(pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})) column_names = ["a", "b"] From 5c3a3d40a872c8127bfb33d7159b3590d79e62be Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sun, 10 Nov 2024 12:48:09 +0100 Subject: [PATCH 08/14] simplify logic a bit --- narwhals/utils.py | 58 +++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index a2cda86d8..b26c5aa97 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -282,10 +282,12 @@ def maybe_set_index( ) -> T: """ Set the index of `df`, if `df` is pandas-like. The index can either be specified as - a existing column name or list of column names with `column_names`, or set directly with a Series or list of Series with `index`. + a existing column name or list of column names with `column_names`, or set directly + with a Series or list of Series with `index`. + Notes: - This is only really intended for backwards-compatibility purposes, - for example if your library already aligns indices for users. 
+ This is only really intended for backwards-compatibility purposes, for example if + your library already aligns indices for users. If you're designing a new library, we highly encourage you to not rely on the Index. For non-pandas-like inputs, this is a no-op. @@ -304,7 +306,7 @@ def maybe_set_index( """ df_any = cast(Any, df) - native_frame = to_native(df_any) + native_obj = to_native(df_any) if column_names is not None and index is not None: msg = "Only one of `column_names` or `index` should be provided" @@ -314,37 +316,29 @@ def maybe_set_index( msg = "Either `column_names` or `index` should be provided" raise ValueError(msg) - if column_names is not None: - if is_pandas_like_dataframe(native_frame): - return df_any._from_compliant_dataframe( # type: ignore[no-any-return] - df_any._compliant_frame._from_native_frame( - native_frame.set_index(column_names) - ) - ) - elif is_pandas_like_series(native_frame): + if index is not None: + keys = ( + [to_native(idx, pass_through=True) for idx in index] + if _is_iterable(index) + else [to_native(index, pass_through=True)] + ) + else: + keys = column_names + + if is_pandas_like_dataframe(native_obj): + return df_any._from_compliant_dataframe( # type: ignore[no-any-return] + df_any._compliant_frame._from_native_frame(native_obj.set_index(keys)) + ) + elif is_pandas_like_series(native_obj): + if column_names: msg = "Cannot set index using column names on a Series" raise ValueError(msg) - if index is not None: # pragma: no cover - from narwhals.series import Series - - if _is_iterable(index): - index = [idx.to_native() if isinstance(idx, Series) else idx for idx in index] - if isinstance(index, Series): - index = index.to_native() - - if is_pandas_like_dataframe(native_frame): - return df_any._from_compliant_dataframe( # type: ignore[no-any-return] - df_any._compliant_frame._from_native_frame(native_frame.set_index(index)) - ) - - elif is_pandas_like_series(native_frame): - native_frame.index = index - return 
df_any._from_compliant_series( # type: ignore[no-any-return] - df_any._compliant_series._from_native_series(native_frame) - ) - - return df_any # type: ignore[no-any-return] + return df_any._from_compliant_series( # type: ignore[no-any-return] + df_any._compliant_series._from_native_series(native_obj.set_axis(keys)) + ) + else: + return df_any # type: ignore[no-any-return] def maybe_reset_index(obj: T) -> T: From 2466bc7024c5fec0b39df30bb56c87791710b26f Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sun, 10 Nov 2024 16:25:44 +0100 Subject: [PATCH 09/14] use set_axis, rm list wrapper --- narwhals/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index 63dffc571..1ec8621e8 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -320,7 +320,7 @@ def maybe_set_index( keys = ( [to_native(idx, pass_through=True) for idx in index] if _is_iterable(index) - else [to_native(index, pass_through=True)] + else to_native(index, pass_through=True) ) else: keys = column_names From b43197c912e0ac60208c8b915184e876de5cb2c1 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sun, 10 Nov 2024 16:33:29 +0100 Subject: [PATCH 10/14] back to s.index = keys --- narwhals/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index 1ec8621e8..5b0ef4060 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -333,9 +333,9 @@ def maybe_set_index( if column_names: msg = "Cannot set index using column names on a Series" raise ValueError(msg) - + native_obj.index = keys return df_any._from_compliant_series( # type: ignore[no-any-return] - df_any._compliant_series._from_native_series(native_obj.set_axis(keys)) + df_any._compliant_series._from_native_series(native_obj) ) else: return df_any # type: ignore[no-any-return] From 673750e9bb96fd51bdf864aabbe27ecf2848344a Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Mon, 11 Nov 2024 20:55:20 +0100 Subject: [PATCH 11/14] use series 
set_axis --- narwhals/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index 5b0ef4060..bad13ee8f 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -333,9 +333,8 @@ def maybe_set_index( if column_names: msg = "Cannot set index using column names on a Series" raise ValueError(msg) - native_obj.index = keys return df_any._from_compliant_series( # type: ignore[no-any-return] - df_any._compliant_series._from_native_series(native_obj) + df_any._compliant_series._from_native_series(native_obj.set_axis(keys)) ) else: return df_any # type: ignore[no-any-return] From 4be63d012b94685af9ebe123136f8adc2f70625a Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Mon, 11 Nov 2024 21:07:40 +0100 Subject: [PATCH 12/14] docstring with arguments --- narwhals/utils.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index bad13ee8f..386b5b6e5 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -281,15 +281,30 @@ def maybe_set_index( index: Series | list[Series] | None = None, ) -> T: """ - Set the index of `df`, if `df` is pandas-like. The index can either be specified as - a existing column name or list of column names with `column_names`, or set directly - with a Series or list of Series with `index`. + Set the index of `df`, if `df` is pandas-like, otherwise this is a no-op. + + Arguments: + df: object for which maybe set the index (can be either a Narwhals `DataFrame` + or `Series`). + column_names: name or list of names of the columns to set as index. + For dataframes, only one of `column_names` and `index` can be specified but + not both. If `column_names` is passed and `df` is a Series, then a + `ValueError` is raised. + index: series or list of series to set as index. 
+ + Raises: + ValueError: If one of the following condition happens: + + - none of `column_names` and `index` are provided + - both `column_names` and `index` are provided + - `column_names` is provided and `df` is a Series Notes: This is only really intended for backwards-compatibility purposes, for example if your library already aligns indices for users. If you're designing a new library, we highly encourage you to not rely on the Index. + For non-pandas-like inputs, this is a no-op. Examples: From c068d797841ed7ff4975a76d98e4f641cab0aff9 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Mon, 11 Nov 2024 22:07:53 +0100 Subject: [PATCH 13/14] very old pandas --- narwhals/utils.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index 386b5b6e5..7c8701248 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -348,8 +348,17 @@ def maybe_set_index( if column_names: msg = "Cannot set index using column names on a Series" raise ValueError(msg) + + if ( + df_any._compliant_series._implementation is Implementation.PANDAS + and df_any._compliant_series._backend_version < (1,) + ): # pragma: no cover + native_obj = native_obj.set_axis(keys, inplace=False) + else: + native_obj = native_obj.set_axis(keys) + return df_any._from_compliant_series( # type: ignore[no-any-return] - df_any._compliant_series._from_native_series(native_obj.set_axis(keys)) + df_any._compliant_series._from_native_series(native_obj) ) else: return df_any # type: ignore[no-any-return] From 3cccb997c726db0c2c69cc6a951112d5daf37ceb Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Tue, 12 Nov 2024 15:12:32 +0100 Subject: [PATCH 14/14] rename df to obj --- narwhals/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/narwhals/utils.py b/narwhals/utils.py index 7c8701248..85ec5a00f 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -275,16 +275,16 @@ def maybe_get_index(obj: T) -> Any | None: def 
maybe_set_index( - df: T, + obj: T, column_names: str | list[str] | None = None, *, index: Series | list[Series] | None = None, ) -> T: """ - Set the index of `df`, if `df` is pandas-like, otherwise this is a no-op. + Set the index of a DataFrame or a Series, if it's pandas-like. Arguments: - df: object for which maybe set the index (can be either a Narwhals `DataFrame` + obj: object whose index may be set (can be either a Narwhals `DataFrame` or `Series`). column_names: name or list of names of the columns to set as index. For dataframes, only one of `column_names` and `index` can be specified but @@ -320,7 +320,7 @@ def maybe_set_index( 5 2 """ - df_any = cast(Any, df) + df_any = cast(Any, obj) native_obj = to_native(df_any) if column_names is not None and index is not None: