From 6fbd640859f5e7b0dbe933a4233e9f8cde35d80c Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 13 Jul 2024 00:34:27 +0200 Subject: [PATCH] enh: pandas-like group-by complex-agg --- narwhals/_pandas_like/group_by.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/narwhals/_pandas_like/group_by.py b/narwhals/_pandas_like/group_by.py index f4e3ef950..18b29f68d 100644 --- a/narwhals/_pandas_like/group_by.py +++ b/narwhals/_pandas_like/group_by.py @@ -45,6 +45,7 @@ def agg( **named_aggs, ) implementation: Implementation = self._df._implementation + backend = self._df.__native_namespace__() output_names: list[str] = copy(self._keys) for expr in exprs: if expr._output_names is None: @@ -64,7 +65,7 @@ def agg( self._from_native_dataframe, dataframe_is_empty=self._df._native_dataframe.empty, implementation=implementation, - backend_version=self._df._backend_version, + backend=backend, ) def _from_native_dataframe(self, df: PandasLikeDataFrame) -> PandasLikeDataFrame: @@ -99,8 +100,8 @@ def agg_pandas( from_dataframe: Callable[[Any], PandasLikeDataFrame], *, implementation: Any, - backend_version: tuple[int, ...], dataframe_is_empty: bool, + backend: Any, ) -> PandasLikeDataFrame: """ This should be the fastpath, but cuDF is too far behind to use it. @@ -189,11 +190,8 @@ def func(df: Any) -> Any: implementation=implementation, ) - if implementation is Implementation.PANDAS and backend_version >= (2, 2): - result_complex = grouped.apply(func, include_groups=False) - else: # pragma: no cover - result_complex = grouped.apply(func) - - result = result_complex.reset_index() - - return from_dataframe(result.loc[:, output_names]) + output_values = [ + dict(zip(output_names, (*grp_name, *func(grp_values)))) + for grp_name, grp_values in grouped + ] + return from_dataframe(backend.DataFrame(output_values))