diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index 8c4c95292..dd344fab5 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -118,7 +118,12 @@ def select( if exprs and all(isinstance(x, str) for x in exprs) and not named_exprs: # This is a simple slice => fastpath! return self._from_native_frame( - select_columns_by_name(self._native_frame, list(exprs)) # type: ignore[arg-type] + select_columns_by_name( + self._native_frame, + list(exprs), # type: ignore[arg-type] + self._backend_version, + self._implementation, + ) ) new_series = parse_exprs_and_named_exprs(self, *exprs, **named_exprs) @@ -140,7 +145,10 @@ def select( return self._from_native_frame(df) df = select_columns_by_name( - self._native_frame.assign(**new_series), list(new_series.keys()) + self._native_frame.assign(**new_series), + list(new_series.keys()), + self._backend_version, + self._implementation, ) return self._from_native_frame(df) @@ -265,7 +273,12 @@ def join( msg = "`right_on` cannot be `None` in anti-join" raise TypeError(msg) other_native = ( - select_columns_by_name(other._native_frame, right_on) + select_columns_by_name( + other._native_frame, + right_on, + self._backend_version, + self._implementation, + ) .rename( # rename to avoid creating extra columns in join columns=dict(zip(right_on, left_on)) # type: ignore[arg-type] ) @@ -287,7 +300,12 @@ def join( msg = "`right_on` cannot be `None` in semi-join" raise TypeError(msg) other_native = ( - select_columns_by_name(other._native_frame, right_on) + select_columns_by_name( + other._native_frame, + right_on, + self._backend_version, + self._implementation, + ) .rename( # rename to avoid creating extra columns in join columns=dict(zip(right_on, left_on)) # type: ignore[arg-type] ) diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index a705bd525..afd3610df 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -234,7 +234,12 @@ def __getitem__( elif is_sequence_but_not_str(item) or (is_numpy_array(item) and item.ndim == 1): if all(isinstance(x, str) for x in item) and len(item) > 0: return self._from_native_frame( - select_columns_by_name(self._native_frame, item) + select_columns_by_name( + self._native_frame, + item, + self._backend_version, + self._implementation, + ) ) return self._from_native_frame(self._native_frame.iloc[item]) @@ -333,7 +338,12 @@ def select( # This is a simple slice => fastpath! column_names = list(exprs) return self._from_native_frame( - select_columns_by_name(self._native_frame, column_names) # type: ignore[arg-type] + select_columns_by_name( + self._native_frame, + column_names, # type: ignore[arg-type] + self._backend_version, + self._implementation, + ) ) new_series = evaluate_into_exprs(self, *exprs, **named_exprs) if not new_series: @@ -556,7 +566,12 @@ def join( raise TypeError(msg) other_native = ( - select_columns_by_name(other._native_frame, right_on) + select_columns_by_name( + other._native_frame, + right_on, + self._backend_version, + self._implementation, + ) .rename( # rename to avoid creating extra columns in join columns=dict(zip(right_on, left_on)), # type: ignore[arg-type] copy=False, @@ -580,7 +595,12 @@ def join( msg = "`right_on` cannot be `None` in semi-join" raise TypeError(msg) other_native = ( - select_columns_by_name(other._native_frame, right_on) + select_columns_by_name( + other._native_frame, + right_on, + self._backend_version, + self._implementation, + ) .rename( # rename to avoid creating extra columns in join columns=dict(zip(right_on, left_on)), # type: ignore[arg-type] copy=False, diff --git a/narwhals/_pandas_like/group_by.py b/narwhals/_pandas_like/group_by.py index 9629475f6..b21392d8b 100644 --- a/narwhals/_pandas_like/group_by.py +++ b/narwhals/_pandas_like/group_by.py @@ -39,7 +39,12 @@ def __init__( ): # pragma: no cover if ( not drop_null_keys - and select_columns_by_name(self._df._native_frame, self._keys) + and select_columns_by_name( + self._df._native_frame, + self._keys, + self._df._backend_version, + self._df._implementation, + ) .isna() .any() .any() @@ -231,7 +236,11 @@ def agg_pandas( # noqa: PLR0915 result_aggs = native_namespace.DataFrame( list(grouped.groups.keys()), columns=keys ) - return from_dataframe(select_columns_by_name(result_aggs, output_names)) + return from_dataframe( + select_columns_by_name( + result_aggs, output_names, backend_version, implementation + ) + ) if dataframe_is_empty: # Don't even attempt this, it's way too inconsistent across pandas versions. @@ -279,4 +288,8 @@ def func(df: Any) -> Any: # This may need updating, depending on https://github.com/pandas-dev/pandas/pull/51466/files result_complex.reset_index(inplace=True) # noqa: PD002 - return from_dataframe(select_columns_by_name(result_complex, output_names)) + return from_dataframe( + select_columns_by_name( + result_complex, output_names, backend_version, implementation + ) + ) diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 323197acb..d68f41bed 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -638,10 +638,17 @@ def calculate_timestamp_date(s: pd.Series, time_unit: str) -> pd.Series: return result -def select_columns_by_name(df: T, column_names: Sequence[str]) -> T: +def select_columns_by_name( + df: T, + column_names: Sequence[str], + backend_version: tuple[int, ...], + implementation: Implementation, +) -> T: """Select columns by name. Prefer this over `df.loc[:, column_names]` as it's generally more performant.""" - if df.columns.dtype.kind == "b": # type: ignore[attr-defined] + if (df.columns.dtype.kind == "b") or ( # type: ignore[attr-defined] + implementation is Implementation.PANDAS and backend_version < (1, 5) + ): # See https://github.com/narwhals-dev/narwhals/issues/1349#issuecomment-2470118122 # for why we need this return df.loc[:, column_names] # type: ignore[no-any-return, attr-defined]