Skip to content

Commit

Permalink
old pandas compat
Browse files: browse the repository at this point in the history
  • Loading branch information
MarcoGorelli committed Nov 12, 2024
1 parent 714d097 commit 6a611ab
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 13 deletions.
26 changes: 22 additions & 4 deletions narwhals/_dask/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,12 @@ def select(
if exprs and all(isinstance(x, str) for x in exprs) and not named_exprs:
# This is a simple slice => fastpath!
return self._from_native_frame(
select_columns_by_name(self._native_frame, list(exprs)) # type: ignore[arg-type]
select_columns_by_name(
self._native_frame,
list(exprs), # type: ignore[arg-type]
self._backend_version,
self._implementation,
)
)

new_series = parse_exprs_and_named_exprs(self, *exprs, **named_exprs)
Expand All @@ -140,7 +145,10 @@ def select(
return self._from_native_frame(df)

df = select_columns_by_name(
self._native_frame.assign(**new_series), list(new_series.keys())
self._native_frame.assign(**new_series),
list(new_series.keys()),
self._backend_version,
self._implementation,
)
return self._from_native_frame(df)

Expand Down Expand Up @@ -265,7 +273,12 @@ def join(
msg = "`right_on` cannot be `None` in anti-join"
raise TypeError(msg)
other_native = (
select_columns_by_name(other._native_frame, right_on)
select_columns_by_name(
other._native_frame,
right_on,
self._backend_version,
self._implementation,
)
.rename( # rename to avoid creating extra columns in join
columns=dict(zip(right_on, left_on)) # type: ignore[arg-type]
)
Expand All @@ -287,7 +300,12 @@ def join(
msg = "`right_on` cannot be `None` in semi-join"
raise TypeError(msg)
other_native = (
select_columns_by_name(other._native_frame, right_on)
select_columns_by_name(
other._native_frame,
right_on,
self._backend_version,
self._implementation,
)
.rename( # rename to avoid creating extra columns in join
columns=dict(zip(right_on, left_on)) # type: ignore[arg-type]
)
Expand Down
28 changes: 24 additions & 4 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,12 @@ def __getitem__(
elif is_sequence_but_not_str(item) or (is_numpy_array(item) and item.ndim == 1):
if all(isinstance(x, str) for x in item) and len(item) > 0:
return self._from_native_frame(
select_columns_by_name(self._native_frame, item)
select_columns_by_name(
self._native_frame,
item,
self._backend_version,
self._implementation,
)
)
return self._from_native_frame(self._native_frame.iloc[item])

Expand Down Expand Up @@ -333,7 +338,12 @@ def select(
# This is a simple slice => fastpath!
column_names = list(exprs)
return self._from_native_frame(
select_columns_by_name(self._native_frame, column_names) # type: ignore[arg-type]
select_columns_by_name(
self._native_frame,
column_names, # type: ignore[arg-type]
self._backend_version,
self._implementation,
)
)
new_series = evaluate_into_exprs(self, *exprs, **named_exprs)
if not new_series:
Expand Down Expand Up @@ -556,7 +566,12 @@ def join(
raise TypeError(msg)

other_native = (
select_columns_by_name(other._native_frame, right_on)
select_columns_by_name(
other._native_frame,
right_on,
self._backend_version,
self._implementation,
)
.rename( # rename to avoid creating extra columns in join
columns=dict(zip(right_on, left_on)), # type: ignore[arg-type]
copy=False,
Expand All @@ -580,7 +595,12 @@ def join(
msg = "`right_on` cannot be `None` in semi-join"
raise TypeError(msg)
other_native = (
select_columns_by_name(other._native_frame, right_on)
select_columns_by_name(
other._native_frame,
right_on,
self._backend_version,
self._implementation,
)
.rename( # rename to avoid creating extra columns in join
columns=dict(zip(right_on, left_on)), # type: ignore[arg-type]
copy=False,
Expand Down
19 changes: 16 additions & 3 deletions narwhals/_pandas_like/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,12 @@ def __init__(
): # pragma: no cover
if (
not drop_null_keys
and select_columns_by_name(self._df._native_frame, self._keys)
and select_columns_by_name(
self._df._native_frame,
self._keys,
self._df._backend_version,
self._df._implementation,
)
.isna()
.any()
.any()
Expand Down Expand Up @@ -231,7 +236,11 @@ def agg_pandas( # noqa: PLR0915
result_aggs = native_namespace.DataFrame(
list(grouped.groups.keys()), columns=keys
)
return from_dataframe(select_columns_by_name(result_aggs, output_names))
return from_dataframe(
select_columns_by_name(
result_aggs, output_names, backend_version, implementation
)
)

if dataframe_is_empty:
# Don't even attempt this, it's way too inconsistent across pandas versions.
Expand Down Expand Up @@ -279,4 +288,8 @@ def func(df: Any) -> Any:
# This may need updating, depending on https://github.com/pandas-dev/pandas/pull/51466/files
result_complex.reset_index(inplace=True) # noqa: PD002

return from_dataframe(select_columns_by_name(result_complex, output_names))
return from_dataframe(
select_columns_by_name(
result_complex, output_names, backend_version, implementation
)
)
11 changes: 9 additions & 2 deletions narwhals/_pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,10 +638,17 @@ def calculate_timestamp_date(s: pd.Series, time_unit: str) -> pd.Series:
return result


def select_columns_by_name(df: T, column_names: Sequence[str]) -> T:
def select_columns_by_name(
df: T,
column_names: Sequence[str],
backend_version: tuple[int, ...],
implementation: Implementation,
) -> T:
"""Select columns by name. Prefer this over `df.loc[:, column_names]` as it's
generally more performant."""
if df.columns.dtype.kind == "b": # type: ignore[attr-defined]
if (df.columns.dtype.kind == "b") or ( # type: ignore[attr-defined]
implementation is Implementation.PANDAS and backend_version < (1, 5)
):
# See https://github.com/narwhals-dev/narwhals/issues/1349#issuecomment-2470118122
# for why we need this
return df.loc[:, column_names] # type: ignore[no-any-return, attr-defined]
Expand Down

0 comments on commit 6a611ab

Please sign in to comment.