Skip to content

Commit

Permalink
Merge branch 'narwhals-dev:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
ugohuche authored Jul 10, 2024
2 parents 82d7d4d + bb99986 commit f16f1a2
Show file tree
Hide file tree
Showing 33 changed files with 590 additions and 195 deletions.
40 changes: 40 additions & 0 deletions .github/workflows/publish_to_pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,43 @@ jobs:
- name: Publish distribution 📦 to PyPI
uses: pypa/gh-action-pypi-publish@release/v1

github-release:
name: >-
Sign the Python 🐍 distribution 📦 with Sigstore
and upload them to GitHub Release
needs:
- publish-to-pypi
runs-on: ubuntu-latest
permissions:
contents: write # IMPORTANT: mandatory for making GitHub Releases
id-token: write # IMPORTANT: mandatory for sigstore
steps:
- name: Download all the dists
uses: actions/download-artifact@v4
with:
name: python-package-distributions
path: dist/
- name: Sign the dists with Sigstore
uses: sigstore/gh-action-sigstore-python@v2.1.1
with:
inputs: >-
./dist/*.tar.gz
./dist/*.whl
- name: Create GitHub Release
env:
GITHUB_TOKEN: ${{ github.token }}
run: >-
gh release create
'${{ github.ref_name }}'
--repo '${{ github.repository }}'
--notes ""
- name: Upload artifact signatures to GitHub Release
env:
GITHUB_TOKEN: ${{ github.token }}
# Upload to GitHub Release using the `gh` CLI.
# `dist/` contains the built packages, and the
# sigstore-produced signatures and certificates.
run: >-
gh release upload
'${{ github.ref_name }}' dist/**
--repo '${{ github.repository }}'
2 changes: 2 additions & 0 deletions docs/api-reference/dataframe.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
handler: python
options:
members:
- __getitem__
- clone
- columns
- drop
- drop_nulls
- filter
- get_column
- group_by
- head
- is_duplicated
Expand Down
2 changes: 1 addition & 1 deletion docs/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ Then, if you start the Python REPL and see the following:
```python
>>> import narwhals
>>> narwhals.__version__
'1.0.2'
'1.0.4'
```
then installation worked correctly!
20 changes: 20 additions & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,26 @@ theme:
- content.code.annotate
- navigation.footer
- navigation.indexes
palette:
# Palette toggle for automatic mode
- media: "(prefers-color-scheme)"
toggle:
icon: material/brightness-auto
name: Switch to light mode

# Palette toggle for light mode
- media: "(prefers-color-scheme: light)"
scheme: default
toggle:
icon: material/brightness-7
name: Switch to dark mode

# Palette toggle for dark mode
- media: "(prefers-color-scheme: dark)"
scheme: slate
toggle:
icon: material/brightness-4
name: Switch to system preference
plugins:
- search
- mkdocstrings:
Expand Down
2 changes: 1 addition & 1 deletion narwhals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
from narwhals.utils import maybe_convert_dtypes
from narwhals.utils import maybe_set_index

__version__ = "1.0.2"
__version__ = "1.0.4"

__all__ = [
"selectors",
Expand Down
9 changes: 9 additions & 0 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,15 @@ def rows(
raise NotImplementedError(msg)
return self._native_dataframe.to_pylist() # type: ignore[no-any-return]

def get_column(self, name: str) -> ArrowSeries:
    """Return the column called `name` wrapped in an `ArrowSeries`.

    Arguments:
        name: Key used to index the native dataframe.

    Returns:
        An `ArrowSeries` built from the selected native column, carrying
        `name` and this frame's backend version.
    """
    # Kept as a function-local import, as in the original
    # (presumably to avoid a circular import - TODO confirm).
    from narwhals._arrow.series import ArrowSeries

    native_column = self._native_dataframe[name]
    return ArrowSeries(
        native_column, name=name, backend_version=self._backend_version
    )

@overload
def __getitem__(self, item: str) -> ArrowSeries: ...

Expand Down
2 changes: 1 addition & 1 deletion narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@


class ArrowExpr:
def __init__( # noqa: PLR0913
def __init__(
self,
call: Callable[[ArrowDataFrame], list[ArrowSeries]],
*,
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_arrow/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class ArrowNamespace:
Duration = dtypes.Duration
Date = dtypes.Date

def _create_expr_from_callable( # noqa: PLR0913
def _create_expr_from_callable(
self,
func: Callable[[ArrowDataFrame], list[ArrowSeries]],
*,
Expand Down
33 changes: 29 additions & 4 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,15 @@ def _from_native_dataframe(self, df: Any) -> Self:
backend_version=self._backend_version,
)

def get_column(self, name: str) -> PandasLikeSeries:
    """Return the column labelled `name` wrapped in a `PandasLikeSeries`.

    Arguments:
        name: Column label, looked up with `.loc[:, name]` on the native
            dataframe.

    Returns:
        A `PandasLikeSeries` sharing this frame's implementation and
        backend version.
    """
    # Kept as a function-local import, as in the original
    # (presumably to avoid a circular import - TODO confirm).
    from narwhals._pandas_like.series import PandasLikeSeries

    native_column = self._native_dataframe.loc[:, name]
    return PandasLikeSeries(
        native_column,
        implementation=self._implementation,
        backend_version=self._backend_version,
    )

@overload
def __getitem__(self, item: str) -> PandasLikeSeries: ...

Expand Down Expand Up @@ -296,8 +305,8 @@ def join(
other: Self,
*,
how: Literal["left", "inner", "outer", "cross", "anti", "semi"] = "inner",
left_on: str | list[str] | None = None,
right_on: str | list[str] | None = None,
left_on: str | list[str] | None,
right_on: str | list[str] | None,
) -> Self:
if isinstance(left_on, str):
left_on = [left_on]
Expand Down Expand Up @@ -356,7 +365,6 @@ def join(
)
.loc[lambda t: t[indicator_token] == "left_only"]
.drop(columns=[indicator_token])
.reset_index(drop=True)
)

if how == "semi":
Expand All @@ -373,8 +381,25 @@ def join(
how="inner",
left_on=left_on,
right_on=left_on,
).reset_index(drop=True)
)
)

if how == "left":
other_native = other._native_dataframe
result_native = self._native_dataframe.merge(
other_native,
how="left",
left_on=left_on,
right_on=right_on,
suffixes=("", "_right"),
)
extra = []
for left_key, right_key in zip(left_on, right_on): # type: ignore[arg-type]
if right_key != left_key and right_key not in self.columns:
extra.append(right_key)
elif right_key != left_key:
extra.append(f"{right_key}_right")
return self._from_native_dataframe(result_native.drop(columns=extra))

return self._from_native_dataframe(
self._native_dataframe.merge(
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_pandas_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@


class PandasLikeExpr:
def __init__( # noqa: PLR0913
def __init__(
self,
call: Callable[[PandasLikeDataFrame], list[PandasLikeSeries]],
*,
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_pandas_like/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def __iter__(self) -> Iterator[tuple[Any, PandasLikeDataFrame]]:
)


def agg_pandas( # noqa: PLR0913
def agg_pandas(
grouped: Any,
exprs: list[PandasLikeExpr],
keys: list[str],
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_pandas_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def __init__(
self._implementation = implementation
self._backend_version = backend_version

def _create_expr_from_callable( # noqa: PLR0913
def _create_expr_from_callable(
self,
func: Callable[[PandasLikeDataFrame], list[PandasLikeSeries]],
*,
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def _from_native_series(self, series: Any) -> Self:
)

@classmethod
def _from_iterable( # noqa: PLR0913
def _from_iterable(
cls: type[Self],
data: Iterable[Any],
name: str,
Expand Down
99 changes: 95 additions & 4 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,11 +174,11 @@ def join(
self,
other: Self,
*,
how: Literal["inner", "cross", "semi", "anti"] = "inner",
how: Literal["inner", "left", "cross", "semi", "anti"] = "inner",
left_on: str | list[str] | None = None,
right_on: str | list[str] | None = None,
) -> Self:
_supported_joins = ("inner", "cross", "anti", "semi")
_supported_joins = ("inner", "left", "cross", "anti", "semi")

if how not in _supported_joins:
msg = f"Only the following join stragies are supported: {_supported_joins}; found '{how}'."
Expand Down Expand Up @@ -407,6 +407,54 @@ def shape(self) -> tuple[int, int]:
"""
return self._compliant_frame.shape # type: ignore[no-any-return]

def get_column(self, name: str) -> Series:
    """
    Get a single column by name.

    Arguments:
        name: Name of the column to extract.

    Notes:
        Although `name` is typed as `str`, pandas does allow non-string column
        names, and they will work when passed to this function if the
        `narwhals.DataFrame` is backed by a pandas dataframe with non-string
        columns. This function can only be used to extract a column by name, so
        there is no risk of ambiguity.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> data = {"a": [1, 2], "b": [3, 4]}
        >>> df_pd = pd.DataFrame(data)
        >>> df_pl = pl.DataFrame(data)

        We define a library agnostic function:

        >>> @nw.narwhalify(eager_only=True)
        ... def func(df):
        ...     name = df.columns[0]
        ...     return df.get_column(name)

        We can then pass either pandas or Polars to `func`:

        >>> func(df_pd)
        0    1
        1    2
        Name: a, dtype: int64
        >>> func(df_pl)  # doctest:+NORMALIZE_WHITESPACE
        shape: (2,)
        Series: 'a' [i64]
        [
           1
           2
        ]
    """
    from narwhals.series import Series

    # Delegate the lookup to the backend frame, then wrap the result.
    compliant_series = self._compliant_frame.get_column(name)
    return Series(
        compliant_series,
        backend_version=self._backend_version,
        is_polars=self._is_polars,
    )

@overload
def __getitem__(self, item: Sequence[int]) -> Series: ...

Expand All @@ -417,6 +465,49 @@ def __getitem__(self, item: str) -> Series: ...
def __getitem__(self, item: slice) -> Self: ...

def __getitem__(self, item: str | slice | Sequence[int]) -> Series | Self:
"""
Extract column or slice of DataFrame.
Arguments:
item: how to slice dataframe:
- str: extract column
- slice or Sequence of integers: slice rows from dataframe.
Notes:
In contrast with Polars, pandas allows non-string column names.
If you don't know whether the column name you're trying to extract
is definitely a string (e.g. `df[df.columns[0]]`) then you should
use `DataFrame.get_column` instead.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> data = {"a": [1, 2], "b": [3, 4]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
We define a library agnostic function:
>>> @nw.narwhalify(eager_only=True)
... def func(df):
... return df["a"]
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
0 1
1 2
Name: a, dtype: int64
>>> func(df_pl) # doctest:+NORMALIZE_WHITESPACE
shape: (2,)
Series: 'a' [i64]
[
1
2
]
"""
if isinstance(item, str):
from narwhals.series import Series

Expand Down Expand Up @@ -1478,7 +1569,7 @@ def join(
self,
other: Self,
*,
how: Literal["inner", "cross", "semi", "anti"] = "inner",
how: Literal["inner", "left", "cross", "semi", "anti"] = "inner",
left_on: str | list[str] | None = None,
right_on: str | list[str] | None = None,
) -> Self:
Expand Down Expand Up @@ -2904,7 +2995,7 @@ def join(
self,
other: Self,
*,
how: Literal["inner", "cross", "semi", "anti"] = "inner",
how: Literal["inner", "left", "cross", "semi", "anti"] = "inner",
left_on: str | list[str] | None = None,
right_on: str | list[str] | None = None,
) -> Self:
Expand Down
11 changes: 7 additions & 4 deletions narwhals/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,17 @@ def columns(self) -> Any: ...
def join(self, *args: Any, **kwargs: Any) -> Any: ...


# Anything which can be converted to an expression.
IntoExpr: TypeAlias = Union["Expr", str, "Series"]
# Anything which can be converted to a Narwhals DataFrame.
"""Anything which can be converted to an expression."""

IntoDataFrame: TypeAlias = Union["NativeFrame", "DataFrame[Any]"]
# Anything which can be converted to a Narwhals DataFrame or LazyFrame.
"""Anything which can be converted to a Narwhals DataFrame."""

IntoFrame: TypeAlias = Union["NativeFrame", "DataFrame[Any]", "LazyFrame[Any]"]
# DataFrame or LazyFrame
"""Anything which can be converted to a Narwhals DataFrame or LazyFrame."""

Frame: TypeAlias = Union["DataFrame[Any]", "LazyFrame[Any]"]
"""DataFrame or LazyFrame"""

# TypeVars for some of the above
IntoFrameT = TypeVar("IntoFrameT", bound="IntoFrame")
Expand Down
Loading

0 comments on commit f16f1a2

Please sign in to comment.