Skip to content

Commit

Permalink
Merge branch 'narwhals-dev:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
ugohuche authored Jul 12, 2024
2 parents 2854ac1 + 5f8fb59 commit 73ee74c
Show file tree
Hide file tree
Showing 29 changed files with 214 additions and 89 deletions.
1 change: 0 additions & 1 deletion .github/release-drafter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ exclude-labels:
- skip changelog
- release
name-template: 'Narwhals v$RESOLVED_VERSION'
tag-template: 'v$RESOLVED_VERSION'

change-template: '- $TITLE (#$NUMBER)'

Expand Down
2 changes: 1 addition & 1 deletion docs/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ Then, if you start the Python REPL and see the following:
```python
>>> import narwhals
>>> narwhals.__version__
'1.0.4'
'1.0.5'
```
then installation worked correctly!
2 changes: 1 addition & 1 deletion narwhals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
from narwhals.utils import maybe_convert_dtypes
from narwhals.utils import maybe_set_index

__version__ = "1.0.4"
__version__ = "1.0.5"

__all__ = [
"selectors",
Expand Down
25 changes: 25 additions & 0 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,31 @@ def with_row_index(self, name: str) -> Self:
row_indices = pa.array(range(df.num_rows))
return self._from_native_dataframe(df.append_column(name, row_indices))

def null_count(self) -> Self:
    """Return a one-row frame holding the null count of each column.

    Every column of the native table contributes a single-element list
    containing that column's ``null_count``; the mapping is turned into a
    new table with ``pa.table`` and wrapped via ``_from_native_dataframe``.
    """
    pa = get_pyarrow()
    table = self._native_dataframe

    counts = {}
    for column_name, column in zip(table.column_names, table.columns):
        # One-element list so each count becomes a column of length one.
        counts[column_name] = [column.null_count]

    return self._from_native_dataframe(pa.table(counts))

def head(self, n: int) -> Self:
    """Return the leading rows of the native table.

    For ``n >= 0`` the table is sliced as ``slice(0, n)``. For negative
    ``n`` the slice length is ``num_rows + n``, floored at zero, so the
    last ``abs(n)`` rows are left out.
    """
    table = self._native_dataframe
    # Only the negative case needs the row count; clamp so the length
    # never drops below zero.
    length = n if n >= 0 else max(0, table.num_rows + n)
    return self._from_native_dataframe(table.slice(0, length))

def tail(self, n: int) -> Self:
    """Return the trailing rows of the native table.

    For ``n >= 0`` the slice starts at ``num_rows - n``, floored at zero.
    For negative ``n`` the slice starts at ``abs(n)``, so the first
    ``abs(n)`` rows are left out.
    """
    table = self._native_dataframe
    # Only the non-negative case needs the row count.
    offset = max(0, table.num_rows - n) if n >= 0 else abs(n)
    return self._from_native_dataframe(table.slice(offset))

def lazy(self) -> Self:
    """Return this same object; no conversion is performed."""
    same = self
    return same

Expand Down
15 changes: 15 additions & 0 deletions narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,12 @@ def cum_sum(self) -> Self:
def any(self) -> Self:
    """Build the ``any`` expression.

    Calls ``reuse_series_implementation`` with the attribute name ``"any"``
    and ``returns_scalar=True`` (presumably dispatching to the series-level
    method of the same name -- confirm against the helper).
    """
    return reuse_series_implementation(
        self,
        "any",
        returns_scalar=True,
    )

def min(self) -> Self:
    """Build the ``min`` expression.

    Calls ``reuse_series_implementation`` with the attribute name ``"min"``
    and ``returns_scalar=True`` (presumably dispatching to the series-level
    method of the same name -- confirm against the helper).
    """
    return reuse_series_implementation(
        self,
        "min",
        returns_scalar=True,
    )

def max(self) -> Self:
    """Build the ``max`` expression.

    Calls ``reuse_series_implementation`` with the attribute name ``"max"``
    and ``returns_scalar=True`` (presumably dispatching to the series-level
    method of the same name -- confirm against the helper).
    """
    return reuse_series_implementation(
        self,
        "max",
        returns_scalar=True,
    )

def all(self) -> Self:
    """Build the ``all`` expression.

    Calls ``reuse_series_implementation`` with the attribute name ``"all"``
    and ``returns_scalar=True`` (presumably dispatching to the series-level
    method of the same name -- confirm against the helper).
    """
    return reuse_series_implementation(
        self,
        "all",
        returns_scalar=True,
    )

Expand All @@ -125,6 +131,15 @@ def alias(self, name: str) -> Self:
backend_version=self._backend_version,
)

def null_count(self) -> Self:
    """Build the ``null_count`` expression.

    Calls ``reuse_series_implementation`` with the attribute name
    ``"null_count"`` and ``returns_scalar=True`` (presumably dispatching to
    the series-level method of the same name -- confirm against the helper).
    """
    return reuse_series_implementation(
        self,
        "null_count",
        returns_scalar=True,
    )

def head(self, n: int) -> Self:
    """Build the ``head`` expression.

    Calls ``reuse_series_implementation`` with the attribute name ``"head"``
    and forwards ``n`` as a positional argument.
    """
    return reuse_series_implementation(
        self,
        "head",
        n,
    )

def tail(self, n: int) -> Self:
    """Build the ``tail`` expression.

    Calls ``reuse_series_implementation`` with the attribute name ``"tail"``
    and forwards ``n`` as a positional argument.
    """
    return reuse_series_implementation(
        self,
        "tail",
        n,
    )

@property
def dt(self) -> ArrowExprDateTimeNamespace:
    """Return a new ``ArrowExprDateTimeNamespace`` wrapping this expression."""
    namespace = ArrowExprDateTimeNamespace(self)
    return namespace
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_arrow/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def _create_expr_from_series(self, series: ArrowSeries) -> ArrowExpr:
def _create_series_from_scalar(self, value: Any, series: ArrowSeries) -> ArrowSeries:
from narwhals._arrow.series import ArrowSeries

if self._backend_version < (13,): # pragma: no cover
if self._backend_version < (13,) and hasattr(value, "as_py"): # pragma: no cover
value = value.as_py()
return ArrowSeries._from_iterable(
[value],
Expand Down
27 changes: 27 additions & 0 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,14 @@ def mean(self) -> int:
pc = get_pyarrow_compute()
return pc.mean(self._native_series) # type: ignore[no-any-return]

def min(self) -> int:
    """Return the result of the compute module's ``min`` on the native series.

    NOTE(review): the annotation says ``int`` but the value is returned
    exactly as ``pc.min`` produces it -- confirm the actual type.
    """
    compute = get_pyarrow_compute()
    native = self._native_series
    return compute.min(native)  # type: ignore[no-any-return]

def max(self) -> int:
    """Return the result of the compute module's ``max`` on the native series.

    NOTE(review): the annotation says ``int`` but the value is returned
    exactly as ``pc.max`` produces it -- confirm the actual type.
    """
    compute = get_pyarrow_compute()
    native = self._native_series
    return compute.max(native)  # type: ignore[no-any-return]

def std(self, ddof: int = 1) -> int:
    """Return the compute module's ``stddev`` of the native series.

    ``ddof`` is forwarded unchanged as the ``ddof`` keyword and defaults
    to 1.

    NOTE(review): the annotation says ``int`` but the value is returned
    exactly as ``pc.stddev`` produces it -- confirm the actual type.
    """
    compute = get_pyarrow_compute()
    native = self._native_series
    return compute.stddev(native, ddof=ddof)  # type: ignore[no-any-return]
Expand Down Expand Up @@ -148,6 +156,25 @@ def cast(self, dtype: DType) -> Self:
dtype = reverse_translate_dtype(dtype)
return self._from_native_series(pc.cast(ser, dtype))

def null_count(self: Self) -> int:
    """Return the ``null_count`` attribute of the native series."""
    native = self._native_series
    return native.null_count  # type: ignore[no-any-return]

def head(self, n: int) -> Self:
    """Return the leading elements of the native series.

    For ``n >= 0`` the series is sliced as ``slice(0, n)``. For negative
    ``n`` the slice length is ``len(series) + n``, floored at zero, so the
    last ``abs(n)`` elements are left out.
    """
    native = self._native_series
    # Only the negative case needs the length; clamp so it never drops
    # below zero.
    length = n if n >= 0 else max(0, len(native) + n)
    return self._from_native_series(native.slice(0, length))

def tail(self, n: int) -> Self:
    """Return the trailing elements of the native series.

    For ``n >= 0`` the slice starts at ``len(series) - n``, floored at
    zero. For negative ``n`` the slice starts at ``abs(n)``, so the first
    ``abs(n)`` elements are left out.
    """
    native = self._native_series
    # Only the non-negative case needs the length.
    offset = max(0, len(native) - n) if n >= 0 else abs(n)
    return self._from_native_series(native.slice(offset))

@property
def shape(self) -> tuple[int]:
    """Return a one-element tuple containing the length of the native series."""
    length = len(self._native_series)
    return (length,)
Expand Down
3 changes: 2 additions & 1 deletion narwhals/_arrow/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,4 +140,5 @@ def validate_dataframe_comparand(other: Any) -> Any:
msg = "not implemented yet" # pragma: no cover
raise NotImplementedError(msg)
return other._native_series
raise AssertionError("Please report a bug")
msg = "Please report a bug" # pragma: no cover
raise AssertionError(msg)
3 changes: 2 additions & 1 deletion narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ def _validate_columns(self, columns: Sequence[str]) -> None:
raise ValueError(
msg,
)
raise AssertionError("Pls report bug")
msg = "Please report a bug" # pragma: no cover
raise AssertionError(msg)

def _from_native_dataframe(self, df: Any) -> Self:
return self.__class__(
Expand Down
5 changes: 2 additions & 3 deletions narwhals/_pandas_like/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,8 @@ def agg_pandas(
try:
result_simple = grouped.agg(aggs)
except AttributeError as exc:
raise RuntimeError(
"Failed to aggregated - does your aggregation function return a scalar?"
) from exc
msg = "Failed to aggregated - does your aggregation function return a scalar?"
raise RuntimeError(msg) from exc
result_simple.columns = [f"{a}_{b}" for a, b in result_simple.columns]
result_simple = result_simple.rename(columns=name_mapping).reset_index()
return from_dataframe(result_simple.loc[:, output_names])
Expand Down
3 changes: 2 additions & 1 deletion narwhals/_pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ def validate_dataframe_comparand(index: Any, other: Any) -> Any:
backend_version=other._backend_version,
)
return other._native_series
raise AssertionError("Please report a bug")
msg = "Please report a bug" # pragma: no cover
raise AssertionError(msg)


def create_native_series(
Expand Down
3 changes: 2 additions & 1 deletion narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1955,7 +1955,8 @@ def __repr__(self) -> str: # pragma: no cover
)

def __getitem__(self, item: str | slice) -> Series | Self:
raise TypeError("Slicing is not supported on LazyFrame")
msg = "Slicing is not supported on LazyFrame"
raise TypeError(msg)

def collect(self) -> DataFrame[Any]:
r"""
Expand Down
3 changes: 2 additions & 1 deletion narwhals/expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -3391,10 +3391,11 @@ def lit(value: Any, dtype: DType | None = None) -> Expr:
"""
if (np := get_numpy()) is not None and isinstance(value, np.ndarray):
raise ValueError(
msg = (
"numpy arrays are not supported as literal values. "
"Consider using `with_columns` to create a new column from the array."
)
raise ValueError(msg)

if isinstance(value, (list, tuple)):
msg = f"Nested datatypes are not supported yet. Got {value}"
Expand Down
3 changes: 2 additions & 1 deletion narwhals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ def concat(
"Only horizontal and vertical concatenations are supported"
)
if not items:
raise ValueError("No items to concatenate")
msg = "No items to concatenate"
raise ValueError(msg)
items = list(items)
validate_same_library(items)
validate_laziness(items)
Expand Down
48 changes: 32 additions & 16 deletions narwhals/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,25 +265,29 @@ def from_native( # noqa: PLR0915

if (pl := get_polars()) is not None and isinstance(native_dataframe, pl.DataFrame):
if series_only: # pragma: no cover (todo)
raise TypeError("Cannot only use `series_only` with polars.DataFrame")
msg = "Cannot only use `series_only` with polars.DataFrame"
raise TypeError(msg)
return DataFrame(
native_dataframe,
is_polars=True,
backend_version=parse_version(pl.__version__),
)
elif (pl := get_polars()) is not None and isinstance(native_dataframe, pl.LazyFrame):
if series_only: # pragma: no cover (todo)
raise TypeError("Cannot only use `series_only` with polars.LazyFrame")
msg = "Cannot only use `series_only` with polars.LazyFrame"
raise TypeError(msg)
if eager_only: # pragma: no cover (todo)
raise TypeError("Cannot only use `eager_only` with polars.LazyFrame")
msg = "Cannot only use `eager_only` with polars.LazyFrame"
raise TypeError(msg)
return LazyFrame(
native_dataframe,
is_polars=True,
backend_version=parse_version(pl.__version__),
)
elif (pd := get_pandas()) is not None and isinstance(native_dataframe, pd.DataFrame):
if series_only: # pragma: no cover (todo)
raise TypeError("Cannot only use `series_only` with dataframe")
msg = "Cannot only use `series_only` with dataframe"
raise TypeError(msg)
return DataFrame(
PandasLikeDataFrame(
native_dataframe,
Expand All @@ -297,7 +301,8 @@ def from_native( # noqa: PLR0915
native_dataframe, mpd.DataFrame
): # pragma: no cover
if series_only:
raise TypeError("Cannot only use `series_only` with modin.DataFrame")
msg = "Cannot only use `series_only` with modin.DataFrame"
raise TypeError(msg)
return DataFrame(
PandasLikeDataFrame(
native_dataframe,
Expand All @@ -311,7 +316,8 @@ def from_native( # noqa: PLR0915
native_dataframe, cudf.DataFrame
):
if series_only:
raise TypeError("Cannot only use `series_only` with modin.DataFrame")
msg = "Cannot only use `series_only` with modin.DataFrame"
raise TypeError(msg)
return DataFrame(
PandasLikeDataFrame(
native_dataframe,
Expand All @@ -323,7 +329,8 @@ def from_native( # noqa: PLR0915
)
elif (pa := get_pyarrow()) is not None and isinstance(native_dataframe, pa.Table):
if series_only: # pragma: no cover (todo)
raise TypeError("Cannot only use `series_only` with arrow table")
msg = "Cannot only use `series_only` with arrow table"
raise TypeError(msg)
return DataFrame(
ArrowDataFrame(
native_dataframe, backend_version=parse_version(pa.__version__)
Expand All @@ -333,7 +340,8 @@ def from_native( # noqa: PLR0915
)
elif hasattr(native_dataframe, "__narwhals_dataframe__"): # pragma: no cover
if series_only: # pragma: no cover (todo)
raise TypeError("Cannot only use `series_only` with dataframe")
msg = "Cannot only use `series_only` with dataframe"
raise TypeError(msg)
# placeholder (0,) version here, as we wouldn't use it in this case anyway.
return DataFrame(
native_dataframe.__narwhals_dataframe__(),
Expand All @@ -342,9 +350,11 @@ def from_native( # noqa: PLR0915
)
elif hasattr(native_dataframe, "__narwhals_lazyframe__"): # pragma: no cover
if series_only: # pragma: no cover (todo)
raise TypeError("Cannot only use `series_only` with lazyframe")
msg = "Cannot only use `series_only` with lazyframe"
raise TypeError(msg)
if eager_only: # pragma: no cover (todo)
raise TypeError("Cannot only use `eager_only` with lazyframe")
msg = "Cannot only use `eager_only` with lazyframe"
raise TypeError(msg)
# placeholder (0,) version here, as we wouldn't use it in this case anyway.
return LazyFrame(
native_dataframe.__narwhals_lazyframe__(),
Expand All @@ -353,15 +363,17 @@ def from_native( # noqa: PLR0915
)
elif (pl := get_polars()) is not None and isinstance(native_dataframe, pl.Series):
if not allow_series: # pragma: no cover (todo)
raise TypeError("Please set `allow_series=True`")
msg = "Please set `allow_series=True`"
raise TypeError(msg)
return Series(
native_dataframe,
is_polars=True,
backend_version=parse_version(pl.__version__),
)
elif (pd := get_pandas()) is not None and isinstance(native_dataframe, pd.Series):
if not allow_series: # pragma: no cover (todo)
raise TypeError("Please set `allow_series=True`")
msg = "Please set `allow_series=True`"
raise TypeError(msg)
return Series(
PandasLikeSeries(
native_dataframe,
Expand All @@ -375,7 +387,8 @@ def from_native( # noqa: PLR0915
native_dataframe, mpd.Series
): # pragma: no cover
if not allow_series: # pragma: no cover (todo)
raise TypeError("Please set `allow_series=True`")
msg = "Please set `allow_series=True`"
raise TypeError(msg)
return Series(
PandasLikeSeries(
native_dataframe,
Expand All @@ -389,7 +402,8 @@ def from_native( # noqa: PLR0915
native_dataframe, cudf.Series
): # pragma: no cover
if not allow_series: # pragma: no cover (todo)
raise TypeError("Please set `allow_series=True`")
msg = "Please set `allow_series=True`"
raise TypeError(msg)
return Series(
PandasLikeSeries(
native_dataframe,
Expand All @@ -403,7 +417,8 @@ def from_native( # noqa: PLR0915
native_dataframe, pa.ChunkedArray
):
if not allow_series: # pragma: no cover (todo)
raise TypeError("Please set `allow_series=True`")
msg = "Please set `allow_series=True`"
raise TypeError(msg)
return Series(
ArrowSeries(
native_dataframe, backend_version=parse_version(pa.__version__), name=""
Expand All @@ -413,7 +428,8 @@ def from_native( # noqa: PLR0915
)
elif hasattr(native_dataframe, "__narwhals_series__"): # pragma: no cover
if not allow_series: # pragma: no cover (todo)
raise TypeError("Please set `allow_series=True`")
msg = "Please set `allow_series=True`"
raise TypeError(msg)
# placeholder (0,) version here, as we wouldn't use it in this case anyway.
return Series(
native_dataframe.__narwhals_series__(), backend_version=(0,), is_polars=False
Expand Down
3 changes: 2 additions & 1 deletion narwhals/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@ def maybe_align_index(lhs: T, rhs: Series | BaseFrame[Any]) -> T:

def _validate_index(index: Any) -> None:
if not index.is_unique:
raise ValueError("given index doesn't have a unique index")
msg = "given index doesn't have a unique index"
raise ValueError(msg)

lhs_any = cast(Any, lhs)
rhs_any = cast(Any, rhs)
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "narwhals"
version = "1.0.4"
version = "1.0.5"
authors = [
{ name="Marco Gorelli", email="[email protected]" },
]
Expand Down Expand Up @@ -84,6 +84,7 @@ filterwarnings = [
'ignore:make_block is deprecated and will be removed',
'ignore:np.find_common_type is deprecated',
'ignore:is_sparse is deprecated and will be removed',
'ignore:Passing a BlockManager to DataFrame is deprecated',
]
xfail_strict = true
markers = ["slow: marks tests as slow (deselect with '-m \"not slow\"')"]
Expand Down
Loading

0 comments on commit 73ee74c

Please sign in to comment.