Skip to content

Commit

Permalink
Merge branch 'main' into mypy-fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
EdAbati authored Feb 9, 2025
2 parents 99a6690 + df8144b commit 81ee931
Show file tree
Hide file tree
Showing 23 changed files with 558 additions and 56 deletions.
1 change: 1 addition & 0 deletions docs/api-reference/selectors.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ set operations are supported:
- boolean
- by_dtype
- categorical
- datetime
- matches
- numeric
- string
Expand Down
2 changes: 2 additions & 0 deletions docs/api-reference/typing.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ Narwhals comes fully statically typed. In addition to `nw.DataFrame`, `nw.Expr`,
- IntoFrameT
- IntoSeries
- IntoSeriesT
- SizeUnit
- TimeUnit
show_source: false
show_bases: false

Expand Down
4 changes: 2 additions & 2 deletions narwhals/_arrow/expr_dt.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Literal

from narwhals._expression_parsing import reuse_series_namespace_implementation

if TYPE_CHECKING:
from typing_extensions import Self

from narwhals._arrow.expr import ArrowExpr
from narwhals.typing import TimeUnit


class ArrowExprDateTimeNamespace:
Expand All @@ -30,7 +30,7 @@ def convert_time_zone(self: Self, time_zone: str) -> ArrowExpr:
self._compliant_expr, "dt", "convert_time_zone", time_zone=time_zone
)

def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"]) -> ArrowExpr:
def timestamp(self: Self, time_unit: TimeUnit) -> ArrowExpr:
    """Return an expression that applies the series-level ``dt.timestamp``.

    The work is delegated to ``reuse_series_namespace_implementation``,
    forwarding ``time_unit`` unchanged as a keyword argument.
    """
    compliant_expr = self._compliant_expr
    return reuse_series_namespace_implementation(
        compliant_expr,
        "dt",
        "timestamp",
        time_unit=time_unit,
    )
Expand Down
53 changes: 52 additions & 1 deletion narwhals/_arrow/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,24 @@
import re
from typing import TYPE_CHECKING
from typing import Any
from typing import Iterable
from typing import Sequence

from narwhals._arrow.expr import ArrowExpr
from narwhals.utils import Implementation
from narwhals.utils import _parse_time_unit_and_time_zone
from narwhals.utils import dtype_matches_time_unit_and_time_zone
from narwhals.utils import import_dtypes_module

if TYPE_CHECKING:
from datetime import timezone

from typing_extensions import Self

from narwhals._arrow.dataframe import ArrowDataFrame
from narwhals._arrow.series import ArrowSeries
from narwhals.dtypes import DType
from narwhals.typing import TimeUnit
from narwhals.utils import Version


Expand All @@ -26,7 +32,7 @@ def __init__(
self._implementation = Implementation.PYARROW
self._version = version

def by_dtype(self: Self, dtypes: list[DType | type[DType]]) -> ArrowSelector:
def by_dtype(self: Self, dtypes: Iterable[DType | type[DType]]) -> ArrowSelector:
def func(df: ArrowDataFrame) -> list[ArrowSeries]:
return [df[col] for col in df.columns if df.schema[col] in dtypes]

Expand Down Expand Up @@ -108,6 +114,51 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
kwargs={},
)

def datetime(
    self: Self,
    time_unit: TimeUnit | Iterable[TimeUnit] | None,
    time_zone: str | timezone | Iterable[str | timezone | None] | None,
) -> ArrowSelector:
    """Build a selector keeping columns that match the time unit/zone filter.

    ``time_unit`` and ``time_zone`` are normalised once through
    ``_parse_time_unit_and_time_zone``; a column is kept when
    ``dtype_matches_time_unit_and_time_zone`` accepts its schema dtype.
    """
    dtypes = import_dtypes_module(version=self._version)
    time_units, time_zones = _parse_time_unit_and_time_zone(
        time_unit=time_unit, time_zone=time_zone
    )

    def evaluate_output_names(df: ArrowDataFrame) -> Sequence[str]:
        # Single source of truth for which columns pass the filter.
        selected = []
        for name in df.columns:
            if dtype_matches_time_unit_and_time_zone(
                dtype=df.schema[name],
                dtypes=dtypes,
                time_units=time_units,
                time_zones=time_zones,
            ):
                selected.append(name)
        return selected

    def func(df: ArrowDataFrame) -> list[ArrowSeries]:
        # Materialise the series for exactly the names selected above.
        return [df[name] for name in evaluate_output_names(df)]

    return ArrowSelector(
        func,
        depth=0,
        function_name="selector",
        evaluate_output_names=evaluate_output_names,
        alias_output_names=None,
        backend_version=self._backend_version,
        version=self._version,
        kwargs={},
    )


class ArrowSelector(ArrowExpr):
def __repr__(self: Self) -> str: # pragma: no cover
Expand Down
4 changes: 2 additions & 2 deletions narwhals/_arrow/series_dt.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Literal

import pyarrow as pa
import pyarrow.compute as pc
Expand All @@ -13,6 +12,7 @@
from typing_extensions import Self

from narwhals._arrow.series import ArrowSeries
from narwhals.typing import TimeUnit


class ArrowSeriesDateTimeNamespace:
Expand Down Expand Up @@ -49,7 +49,7 @@ def convert_time_zone(self: Self, time_zone: str) -> ArrowSeries:

return self._compliant_series._from_native_series(result)

def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"]) -> ArrowSeries:
def timestamp(self: Self, time_unit: TimeUnit) -> ArrowSeries:
s = self._compliant_series._native_series
dtype = self._compliant_series.dtype
dtypes = import_dtypes_module(self._compliant_series._version)
Expand Down
6 changes: 3 additions & 3 deletions narwhals/_dask/expr_dt.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Literal

from narwhals._pandas_like.utils import calculate_timestamp_date
from narwhals._pandas_like.utils import calculate_timestamp_datetime
Expand All @@ -18,6 +17,7 @@
from typing_extensions import Self

from narwhals._dask.expr import DaskExpr
from narwhals.typing import TimeUnit


class DaskExprDateTimeNamespace:
Expand Down Expand Up @@ -145,8 +145,8 @@ def func(s: dx.Series, time_zone: str) -> dx.Series:
returns_scalar=self._compliant_expr._returns_scalar,
)

def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"]) -> DaskExpr:
def func(s: dx.Series, time_unit: Literal["ns", "us", "ms"]) -> dx.Series:
def timestamp(self: Self, time_unit: TimeUnit) -> DaskExpr:
def func(s: dx.Series, time_unit: TimeUnit) -> dx.Series:
dtype = native_to_narwhals_dtype(
s.dtype, self._compliant_expr._version, Implementation.DASK
)
Expand Down
56 changes: 56 additions & 0 deletions narwhals/_dask/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,23 @@
from typing import Sequence

from narwhals._dask.expr import DaskExpr
from narwhals.utils import _parse_time_unit_and_time_zone
from narwhals.utils import dtype_matches_time_unit_and_time_zone
from narwhals.utils import import_dtypes_module

if TYPE_CHECKING:
try:
import dask.dataframe.dask_expr as dx
except ModuleNotFoundError:
import dask_expr as dx

from datetime import timezone

from typing_extensions import Self

from narwhals._dask.dataframe import DaskLazyFrame
from narwhals.dtypes import DType
from narwhals.typing import TimeUnit
from narwhals.utils import Version

try:
Expand Down Expand Up @@ -118,6 +128,52 @@ def func(df: DaskLazyFrame) -> list[dx.Series]:
kwargs={},
)

def datetime(
    self: Self,
    time_unit: TimeUnit | Iterable[TimeUnit] | None,
    time_zone: str | timezone | Iterable[str | timezone | None] | None,
) -> DaskSelector:  # pragma: no cover
    """Build a selector keeping columns that match the time unit/zone filter.

    ``time_unit`` and ``time_zone`` are normalised once through
    ``_parse_time_unit_and_time_zone``; a column is kept when
    ``dtype_matches_time_unit_and_time_zone`` accepts its schema dtype.
    """
    dtypes = import_dtypes_module(version=self._version)
    time_units, time_zones = _parse_time_unit_and_time_zone(
        time_unit=time_unit, time_zone=time_zone
    )

    def evaluate_output_names(df: DaskLazyFrame) -> Sequence[str]:
        # Single source of truth for which columns pass the filter.
        selected = []
        for name in df.columns:
            if dtype_matches_time_unit_and_time_zone(
                dtype=df.schema[name],
                dtypes=dtypes,
                time_units=time_units,
                time_zones=time_zones,
            ):
                selected.append(name)
        return selected

    def func(df: DaskLazyFrame) -> list[dx.Series]:
        # Pull the native series for exactly the names selected above.
        return [df._native_frame[name] for name in evaluate_output_names(df)]

    return DaskSelector(
        func,
        depth=0,
        function_name="selector",
        evaluate_output_names=evaluate_output_names,
        alias_output_names=None,
        backend_version=self._backend_version,
        returns_scalar=False,
        version=self._version,
        kwargs={},
    )


class DaskSelector(DaskExpr):
def __repr__(self: Self) -> str: # pragma: no cover
Expand Down
49 changes: 49 additions & 0 deletions narwhals/_duckdb/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,19 @@

from narwhals._duckdb.expr import DuckDBExpr
from narwhals._duckdb.utils import ExprKind
from narwhals.utils import _parse_time_unit_and_time_zone
from narwhals.utils import dtype_matches_time_unit_and_time_zone
from narwhals.utils import import_dtypes_module

if TYPE_CHECKING:
from datetime import timezone

import duckdb
from typing_extensions import Self

from narwhals._duckdb.dataframe import DuckDBLazyFrame
from narwhals.dtypes import DType
from narwhals.typing import TimeUnit
from narwhals.utils import Version


Expand Down Expand Up @@ -111,6 +116,50 @@ def func(df: DuckDBLazyFrame) -> list[duckdb.Expression]:
version=self._version,
)

def datetime(
    self: Self,
    time_unit: TimeUnit | Iterable[TimeUnit] | None,
    time_zone: str | timezone | Iterable[str | timezone | None] | None,
) -> DuckDBSelector:
    """Build a selector keeping columns that match the time unit/zone filter.

    ``time_unit`` and ``time_zone`` are normalised once through
    ``_parse_time_unit_and_time_zone``; a column is kept when
    ``dtype_matches_time_unit_and_time_zone`` accepts its schema dtype.
    """
    dtypes = import_dtypes_module(version=self._version)
    time_units, time_zones = _parse_time_unit_and_time_zone(
        time_unit=time_unit, time_zone=time_zone
    )

    def evaluate_output_names(df: DuckDBLazyFrame) -> Sequence[str]:
        # Single source of truth for which columns pass the filter.
        selected = []
        for name in df.columns:
            if dtype_matches_time_unit_and_time_zone(
                dtype=df.schema[name],
                dtypes=dtypes,
                time_units=time_units,
                time_zones=time_zones,
            ):
                selected.append(name)
        return selected

    def func(df: DuckDBLazyFrame) -> list[duckdb.Expression]:
        # Wrap exactly the names selected above as column expressions.
        return [ColumnExpression(name) for name in evaluate_output_names(df)]

    return DuckDBSelector(
        func,
        function_name="selector",
        evaluate_output_names=evaluate_output_names,
        alias_output_names=None,
        backend_version=self._backend_version,
        expr_kind=ExprKind.TRANSFORM,
        version=self._version,
    )


class DuckDBSelector(DuckDBExpr):
def __repr__(self: Self) -> str: # pragma: no cover
Expand Down
4 changes: 2 additions & 2 deletions narwhals/_pandas_like/expr_dt.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Literal

from narwhals._expression_parsing import reuse_series_namespace_implementation

if TYPE_CHECKING:
from typing_extensions import Self

from narwhals._pandas_like.expr import PandasLikeExpr
from narwhals.typing import TimeUnit


class PandasLikeExprDateTimeNamespace:
Expand Down Expand Up @@ -101,7 +101,7 @@ def convert_time_zone(self: Self, time_zone: str) -> PandasLikeExpr:
self._compliant_expr, "dt", "convert_time_zone", time_zone=time_zone
)

def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"]) -> PandasLikeExpr:
def timestamp(self: Self, time_unit: TimeUnit) -> PandasLikeExpr:
    """Return an expression that applies the series-level ``dt.timestamp``.

    The work is delegated to ``reuse_series_namespace_implementation``,
    forwarding ``time_unit`` unchanged as a keyword argument.
    """
    compliant_expr = self._compliant_expr
    return reuse_series_namespace_implementation(
        compliant_expr,
        "dt",
        "timestamp",
        time_unit=time_unit,
    )
Loading

0 comments on commit 81ee931

Please sign in to comment.