From 54760afa041e0c07e51672e17b0ba797650a1867 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 3 Feb 2025 14:00:59 +0000 Subject: [PATCH 01/28] feat: add `nw.Schema.to_*` methods Will close #1912 - Starting with porting `nw.functions._from_dict_impl` - Thinking that `Schema` should have `._version: ClassVar[Version]` to remove the need for user-facing arg (https://github.com/narwhals-dev/narwhals/issues/1912#issuecomment-2629365749) --- narwhals/schema.py | 71 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/narwhals/schema.py b/narwhals/schema.py index 52991c476e..3cca3ac49f 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -11,7 +11,16 @@ from typing import Iterable from typing import Mapping +from narwhals.utils import Implementation +from narwhals.utils import Version +from narwhals.utils import parse_version + if TYPE_CHECKING: + from types import ModuleType + from typing import Any + + import polars as pl + import pyarrow as pa from typing_extensions import Self from narwhals.dtypes import DType @@ -85,3 +94,65 @@ def len(self: Self) -> int: Number of columns. """ return len(self) + + def to_native( + self: Self, *, native_namespace: ModuleType, dtype_backend: str | None = None + ) -> dict[str, Any] | pl.Schema | pa.Schema: + implementation = Implementation.from_native_namespace(native_namespace) + version = Version.MAIN + if implementation is Implementation.POLARS: + return self.to_polars(backend=implementation, version=version) + elif implementation.is_pandas_like(): + return self.to_pandas( + backend=implementation, version=version, dtype_backend=dtype_backend + ) + elif implementation is Implementation.PYARROW: + return self.to_arrow(backend=implementation, version=version) + + raise NotImplementedError + + def to_arrow( + self: Self, *, backend: ModuleType | Implementation | str, version: Version + ) -> pa.Schema: + from narwhals._arrow.utils import narwhals_to_native_dtype + + implementation = Implementation.from_backend(backend) + schema: pa.Schema = implementation.to_native_namespace().schema( + (name, narwhals_to_native_dtype(dtype, version)) + for name, dtype in self.items() + ) + return schema + + def to_pandas( + self: Self, + *, + backend: ModuleType | Implementation | str, + version: Version, + dtype_backend: str | None = None, + ) -> dict[str, Any]: + from narwhals._pandas_like.utils import narwhals_to_native_dtype + + implementation = Implementation.from_backend(backend) + backend_version = parse_version(implementation.to_native_namespace().__version__) + return { + name: narwhals_to_native_dtype( + dtype=dtype, + dtype_backend=dtype_backend, + implementation=implementation, + backend_version=backend_version, + version=version, + ) + for name, dtype in self.items() + } + + def to_polars( + self: Self, *, backend: ModuleType | Implementation | str, version: Version + ) -> pl.Schema: + from narwhals._polars.utils import narwhals_to_native_dtype + + implementation = Implementation.from_backend(backend) + schema: pl.Schema = implementation.to_native_namespace().Schema( + (name, narwhals_to_native_dtype(dtype, version)) + for name, dtype in self.items() + ) + return schema From e09426d1b04d631c35593a2ef766be0b6768e6db Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 4 Feb 2025 12:54:55 +0000 Subject: [PATCH 02/28] feat: replace `native_namespace` -> `backend` New API can start without deprecations, related #1931 --- narwhals/schema.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/narwhals/schema.py b/narwhals/schema.py index 3cca3ac49f..8d5cfba173 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -96,9 +96,12 @@ def len(self: Self) -> int: return len(self) def to_native( - self: Self, *, native_namespace: ModuleType, dtype_backend: str | None = None + self: Self, + *, + backend: ModuleType | Implementation | str, + dtype_backend: str | None = None, ) -> dict[str, Any] | pl.Schema | pa.Schema: - implementation = Implementation.from_native_namespace(native_namespace) + implementation = Implementation.from_backend(backend) version = Version.MAIN if implementation is Implementation.POLARS: return self.to_polars(backend=implementation, version=version) From d1e05769a49bc5981aba80b6d5571d5993bb0f04 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 4 Feb 2025 19:06:08 +0000 Subject: [PATCH 03/28] feat: adds `Schema._version` https://github.com/narwhals-dev/narwhals/pull/1924#issuecomment-2631115464 https://github.com/narwhals-dev/narwhals/pull/1924#issuecomment-2634670454 --- narwhals/schema.py | 27 +++++++++++---------------- narwhals/stable/v1/__init__.py | 2 ++ 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/narwhals/schema.py b/narwhals/schema.py index 8d5cfba173..b448bc6789 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -18,6 +18,7 @@ if TYPE_CHECKING: from types import ModuleType from typing import Any + from typing import ClassVar import polars as pl import pyarrow as pa @@ -64,6 +65,8 @@ class Schema(BaseSchema): 2 """ + _version: ClassVar[Version] = Version.MAIN + def __init__( self: Self, schema: Mapping[str, DType] | Iterable[tuple[str, DType]] | None = None, @@ -102,26 +105,21 @@ def to_native( dtype_backend: str | None = None, ) -> dict[str, Any] | pl.Schema | pa.Schema: implementation = Implementation.from_backend(backend) - version = Version.MAIN if implementation is Implementation.POLARS: - return self.to_polars(backend=implementation, version=version) + return self.to_polars(backend=implementation) elif implementation.is_pandas_like(): - return self.to_pandas( - backend=implementation, version=version, dtype_backend=dtype_backend - ) + return self.to_pandas(backend=implementation, dtype_backend=dtype_backend) elif implementation is Implementation.PYARROW: - return self.to_arrow(backend=implementation, version=version) + return self.to_arrow(backend=implementation) raise NotImplementedError - def to_arrow( - self: Self, *, backend: ModuleType | Implementation | str, version: Version - ) -> pa.Schema: + def to_arrow(self: Self, *, backend: ModuleType | Implementation | str) -> pa.Schema: from narwhals._arrow.utils import narwhals_to_native_dtype implementation = Implementation.from_backend(backend) schema: pa.Schema = implementation.to_native_namespace().schema( - (name, narwhals_to_native_dtype(dtype, version)) + (name, narwhals_to_native_dtype(dtype, self._version)) for name, dtype in self.items() ) return schema @@ -130,7 +128,6 @@ def to_pandas( self: Self, *, backend: ModuleType | Implementation | str, - version: Version, dtype_backend: str | None = None, ) -> dict[str, Any]: from narwhals._pandas_like.utils import narwhals_to_native_dtype @@ -143,19 +140,17 @@ def to_pandas( dtype_backend=dtype_backend, implementation=implementation, backend_version=backend_version, - version=version, + version=self._version, ) for name, dtype in self.items() } - def to_polars( - self: Self, *, backend: ModuleType | Implementation | str, version: Version - ) -> pl.Schema: + def to_polars(self: Self, *, backend: ModuleType | Implementation | str) -> pl.Schema: from narwhals._polars.utils import narwhals_to_native_dtype implementation = Implementation.from_backend(backend) schema: pl.Schema = implementation.to_native_namespace().Schema( - (name, narwhals_to_native_dtype(dtype, version)) + (name, narwhals_to_native_dtype(dtype, self._version)) for name, dtype in self.items() ) return schema diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index b7a218fcc7..de2ca50cf2 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -1081,6 +1081,8 @@ class Schema(NwSchema): *instantiated* Narwhals data type. Accepts a mapping or an iterable of tuples. """ + _version = Version.V1 + @overload def _stableify(obj: NwDataFrame[IntoFrameT]) -> DataFrame[IntoFrameT]: ... From 62b6e24b303dc34289a29fb608e984b166cdebda Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 4 Feb 2025 19:11:17 +0000 Subject: [PATCH 04/28] revert: remove `Schema.to_native` https://github.com/narwhals-dev/narwhals/pull/1924#issuecomment-2634670454 --- narwhals/schema.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/narwhals/schema.py b/narwhals/schema.py index b448bc6789..eb45fcded2 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -98,22 +98,6 @@ def len(self: Self) -> int: """ return len(self) - def to_native( - self: Self, - *, - backend: ModuleType | Implementation | str, - dtype_backend: str | None = None, - ) -> dict[str, Any] | pl.Schema | pa.Schema: - implementation = Implementation.from_backend(backend) - if implementation is Implementation.POLARS: - return self.to_polars(backend=implementation) - elif implementation.is_pandas_like(): - return self.to_pandas(backend=implementation, dtype_backend=dtype_backend) - elif implementation is Implementation.PYARROW: - return self.to_arrow(backend=implementation) - - raise NotImplementedError - def to_arrow(self: Self, *, backend: ModuleType | Implementation | str) -> pa.Schema: from narwhals._arrow.utils import narwhals_to_native_dtype From fe96f8cd834ce52a60af67e0d7780af52f47d2c1 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 4 Feb 2025 19:39:50 +0000 Subject: [PATCH 05/28] refactor: drop `backend`, use hard imports https://github.com/narwhals-dev/narwhals/pull/1924#issuecomment-2634847465 --- narwhals/schema.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/narwhals/schema.py b/narwhals/schema.py index eb45fcded2..b4040340d8 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -16,7 +16,6 @@ from narwhals.utils import parse_version if TYPE_CHECKING: - from types import ModuleType from typing import Any from typing import ClassVar @@ -98,26 +97,23 @@ def len(self: Self) -> int: """ return len(self) - def to_arrow(self: Self, *, backend: ModuleType | Implementation | str) -> pa.Schema: + def to_arrow(self: Self) -> pa.Schema: + import pyarrow as pa + from narwhals._arrow.utils import narwhals_to_native_dtype - implementation = Implementation.from_backend(backend) - schema: pa.Schema = implementation.to_native_namespace().schema( + return pa.schema( (name, narwhals_to_native_dtype(dtype, self._version)) for name, dtype in self.items() ) - return schema - def to_pandas( - self: Self, - *, - backend: ModuleType | Implementation | str, - dtype_backend: str | None = None, - ) -> dict[str, Any]: + def to_pandas(self: Self, *, dtype_backend: str | None = None) -> dict[str, Any]: + import pandas as pd + from narwhals._pandas_like.utils import narwhals_to_native_dtype - implementation = Implementation.from_backend(backend) - backend_version = parse_version(implementation.to_native_namespace().__version__) + backend_version = parse_version(pd.__version__) + implementation = Implementation.from_native_namespace(pd) return { name: narwhals_to_native_dtype( dtype=dtype, @@ -129,12 +125,12 @@ def to_pandas( for name, dtype in self.items() } - def to_polars(self: Self, *, backend: ModuleType | Implementation | str) -> pl.Schema: + def to_polars(self: Self) -> pl.Schema: + import polars as pl + from narwhals._polars.utils import narwhals_to_native_dtype - implementation = Implementation.from_backend(backend) - schema: pl.Schema = implementation.to_native_namespace().Schema( + return pl.Schema( (name, narwhals_to_native_dtype(dtype, self._version)) for name, dtype in self.items() ) - return schema From a575ea804c9fa045213ff9cf7c697b3c80b2fce2 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 4 Feb 2025 19:54:01 +0000 Subject: [PATCH 06/28] refactor: use `Schema.to_(arrow|polars)` in `from_dict` `pandas` seems a bit more complex, leaving for now --- narwhals/functions.py | 29 ++++------------------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index 74f0c84e55..19920c574c 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -22,6 +22,7 @@ from narwhals.dependencies import is_numpy_array from narwhals.exceptions import ShapeError from narwhals.expr import Expr +from narwhals.schema import Schema from narwhals.translate import from_native from narwhals.utils import Implementation from narwhals.utils import Version @@ -43,7 +44,6 @@ from typing_extensions import Self from narwhals.dtypes import DType - from narwhals.schema import Schema from narwhals.series import Series from narwhals.typing import IntoDataFrameT from narwhals.typing import IntoExpr @@ -495,18 +495,7 @@ def _from_dict_impl( # noqa: PLR0915 msg = f"Unsupported `backend` value.\nExpected one of {supported_eager_backends} or None, got: {eager_backend}." raise ValueError(msg) if eager_backend is Implementation.POLARS: - if schema: - from narwhals._polars.utils import ( - narwhals_to_native_dtype as polars_narwhals_to_native_dtype, - ) - - schema_pl = { - name: polars_narwhals_to_native_dtype(dtype, version=version) - for name, dtype in schema.items() - } - else: - schema_pl = None - + schema_pl = Schema(schema).to_polars() if schema else None native_frame = native_namespace.from_dict(data, schema=schema_pl) elif eager_backend in ( Implementation.PANDAS, @@ -552,18 +541,8 @@ def _from_dict_impl( # noqa: PLR0915 native_frame = native_frame.astype(schema) elif eager_backend is Implementation.PYARROW: - if schema: - from narwhals._arrow.utils import ( - narwhals_to_native_dtype as arrow_narwhals_to_native_dtype, - ) - - schema = native_namespace.schema( - [ - (name, arrow_narwhals_to_native_dtype(dtype, version)) - for name, dtype in schema.items() - ] - ) - native_frame = native_namespace.table(data, schema=schema) + pa_schema = Schema(schema).to_arrow() if schema is not None else schema + native_frame = native_namespace.table(data, schema=pa_schema) else: # pragma: no cover try: # implementation is UNKNOWN, Narwhals extension using this feature should From 72fc3defa1f9b840de05a6aa7af5e5c0b8c8b1ec Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 4 Feb 2025 21:32:51 +0000 Subject: [PATCH 07/28] refactor: use `Schema..to_pandas` in `from_dict` --- narwhals/functions.py | 24 ++++++++---------------- narwhals/schema.py | 33 ++++++++++++++++++++------------- 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index 19920c574c..5cd33fdd74 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -457,12 +457,12 @@ def from_dict( ) -def _from_dict_impl( # noqa: PLR0915 +def _from_dict_impl( data: dict[str, Any], schema: dict[str, DType] | Schema | None = None, *, backend: ModuleType | Implementation | str | None = None, - version: Version, + version: Version, # noqa: ARG001 ) -> DataFrame[Any]: from narwhals.series import Series from narwhals.translate import to_native @@ -523,22 +523,14 @@ def _from_dict_impl( # noqa: PLR0915 if schema: from narwhals._pandas_like.utils import get_dtype_backend - from narwhals._pandas_like.utils import ( - narwhals_to_native_dtype as pandas_like_narwhals_to_native_dtype, - ) - backend_version = parse_version(native_namespace.__version__) - schema = { - name: pandas_like_narwhals_to_native_dtype( - dtype=schema[name], - dtype_backend=get_dtype_backend(native_type, eager_backend), - implementation=eager_backend, - backend_version=backend_version, - version=version, + pd_schema = Schema(schema).to_pandas( + dtype_backend=( + get_dtype_backend(native_type, eager_backend) + for native_type in native_frame.dtypes ) - for name, native_type in native_frame.dtypes.items() - } - native_frame = native_frame.astype(schema) + ) + native_frame = native_frame.astype(pd_schema) elif eager_backend is Implementation.PYARROW: pa_schema = Schema(schema).to_arrow() if schema is not None else schema diff --git a/narwhals/schema.py b/narwhals/schema.py index b4040340d8..42e5ed140c 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -7,6 +7,7 @@ from __future__ import annotations from collections import OrderedDict +from functools import partial from typing import TYPE_CHECKING from typing import Iterable from typing import Mapping @@ -107,23 +108,29 @@ def to_arrow(self: Self) -> pa.Schema: for name, dtype in self.items() ) - def to_pandas(self: Self, *, dtype_backend: str | None = None) -> dict[str, Any]: + def to_pandas( + self: Self, *, dtype_backend: str | Iterable[str] | None = None + ) -> dict[str, Any]: import pandas as pd from narwhals._pandas_like.utils import narwhals_to_native_dtype - backend_version = parse_version(pd.__version__) - implementation = Implementation.from_native_namespace(pd) - return { - name: narwhals_to_native_dtype( - dtype=dtype, - dtype_backend=dtype_backend, - implementation=implementation, - backend_version=backend_version, - version=self._version, - ) - for name, dtype in self.items() - } + to_native = partial( + narwhals_to_native_dtype, + implementation=Implementation.from_native_namespace(pd), + backend_version=parse_version(pd.__version__), + version=self._version, + ) + if dtype_backend is None or isinstance(dtype_backend, str): + return { + name: to_native(dtype=dtype, dtype_backend=dtype_backend) + for name, dtype in self.items() + } + else: + return { + name: to_native(dtype=dtype, dtype_backend=backend) + for name, dtype, backend in zip(self.keys(), self.values(), dtype_backend) + } def to_polars(self: Self) -> pl.Schema: import polars as pl From 82b1623f879a925d847f639ecf174305aecb3767 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 4 Feb 2025 21:43:59 +0000 Subject: [PATCH 08/28] refactor: remove `version` parameter from `_from_dict_impl` When needed, this is now available on `Schema` (d1e05769a49bc5981aba80b6d5571d5993bb0f04) --- narwhals/functions.py | 8 +------- narwhals/stable/v1/__init__.py | 7 +------ 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index 5cd33fdd74..50d83d0b77 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -449,12 +449,7 @@ def from_dict( backend = validate_native_namespace_and_backend( backend, native_namespace, emit_deprecation_warning=True ) - return _from_dict_impl( - data, - schema, - backend=backend, - version=Version.MAIN, - ) + return _from_dict_impl(data, schema, backend=backend) def _from_dict_impl( @@ -462,7 +457,6 @@ def _from_dict_impl( schema: dict[str, DType] | Schema | None = None, *, backend: ModuleType | Implementation | str | None = None, - version: Version, # noqa: ARG001 ) -> DataFrame[Any]: from narwhals.series import Series from narwhals.translate import to_native diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index de2ca50cf2..f154fa990c 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -2202,12 +2202,7 @@ def from_dict( backend, native_namespace, emit_deprecation_warning=False ) return _stableify( # type: ignore[no-any-return] - _from_dict_impl( - data, - schema, - backend=backend, - version=Version.V1, - ) + _from_dict_impl(data, schema, backend=backend) ) From 7d9055591f7a459a230e863c0b78fea79136ab94 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 5 Feb 2025 09:21:39 +0000 Subject: [PATCH 09/28] chore: ignore banned imports https://github.com/narwhals-dev/narwhals/pull/1924#issuecomment-2635181448 --- narwhals/schema.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/narwhals/schema.py b/narwhals/schema.py index 42e5ed140c..bdf2c6a614 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -99,7 +99,7 @@ def len(self: Self) -> int: return len(self) def to_arrow(self: Self) -> pa.Schema: - import pyarrow as pa + import pyarrow as pa # ignore-banned-import from narwhals._arrow.utils import narwhals_to_native_dtype @@ -111,7 +111,7 @@ def to_arrow(self: Self) -> pa.Schema: def to_pandas( self: Self, *, dtype_backend: str | Iterable[str] | None = None ) -> dict[str, Any]: - import pandas as pd + import pandas as pd # ignore-banned-import from narwhals._pandas_like.utils import narwhals_to_native_dtype @@ -133,7 +133,7 @@ def to_pandas( } def to_polars(self: Self) -> pl.Schema: - import polars as pl + import polars as pl # ignore-banned-import from narwhals._polars.utils import narwhals_to_native_dtype From 89254450a822ef195d0f2277656cc1e1b2b00e48 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 5 Feb 2025 09:57:30 +0000 Subject: [PATCH 10/28] test: adds `test_schema_to_pandas` Resolves https://github.com/narwhals-dev/narwhals/pull/1924#discussion_r1941939064 --- tests/frame/schema_test.py | 66 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/tests/frame/schema_test.py b/tests/frame/schema_test.py index 1590e4a1f2..ea3511e39c 100644 --- a/tests/frame/schema_test.py +++ b/tests/frame/schema_test.py @@ -17,6 +17,8 @@ from tests.utils import PANDAS_VERSION if TYPE_CHECKING: + from collections.abc import Iterable + from tests.utils import Constructor from tests.utils import ConstructorEager @@ -330,3 +332,67 @@ def test_all_nulls_pandas() -> None: nw.from_native(pd.Series([None] * 3, dtype="object"), series_only=True).dtype == nw.Object ) + + +@pytest.mark.parametrize( + ("dtype_backend", "expected"), + [ + ( + None, + {"a": "int64", "b": str, "c": "bool", "d": "float64", "e": "datetime64[us]"}, + ), + ( + "numpy", + {"a": "int64", "b": str, "c": "bool", "d": "float64", "e": "datetime64[us]"}, + ), + ( + "pyarrow-nullable", + { + "a": "Int64[pyarrow]", + "b": "string[pyarrow]", + "c": "boolean[pyarrow]", + "d": "Float64[pyarrow]", + "e": "timestamp[us][pyarrow]", + }, + ), + ( + "pandas-nullable", + { + "a": "Int64", + "b": "string", + "c": "boolean", + "d": "Float64", + "e": "datetime64[us]", + }, + ), + ( + [ + "pandas-nullable", + "pyarrow-nullable", + "numpy", + "pyarrow-nullable", + "pandas-nullable", + ], + { + "a": "Int64", + "b": "string[pyarrow]", + "c": "bool", + "d": "Float64[pyarrow]", + "e": "datetime64[us]", + }, + ), + ], +) +def test_schema_to_pandas( + dtype_backend: str | Iterable[str] | None, expected: dict[str, Any] +) -> None: + schema = nw.Schema( + { + "a": nw.Int64(), + "b": nw.String(), + "c": nw.Boolean(), + "d": nw.Float64(), + "e": nw.Datetime("us"), + } + ) + assert schema.to_pandas(dtype_backend=dtype_backend) == expected From 83949f8da3c038556ab7bd491f686ef8064dbb7c Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 5 Feb 2025 10:37:13 +0000 Subject: [PATCH 11/28] test: use `[ns]` instead of `[us]` https://github.com/narwhals-dev/narwhals/actions/runs/13154862799/job/36709604399?pr=1924 --- tests/frame/schema_test.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/frame/schema_test.py b/tests/frame/schema_test.py index ea3511e39c..94386acc67 100644 --- a/tests/frame/schema_test.py +++ b/tests/frame/schema_test.py @@ -339,11 +339,11 @@ def test_all_nulls_pandas() -> None: [ ( None, - {"a": "int64", "b": str, "c": "bool", "d": "float64", "e": "datetime64[us]"}, + {"a": "int64", "b": str, "c": "bool", "d": "float64", "e": "datetime64[ns]"}, ), ( "numpy", - {"a": "int64", "b": str, "c": "bool", "d": "float64", "e": "datetime64[us]"}, + {"a": "int64", "b": str, "c": "bool", "d": "float64", "e": "datetime64[ns]"}, ), ( "pyarrow-nullable", @@ -352,7 +352,7 @@ def test_all_nulls_pandas() -> None: "b": "string[pyarrow]", "c": "boolean[pyarrow]", "d": "Float64[pyarrow]", - "e": "timestamp[us][pyarrow]", + "e": "timestamp[ns][pyarrow]", }, ), ( @@ -362,7 +362,7 @@ def test_all_nulls_pandas() -> None: "b": "string", "c": "boolean", "d": "Float64", - "e": "datetime64[us]", + "e": "datetime64[ns]", }, ), ( @@ -378,7 +378,7 @@ def test_all_nulls_pandas() -> None: "b": "string[pyarrow]", "c": "bool", "d": "Float64[pyarrow]", - "e": "datetime64[us]", + "e": "datetime64[ns]", }, ), ], @@ -392,7 +392,7 @@ def test_schema_to_pandas( "b": nw.String(), "c": nw.Boolean(), "d": nw.Float64(), - "e": nw.Datetime("us"), + "e": nw.Datetime("ns"), } ) assert schema.to_pandas(dtype_backend=dtype_backend) == expected From e62221e0e83496530e60ba89cd674c4b3016ad53 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 5 Feb 2025 10:49:40 +0000 Subject: [PATCH 12/28] fix: return `dict` when `pl.Schema` unavailable https://github.com/narwhals-dev/narwhals/actions/runs/13154862799/job/36709604399?pr=1924#step:9:737 --- narwhals/schema.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/narwhals/schema.py b/narwhals/schema.py index bdf2c6a614..dd1ee6ac4a 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -132,12 +132,13 @@ def to_pandas( for name, dtype, backend in zip(self.keys(), self.values(), dtype_backend) } - def to_polars(self: Self) -> pl.Schema: + def to_polars(self: Self) -> pl.Schema | Any: import polars as pl # ignore-banned-import from narwhals._polars.utils import narwhals_to_native_dtype - return pl.Schema( + it = ( (name, narwhals_to_native_dtype(dtype, self._version)) for name, dtype in self.items() ) + return pl.Schema(it) if parse_version(pl.__version__) >= (1, 0, 0) else dict(it) From 347f5ace717792150414614d97c536d08b2e9d49 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 5 Feb 2025 14:00:30 +0000 Subject: [PATCH 13/28] fix: handle unequal length case --- narwhals/schema.py | 24 +++++++++++++++++++++++- tests/frame/schema_test.py | 34 ++++++++++++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/narwhals/schema.py b/narwhals/schema.py index dd1ee6ac4a..dca17c8763 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -127,9 +127,31 @@ def to_pandas( for name, dtype in self.items() } else: + backends = tuple(dtype_backend) + if len(backends) != len(self): + from itertools import chain + from itertools import islice + from itertools import repeat + + n_user, n_actual = len(backends), len(self) + suggestion = tuple( + islice( + chain.from_iterable(islice(repeat(backends), n_actual)), n_actual + ) + ) + msg = ( + f"Provided {n_user!r} `dtype_backend`(s), but schema contains {n_actual!r} field(s).\n" + "Hint: instead of\n" + f" schema.to_pandas(dtype_backend={backends})\n" + "you may want to use\n" + f" schema.to_pandas(dtype_backend={backends[0]})\n" + f"or\n" + f" schema.to_pandas(dtype_backend={suggestion})" + ) + raise ValueError(msg) return { name: to_native(dtype=dtype, dtype_backend=backend) - for name, dtype, backend in zip(self.keys(), self.values(), dtype_backend) + for name, dtype, backend in zip(self.keys(), self.values(), backends) } def to_polars(self: Self) -> pl.Schema | Any: diff --git a/tests/frame/schema_test.py b/tests/frame/schema_test.py index 94386acc67..c571defec9 100644 --- a/tests/frame/schema_test.py +++ b/tests/frame/schema_test.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re from datetime import date from datetime import datetime from datetime import timedelta @@ -17,7 +18,7 @@ from tests.utils import PANDAS_VERSION if TYPE_CHECKING: - from collections.abc import Iterable + from collections.abc import Sequence from tests.utils import Constructor from tests.utils import ConstructorEager @@ -384,7 +385,7 @@ def test_all_nulls_pandas() -> None: ], ) def test_schema_to_pandas( - dtype_backend: str | Iterable[str] | None, expected: dict[str, Any] + dtype_backend: str | Sequence[str] | None, expected: dict[str, Any] ) -> None: schema = nw.Schema( { @@ -396,3 +397,32 @@ def test_schema_to_pandas( } ) assert schema.to_pandas(dtype_backend=dtype_backend) == expected + + +def test_schema_to_pandas_strict_zip() -> None: + schema = nw.Schema( + { + "a": nw.Int64(), + "b": nw.String(), + "c": nw.Boolean(), + "d": nw.Float64(), + "e": nw.Datetime("ns"), + } + ) + dtype_backend = ["pandas-nullable", "pyarrow-nullable", "numpy"] + tup = ( + "pandas-nullable", + "pyarrow-nullable", + "numpy", + "pandas-nullable", + "pyarrow-nullable", + ) + suggestion = re.escape(f"(dtype_backend={tup})") + with pytest.raises( + ValueError, + match=re.compile( + rf".+3.+but.+schema contains.+5.+field.+Hint.+schema.to_pandas{suggestion}", + re.DOTALL, + ), + ): + schema.to_pandas(dtype_backend=dtype_backend) From d423a0bacead23bb4755b31a4bce3da3031ea21d Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 5 Feb 2025 15:24:21 +0000 Subject: [PATCH 14/28] docs: add docs for new methods https://github.com/narwhals-dev/narwhals/pull/1924#discussion_r1942667014, https://github.com/narwhals-dev/narwhals/pull/1924#discussion_r1942667155, https://github.com/narwhals-dev/narwhals/pull/1924#discussion_r1942667344 --- narwhals/schema.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/narwhals/schema.py b/narwhals/schema.py index dca17c8763..896f67543c 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -99,6 +99,15 @@ def len(self: Self) -> int: return len(self) def to_arrow(self: Self) -> pa.Schema: + """Convert Schema to a pyarrow Schema. + + Examples: + >>> import narwhals as nw + >>> schema = nw.Schema({"a": nw.Int64(), "b": nw.Datetime("ns")}) + >>> schema.to_arrow() + a: int64 + b: timestamp[ns] + """ import pyarrow as pa # ignore-banned-import from narwhals._arrow.utils import narwhals_to_native_dtype @@ -111,6 +120,24 @@ def to_arrow(self: Self) -> pa.Schema: def to_pandas( self: Self, *, dtype_backend: str | Iterable[str] | None = None ) -> dict[str, Any]: + """Convert Schema to an ordered mapping of column names to their pandas data type. + + Arguments: + dtype_backend: Backend(s) used for the native types. When providing more than + one, the length of the iterable must be equal to the length of the schema. + + Examples: + >>> import narwhals as nw + >>> schema = nw.Schema({"a": nw.Int64(), "b": nw.Datetime("ns")}) + >>> schema.to_pandas() + {'a': 'int64', 'b': 'datetime64[ns]'} + + >>> schema.to_pandas(dtype_backend="pyarrow-nullable") + {'a': 'Int64[pyarrow]', 'b': 'timestamp[ns][pyarrow]'} + + >>> schema.to_pandas(dtype_backend=["pandas-nullable", "pyarrow-nullable"]) + {'a': 'Int64', 'b': 'timestamp[ns][pyarrow]'} + """ import pandas as pd # ignore-banned-import from narwhals._pandas_like.utils import narwhals_to_native_dtype @@ -155,6 +182,17 @@ def to_pandas( } def to_polars(self: Self) -> pl.Schema | Any: + """Convert Schema to a polars Schema. + + Returns: + A polars schema or plain dict (prior to polars 1.0). + + Examples: + >>> import narwhals as nw + >>> schema = nw.Schema({"a": nw.Int64(), "b": nw.Datetime("ns")}) + >>> schema.to_polars() # doctest:+SKIP + Schema([('a', Int64), ('b', Datetime(time_unit='ns', time_zone=None))]) + """ import polars as pl # ignore-banned-import from narwhals._polars.utils import narwhals_to_native_dtype From fd283374c7e77f23afdcd676b71631968b1261fc Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 5 Feb 2025 15:44:27 +0000 Subject: [PATCH 15/28] docs: add "Returns" to all https://github.com/narwhals-dev/narwhals/issues/1943#issuecomment-2637091363 https://results.pre-commit.ci/run/github/760058710/1738769072.VVgrdRT3RtKAv9VJ_4DEJg --- narwhals/schema.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/narwhals/schema.py b/narwhals/schema.py index 896f67543c..f90048a963 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -101,6 +101,9 @@ def len(self: Self) -> int: def to_arrow(self: Self) -> pa.Schema: """Convert Schema to a pyarrow Schema. + Returns: + A pyarrow Schema. + Examples: >>> import narwhals as nw >>> schema = nw.Schema({"a": nw.Int64(), "b": nw.Datetime("ns")}) @@ -126,6 +129,9 @@ def to_pandas( dtype_backend: Backend(s) used for the native types. When providing more than one, the length of the iterable must be equal to the length of the schema. + Returns: + An ordered mapping of column names to their pandas data type. + Examples: >>> import narwhals as nw >>> schema = nw.Schema({"a": nw.Int64(), "b": nw.Datetime("ns")}) @@ -185,7 +191,7 @@ def to_polars(self: Self) -> pl.Schema | Any: """Convert Schema to a polars Schema. Returns: - A polars schema or plain dict (prior to polars 1.0). + A polars Schema or plain dict (prior to polars 1.0). Examples: >>> import narwhals as nw From 4581d95912f36f6fa664576ab3ebd31888afd2cf Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 7 Feb 2025 17:29:36 +0000 Subject: [PATCH 16/28] refactor: `to_pandas` -> positional or keyword https://github.com/narwhals-dev/narwhals/pull/1924#discussion_r1946726033 --- narwhals/functions.py | 6 ++---- narwhals/schema.py | 12 ++++++------ tests/frame/schema_test.py | 6 +++--- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index e94f0f1960..7eb52ea765 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -520,10 +520,8 @@ def _from_dict_impl( from narwhals._pandas_like.utils import get_dtype_backend pd_schema = Schema(schema).to_pandas( - dtype_backend=( - get_dtype_backend(native_type, eager_backend) - for native_type in native_frame.dtypes - ) + get_dtype_backend(native_type, eager_backend) + for native_type in native_frame.dtypes ) native_frame = native_frame.astype(pd_schema) diff --git a/narwhals/schema.py b/narwhals/schema.py index f90048a963..d25ec8ad41 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -121,7 +121,7 @@ def to_arrow(self: Self) -> pa.Schema: ) def to_pandas( - self: Self, *, dtype_backend: str | Iterable[str] | None = None + self: Self, dtype_backend: str | Iterable[str] | None = None ) -> dict[str, Any]: """Convert Schema to an ordered mapping of column names to their pandas data type. @@ -138,10 +138,10 @@ def to_pandas( >>> schema.to_pandas() {'a': 'int64', 'b': 'datetime64[ns]'} - >>> schema.to_pandas(dtype_backend="pyarrow-nullable") + >>> schema.to_pandas("pyarrow-nullable") {'a': 'Int64[pyarrow]', 'b': 'timestamp[ns][pyarrow]'} - >>> schema.to_pandas(dtype_backend=["pandas-nullable", "pyarrow-nullable"]) + >>> schema.to_pandas(["pandas-nullable", "pyarrow-nullable"]) {'a': 'Int64', 'b': 'timestamp[ns][pyarrow]'} """ import pandas as pd # ignore-banned-import @@ -175,11 +175,11 @@ def to_pandas( msg = ( f"Provided {n_user!r} `dtype_backend`(s), but schema contains {n_actual!r} field(s).\n" "Hint: instead of\n" - f" schema.to_pandas(dtype_backend={backends})\n" + f" schema.to_pandas({backends})\n" "you may want to use\n" - f" schema.to_pandas(dtype_backend={backends[0]})\n" + f" schema.to_pandas({backends[0]})\n" f"or\n" - f" schema.to_pandas(dtype_backend={suggestion})" + f" schema.to_pandas({suggestion})" ) raise ValueError(msg) return { diff --git a/tests/frame/schema_test.py b/tests/frame/schema_test.py index c571defec9..427948913b 100644 --- a/tests/frame/schema_test.py +++ b/tests/frame/schema_test.py @@ -396,7 +396,7 @@ def test_schema_to_pandas( "e": nw.Datetime("ns"), } ) - assert schema.to_pandas(dtype_backend=dtype_backend) == expected + assert schema.to_pandas(dtype_backend) == expected def test_schema_to_pandas_strict_zip() -> None: @@ -417,7 +417,7 @@ def test_schema_to_pandas_strict_zip() -> None: "pandas-nullable", "pyarrow-nullable", ) - suggestion = re.escape(f"(dtype_backend={tup})") + suggestion = re.escape(f"({tup})") with pytest.raises( ValueError, match=re.compile( @@ -425,4 +425,4 @@ def test_schema_to_pandas_strict_zip() -> None: re.DOTALL, ), ): - schema.to_pandas(dtype_backend=dtype_backend) + schema.to_pandas(dtype_backend) From 95a123a644215683fec0eb9619e25103b915d649 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 7 Feb 2025 17:32:37 +0000 Subject: [PATCH 17/28] refactor: use `Implementation.PANDAS` https://github.com/narwhals-dev/narwhals/pull/1924#discussion_r1946878340 --- narwhals/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/schema.py b/narwhals/schema.py index d25ec8ad41..0466bbb431 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -150,7 +150,7 @@ def to_pandas( to_native = partial( narwhals_to_native_dtype, - implementation=Implementation.from_native_namespace(pd), + implementation=Implementation.PANDAS, backend_version=parse_version(pd.__version__), version=self._version, ) From 47ad060eb7ac8b4e60b765e879ff5c1b30c137db Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 7 Feb 2025 17:39:17 +0000 Subject: [PATCH 18/28] test: try removing doctest skip https://github.com/narwhals-dev/narwhals/pull/1924#discussion_r1946730458 --- narwhals/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/schema.py b/narwhals/schema.py index 0466bbb431..bdf3751211 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -196,7 +196,7 @@ def to_polars(self: Self) -> pl.Schema | Any: Examples: >>> import narwhals as nw >>> schema = nw.Schema({"a": nw.Int64(), "b": nw.Datetime("ns")}) - >>> schema.to_polars() # doctest:+SKIP + >>> schema.to_polars() Schema([('a', Int64), ('b', Datetime(time_unit='ns', time_zone=None))]) """ import polars as pl # ignore-banned-import From 99b3925ed14e370736551fd19280b5a3fdb81ee3 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 7 Feb 2025 18:18:24 +0000 Subject: [PATCH 19/28] test: fix `to_polars` doctest repr https://github.com/narwhals-dev/narwhals/actions/runs/13205078790/job/36866210140?pr=1924#step:10:347 --- narwhals/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/schema.py b/narwhals/schema.py index bdf3751211..0e56dc0530 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -197,7 +197,7 @@ def to_polars(self: Self) -> pl.Schema | Any: >>> import narwhals as nw >>> schema = nw.Schema({"a": nw.Int64(), "b": nw.Datetime("ns")}) >>> schema.to_polars() - Schema([('a', Int64), ('b', Datetime(time_unit='ns', time_zone=None))]) + Schema({'a': Int64, 'b': Datetime(time_unit='ns', time_zone=None)}) """ import polars as pl # ignore-banned-import From 881aae0219482613098725d9a6d1986a78e983ca Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 7 Feb 2025 18:35:16 +0000 Subject: [PATCH 20/28] style: rename `it` to `schema` https://github.com/narwhals-dev/narwhals/pull/1924#discussion_r1946727454 --- narwhals/schema.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/narwhals/schema.py b/narwhals/schema.py index 0e56dc0530..1c25286e27 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -203,8 +203,12 @@ def to_polars(self: Self) -> pl.Schema | Any: from narwhals._polars.utils import narwhals_to_native_dtype - it = ( + schema = ( (name, narwhals_to_native_dtype(dtype, self._version)) for name, dtype in self.items() ) - return pl.Schema(it) if parse_version(pl.__version__) >= (1, 0, 0) else dict(it) + return ( + pl.Schema(schema) + if parse_version(pl.__version__) >= (1, 0, 0) + else dict(schema) + ) From 5e9beb2382ad9644515872b2f2380fd75a52f1b8 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 8 Feb 2025 15:25:11 +0000 Subject: [PATCH 21/28] match pandas dtype_backend --- narwhals/_pandas_like/series.py | 4 +- narwhals/_pandas_like/utils.py | 112 +++++++++++++++----------------- narwhals/functions.py | 9 +-- narwhals/schema.py | 14 ++-- narwhals/typing.py | 1 + tests/frame/schema_test.py | 38 ++++++----- 6 files changed, 89 insertions(+), 89 deletions(-) diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index a652ea7a99..bb59a439c4 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -241,14 +241,14 @@ def cast(self: Self, dtype: DType | type[DType]) -> Self: dtype_backend = get_dtype_backend( dtype=ser.dtype, implementation=self._implementation ) - dtype = narwhals_to_native_dtype( + pd_dtype = narwhals_to_native_dtype( dtype, dtype_backend=dtype_backend, implementation=self._implementation, backend_version=self._backend_version, version=self._version, ) - return self._from_native_series(ser.astype(dtype)) + return self._from_native_series(ser.astype(pd_dtype)) def item(self: Self, index: int | None) -> Any: # cuDF doesn't have Series.item(). diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index e02807de95..99c1892307 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -23,10 +23,13 @@ T = TypeVar("T") if TYPE_CHECKING: + from pandas._typing import Dtype as PandasDtype + from narwhals._pandas_like.dataframe import PandasLikeDataFrame from narwhals._pandas_like.expr import PandasLikeExpr from narwhals._pandas_like.series import PandasLikeSeries from narwhals.dtypes import DType + from narwhals.typing import DTypeBackend ExprT = TypeVar("ExprT", bound=PandasLikeExpr) @@ -499,113 +502,106 @@ def native_to_narwhals_dtype( raise AssertionError(msg) -def get_dtype_backend(dtype: Any, implementation: Implementation) -> str: +def get_dtype_backend(dtype: Any, implementation: Implementation) -> DTypeBackend: + """Get dtype backend for pandas type. + + Matches pandas' `dtype_backend` argument in `convert_dtypes`. + """ if implementation in {Implementation.PANDAS, Implementation.MODIN}: import pandas as pd if hasattr(pd, "ArrowDtype") and isinstance(dtype, pd.ArrowDtype): - return "pyarrow-nullable" + return "pyarrow" with suppress(AttributeError): if isinstance(dtype, pd.core.dtypes.dtypes.BaseMaskedDtype): - return "pandas-nullable" - return "numpy" - else: # pragma: no cover - return "numpy" + return "numpy_nullable" + return None def narwhals_to_native_dtype( # noqa: PLR0915 dtype: DType | type[DType], - dtype_backend: str | None, + dtype_backend: DTypeBackend, implementation: Implementation, backend_version: tuple[int, ...], version: Version, -) -> Any: +) -> str | PandasDtype: + if dtype_backend is not None and dtype_backend not in {"pyarrow", "numpy_nullable"}: + msg = f"Expected one of {{None, 'pyarrow', 'numpy_nullable'}}, got: '{dtype_backend}'" + raise ValueError(msg) dtypes = import_dtypes_module(version) if isinstance_or_issubclass(dtype, dtypes.Float64): - if dtype_backend == "pyarrow-nullable": + if dtype_backend == "pyarrow": return "Float64[pyarrow]" - if dtype_backend == "pandas-nullable": + elif dtype_backend == "numpy_nullable": return "Float64" - else: - return "float64" + return "float64" if isinstance_or_issubclass(dtype, dtypes.Float32): - if dtype_backend == "pyarrow-nullable": + if dtype_backend == "pyarrow": return "Float32[pyarrow]" - if dtype_backend == "pandas-nullable": + elif dtype_backend == "numpy_nullable": return "Float32" - else: - return "float32" + return "float32" if isinstance_or_issubclass(dtype, dtypes.Int64): - if dtype_backend == "pyarrow-nullable": + if dtype_backend == "pyarrow": return "Int64[pyarrow]" - if dtype_backend == "pandas-nullable": + elif dtype_backend == "numpy_nullable": return "Int64" - else: - return "int64" + return "int64" if isinstance_or_issubclass(dtype, dtypes.Int32): - if dtype_backend == "pyarrow-nullable": + if dtype_backend == "pyarrow": return "Int32[pyarrow]" - if dtype_backend == "pandas-nullable": + elif dtype_backend == "numpy_nullable": return "Int32" - else: - return "int32" + return "int32" if isinstance_or_issubclass(dtype, dtypes.Int16): - if dtype_backend == "pyarrow-nullable": + if dtype_backend == "pyarrow": return "Int16[pyarrow]" - if dtype_backend == "pandas-nullable": + elif dtype_backend == "numpy_nullable": return "Int16" - else: - return "int16" + return "int16" if isinstance_or_issubclass(dtype, dtypes.Int8): - if dtype_backend == "pyarrow-nullable": + if dtype_backend == "pyarrow": return "Int8[pyarrow]" - if dtype_backend == "pandas-nullable": + elif dtype_backend == "numpy_nullable": return "Int8" - else: - return "int8" + return "int8" if isinstance_or_issubclass(dtype, dtypes.UInt64): - if dtype_backend == "pyarrow-nullable": + if dtype_backend == "pyarrow": return "UInt64[pyarrow]" - if dtype_backend == "pandas-nullable": + elif dtype_backend == "numpy_nullable": return "UInt64" - else: - return "uint64" + return "uint64" if isinstance_or_issubclass(dtype, dtypes.UInt32): - if dtype_backend == "pyarrow-nullable": + if dtype_backend == "pyarrow": return "UInt32[pyarrow]" - if dtype_backend == "pandas-nullable": + elif dtype_backend == "numpy_nullable": return "UInt32" - else: - return "uint32" + return "uint32" if isinstance_or_issubclass(dtype, dtypes.UInt16): - if dtype_backend == "pyarrow-nullable": + if dtype_backend == "pyarrow": return "UInt16[pyarrow]" - if dtype_backend == "pandas-nullable": + elif dtype_backend == "numpy_nullable": return "UInt16" - else: - return "uint16" + return "uint16" if isinstance_or_issubclass(dtype, dtypes.UInt8): - if dtype_backend == "pyarrow-nullable": + if dtype_backend == "pyarrow": return "UInt8[pyarrow]" - if dtype_backend == "pandas-nullable": + elif dtype_backend == "numpy_nullable": return "UInt8" - else: - return "uint8" + return "uint8" if isinstance_or_issubclass(dtype, dtypes.String): - if dtype_backend == "pyarrow-nullable": + if dtype_backend == "pyarrow": return "string[pyarrow]" - if dtype_backend == "pandas-nullable": + elif dtype_backend == "numpy_nullable": return "string" - else: - return str + return str if isinstance_or_issubclass(dtype, dtypes.Boolean): - if dtype_backend == "pyarrow-nullable": + if dtype_backend == "pyarrow": return "boolean[pyarrow]" - if dtype_backend == "pandas-nullable": + elif dtype_backend == "numpy_nullable": return "boolean" - else: - return "bool" + return "bool" if isinstance_or_issubclass(dtype, dtypes.Categorical): # TODO(Unassigned): is there no pyarrow-backed categorical? # or at least, convert_dtypes(dtype_backend='pyarrow') doesn't @@ -622,7 +618,7 @@ def narwhals_to_native_dtype( # noqa: PLR0915 ): # pragma: no cover dt_time_unit = "ns" - if dtype_backend == "pyarrow-nullable": + if dtype_backend == "pyarrow": tz_part = f", tz={dt_time_zone}" if dt_time_zone else "" return f"timestamp[{dt_time_unit}{tz_part}][pyarrow]" else: @@ -636,7 +632,7 @@ def narwhals_to_native_dtype( # noqa: PLR0915 dt_time_unit = "ns" return ( f"duration[{du_time_unit}][pyarrow]" - if dtype_backend == "pyarrow-nullable" + if dtype_backend == "pyarrow" else f"timedelta64[{du_time_unit}]" ) if isinstance_or_issubclass(dtype, dtypes.Date): diff --git a/narwhals/functions.py b/narwhals/functions.py index 7eb52ea765..1597428d35 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -343,10 +343,11 @@ def _new_series_impl( ) backend_version = parse_version(native_namespace.__version__) - dtype = pandas_like_narwhals_to_native_dtype( + pd_dtype = pandas_like_narwhals_to_native_dtype( dtype, None, implementation, backend_version, version ) - native_series = native_namespace.Series(values, name=name, dtype=dtype) + native_series = native_namespace.Series(values, name=name, dtype=pd_dtype) + native_series = native_namespace.Series(values, name=name) elif implementation is Implementation.PYARROW: if dtype: @@ -735,7 +736,7 @@ def _from_numpy_impl( ) backend_version = parse_version(native_namespace.__version__) - schema = { + pd_schema = { name: pandas_like_narwhals_to_native_dtype( dtype=schema[name], dtype_backend=get_dtype_backend(native_type, implementation), @@ -746,7 +747,7 @@ def _from_numpy_impl( for name, native_type in schema.items() } native_frame = native_namespace.DataFrame(data, columns=schema.keys()).astype( - schema + pd_schema ) elif isinstance(schema, list): native_frame = native_namespace.DataFrame(data, columns=schema) diff --git a/narwhals/schema.py b/narwhals/schema.py index 1c25286e27..fee9de99ba 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -25,6 +25,7 @@ from typing_extensions import Self from narwhals.dtypes import DType + from narwhals.typing import DTypeBackend BaseSchema = OrderedDict[str, DType] else: @@ -121,7 +122,7 @@ def to_arrow(self: Self) -> pa.Schema: ) def to_pandas( - self: Self, dtype_backend: str | Iterable[str] | None = None + self: Self, dtype_backend: DTypeBackend | Iterable[DTypeBackend] = None ) -> dict[str, Any]: """Convert Schema to an ordered mapping of column names to their pandas data type. @@ -138,17 +139,14 @@ def to_pandas( >>> schema.to_pandas() {'a': 'int64', 'b': 'datetime64[ns]'} - >>> schema.to_pandas("pyarrow-nullable") + >>> schema.to_pandas("pyarrow") {'a': 'Int64[pyarrow]', 'b': 'timestamp[ns][pyarrow]'} - - >>> schema.to_pandas(["pandas-nullable", "pyarrow-nullable"]) - {'a': 'Int64', 'b': 'timestamp[ns][pyarrow]'} """ import pandas as pd # ignore-banned-import from narwhals._pandas_like.utils import narwhals_to_native_dtype - to_native = partial( + to_native_dtype = partial( narwhals_to_native_dtype, implementation=Implementation.PANDAS, backend_version=parse_version(pd.__version__), @@ -156,7 +154,7 @@ def to_pandas( ) if dtype_backend is None or isinstance(dtype_backend, str): return { - name: to_native(dtype=dtype, dtype_backend=dtype_backend) + name: to_native_dtype(dtype=dtype, dtype_backend=dtype_backend) for name, dtype in self.items() } else: @@ -183,7 +181,7 @@ def to_pandas( ) raise ValueError(msg) return { - name: to_native(dtype=dtype, dtype_backend=backend) + name: to_native_dtype(dtype=dtype, dtype_backend=backend) for name, dtype, backend in zip(self.keys(), self.values(), backends) } diff --git a/narwhals/typing.py b/narwhals/typing.py index 0c284cfa34..4e6e0815d4 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -244,6 +244,7 @@ def lit( ... return s.abs().to_native() """ +DTypeBackend: TypeAlias = Literal["pyarrow", "numpy_nullable"] | None SizeUnit: TypeAlias = Literal[ "b", "kb", diff --git a/tests/frame/schema_test.py b/tests/frame/schema_test.py index 427948913b..ae33ef0151 100644 --- a/tests/frame/schema_test.py +++ b/tests/frame/schema_test.py @@ -20,6 +20,7 @@ if TYPE_CHECKING: from collections.abc import Sequence + from narwhals.typing import DTypeBackend from tests.utils import Constructor from tests.utils import ConstructorEager @@ -343,11 +344,7 @@ def test_all_nulls_pandas() -> None: {"a": "int64", "b": str, "c": "bool", "d": "float64", "e": "datetime64[ns]"}, ), ( - "numpy", - {"a": "int64", "b": str, "c": "bool", "d": "float64", "e": "datetime64[ns]"}, - ), - ( - "pyarrow-nullable", + "pyarrow", { "a": "Int64[pyarrow]", "b": "string[pyarrow]", @@ -357,7 +354,7 @@ def test_all_nulls_pandas() -> None: }, ), ( - "pandas-nullable", + "numpy_nullable", { "a": "Int64", "b": "string", @@ -368,11 +365,11 @@ def test_all_nulls_pandas() -> None: ), ( [ - "pandas-nullable", - "pyarrow-nullable", - "numpy", - "pyarrow-nullable", - "pandas-nullable", + "numpy_nullable", + "pyarrow", + None, + "pyarrow", + "numpy_nullable", ], { "a": "Int64", @@ -385,7 +382,7 @@ def test_all_nulls_pandas() -> None: ], ) def test_schema_to_pandas( - dtype_backend: str | Sequence[str] | None, expected: dict[str, Any] + dtype_backend: DTypeBackend | Sequence[DTypeBackend] | None, expected: dict[str, Any] ) -> None: schema = nw.Schema( { @@ -409,13 +406,13 @@ def test_schema_to_pandas_strict_zip() -> None: "e": nw.Datetime("ns"), } ) - dtype_backend = ["pandas-nullable", "pyarrow-nullable", "numpy"] + dtype_backend: list[DTypeBackend] = ["numpy_nullable", "pyarrow", None] tup = ( - "pandas-nullable", - "pyarrow-nullable", + "numpy_nullable", + "pyarrow", "numpy", - "pandas-nullable", - "pyarrow-nullable", + "numpy_nullable", + "pyarrow", ) suggestion = re.escape(f"({tup})") with pytest.raises( @@ -426,3 +423,10 @@ def test_schema_to_pandas_strict_zip() -> None: ), ): schema.to_pandas(dtype_backend) + + +def test_schema_to_pandas_invalid() -> None: + schema = nw.Schema({"a": nw.Int64()}) + msg = "Expected one of {None, 'pyarrow', 'numpy_nullable'}, got: 'cabbage'" + with pytest.raises(ValueError, match=msg): + schema.to_pandas("cabbage") # type: ignore[arg-type] From bae64aff8c4fa98a174f5d41ae7bd2e89c485e8f Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 8 Feb 2025 15:28:26 +0000 Subject: [PATCH 22/28] py39 compat --- narwhals/typing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/narwhals/typing.py b/narwhals/typing.py index 4e6e0815d4..3179f34835 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -5,6 +5,7 @@ from typing import Callable from typing import Generic from typing import Literal +from typing import Optional from typing import Protocol from typing import Sequence from typing import TypeVar @@ -244,7 +245,7 @@ def lit( ... return s.abs().to_native() """ -DTypeBackend: TypeAlias = Literal["pyarrow", "numpy_nullable"] | None +DTypeBackend: TypeAlias = Optional[Literal["pyarrow", "numpy_nullable"]] SizeUnit: TypeAlias = Literal[ "b", "kb", From b9019576237319f06611f7ad9c1d68359623db6c Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 8 Feb 2025 15:37:18 +0000 Subject: [PATCH 23/28] remove Any in return from Schema.to_polars --- narwhals/schema.py | 10 ++++------ narwhals/typing.py | 3 +-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/narwhals/schema.py b/narwhals/schema.py index fee9de99ba..5253860908 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -185,7 +185,7 @@ def to_pandas( for name, dtype, backend in zip(self.keys(), self.values(), backends) } - def to_polars(self: Self) -> pl.Schema | Any: + def to_polars(self: Self) -> pl.Schema: """Convert Schema to a polars Schema. Returns: @@ -205,8 +205,6 @@ def to_polars(self: Self) -> pl.Schema | Any: (name, narwhals_to_native_dtype(dtype, self._version)) for name, dtype in self.items() ) - return ( - pl.Schema(schema) - if parse_version(pl.__version__) >= (1, 0, 0) - else dict(schema) - ) + if parse_version(pl.__version__) >= (1, 0, 0): + return pl.Schema(schema) + return dict(schema) # type: ignore[return-value] diff --git a/narwhals/typing.py b/narwhals/typing.py index 3179f34835..68d725b51d 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -5,7 +5,6 @@ from typing import Callable from typing import Generic from typing import Literal -from typing import Optional from typing import Protocol from typing import Sequence from typing import TypeVar @@ -245,7 +244,7 @@ def lit( ... return s.abs().to_native() """ -DTypeBackend: TypeAlias = Optional[Literal["pyarrow", "numpy_nullable"]] +DTypeBackend: TypeAlias = 'Literal["pyarrow", "numpy_nullable"] | None' SizeUnit: TypeAlias = Literal[ "b", "kb", From bde48a8daf301378727955fdc628f2939afdb96b Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 8 Feb 2025 15:38:27 +0000 Subject: [PATCH 24/28] fixup --- tests/frame/schema_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/frame/schema_test.py b/tests/frame/schema_test.py index ae33ef0151..33cfadb5b2 100644 --- a/tests/frame/schema_test.py +++ b/tests/frame/schema_test.py @@ -410,7 +410,7 @@ def test_schema_to_pandas_strict_zip() -> None: tup = ( "numpy_nullable", "pyarrow", - "numpy", + None, "numpy_nullable", "pyarrow", ) From 0133050ade6a298623b1378a3d6ceb4e973043b8 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 8 Feb 2025 15:55:08 +0000 Subject: [PATCH 25/28] missing else --- narwhals/functions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index 1597428d35..0d0a1e29c7 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -347,7 +347,8 @@ def _new_series_impl( dtype, None, implementation, backend_version, version ) native_series = native_namespace.Series(values, name=name, dtype=pd_dtype) - native_series = native_namespace.Series(values, name=name) + else: + native_series = native_namespace.Series(values, name=name) elif implementation is Implementation.PYARROW: if dtype: From 5c26e9aebdaf638ae3a5d88a9b1d3e5eef223b21 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 8 Feb 2025 16:00:43 +0000 Subject: [PATCH 26/28] coverage --- narwhals/_pandas_like/utils.py | 16 +++++++--------- narwhals/schema.py | 6 +++--- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 99c1892307..518d2e8f43 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -507,15 +507,13 @@ def get_dtype_backend(dtype: Any, implementation: Implementation) -> DTypeBacken Matches pandas' `dtype_backend` argument in `convert_dtypes`. """ - if implementation in {Implementation.PANDAS, Implementation.MODIN}: - import pandas as pd - - if hasattr(pd, "ArrowDtype") and isinstance(dtype, pd.ArrowDtype): - return "pyarrow" - - with suppress(AttributeError): - if isinstance(dtype, pd.core.dtypes.dtypes.BaseMaskedDtype): - return "numpy_nullable" + if implementation is Implementation.CUDF: + return None + if hasattr(pd, "ArrowDtype") and isinstance(dtype, pd.ArrowDtype): + return "pyarrow" + with suppress(AttributeError): + if isinstance(dtype, pd.core.dtypes.dtypes.BaseMaskedDtype): + return "numpy_nullable" return None diff --git a/narwhals/schema.py b/narwhals/schema.py index 5253860908..df41153038 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -205,6 +205,6 @@ def to_polars(self: Self) -> pl.Schema: (name, narwhals_to_native_dtype(dtype, self._version)) for name, dtype in self.items() ) - if parse_version(pl.__version__) >= (1, 0, 0): - return pl.Schema(schema) - return dict(schema) # type: ignore[return-value] + if parse_version(pl.__version__) < (1, 0, 0): # pragma: no cover + return dict(schema) # type: ignore[return-value] + return pl.Schema(schema) From 3bcebc1c2b75eddebf3e88544e0cb312b9361020 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 8 Feb 2025 16:43:33 +0000 Subject: [PATCH 27/28] refactor(typing): lie more explicitly https://github.com/narwhals-dev/narwhals/pull/1924/files#r1947895133 --- narwhals/schema.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/narwhals/schema.py b/narwhals/schema.py index df41153038..4f6da44166 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -11,6 +11,7 @@ from typing import TYPE_CHECKING from typing import Iterable from typing import Mapping +from typing import cast from narwhals.utils import Implementation from narwhals.utils import Version @@ -205,6 +206,6 @@ def to_polars(self: Self) -> pl.Schema: (name, narwhals_to_native_dtype(dtype, self._version)) for name, dtype in self.items() ) - if parse_version(pl.__version__) < (1, 0, 0): # pragma: no cover - return dict(schema) # type: ignore[return-value] - return pl.Schema(schema) + if parse_version(pl.__version__) >= (1, 0, 0): # pragma: no cover + return pl.Schema(schema) + return cast("pl.Schema", dict(schema)) From 7b89936e1af2ce1906de555ad5e6c5f678b2c5d1 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 8 Feb 2025 16:47:21 +0000 Subject: [PATCH 28/28] chore: move pragma https://github.com/narwhals-dev/narwhals/actions/runs/13217489312/job/36898464609?pr=1924 --- narwhals/schema.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/narwhals/schema.py b/narwhals/schema.py index 4f6da44166..8749ef4be3 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -206,6 +206,6 @@ def to_polars(self: Self) -> pl.Schema: (name, narwhals_to_native_dtype(dtype, self._version)) for name, dtype in self.items() ) - if parse_version(pl.__version__) >= (1, 0, 0): # pragma: no cover + if parse_version(pl.__version__) >= (1, 0, 0): return pl.Schema(schema) - return cast("pl.Schema", dict(schema)) + return cast("pl.Schema", dict(schema)) # pragma: no cover