Skip to content

Commit

Permalink
enh: Deprecate native_namespace in favour of backend in `from_dic…
Browse files Browse the repository at this point in the history
…t` (#1931)
  • Loading branch information
raisadz authored Feb 4, 2025
1 parent 4d0c9c5 commit 71a5bc5
Show file tree
Hide file tree
Showing 7 changed files with 227 additions and 71 deletions.
8 changes: 4 additions & 4 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,16 +777,16 @@ def unique(
def lazy(self: Self, *, backend: Implementation | None = None) -> CompliantLazyFrame:
from narwhals.utils import parse_version

pandas_df = self.to_pandas()
if backend is None:
return self
elif backend is Implementation.DUCKDB:
import duckdb # ignore-banned-import

from narwhals._duckdb.dataframe import DuckDBLazyFrame

df = self._native_frame # noqa: F841
return DuckDBLazyFrame(
df=duckdb.table("df"),
df=duckdb.table("pandas_df"),
backend_version=parse_version(duckdb.__version__),
version=self._version,
validate_column_names=False,
Expand All @@ -797,7 +797,7 @@ def lazy(self: Self, *, backend: Implementation | None = None) -> CompliantLazyF
from narwhals._polars.dataframe import PolarsLazyFrame

return PolarsLazyFrame(
df=pl.from_pandas(self._native_frame).lazy(),
df=pl.from_pandas(pandas_df).lazy(),
backend_version=parse_version(pl.__version__),
version=self._version,
)
Expand All @@ -808,7 +808,7 @@ def lazy(self: Self, *, backend: Implementation | None = None) -> CompliantLazyF
from narwhals._dask.dataframe import DaskLazyFrame

return DaskLazyFrame(
native_dataframe=dd.from_pandas(self._native_frame),
native_dataframe=dd.from_pandas(pandas_df),
backend_version=parse_version(dask.__version__),
version=self._version,
validate_column_names=False,
Expand Down
1 change: 0 additions & 1 deletion narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,6 @@ def __arrow_c_stream__(self: Self, requested_schema: object | None = None) -> ob

def lazy(
self: Self,
*,
backend: ModuleType | Implementation | str | None = None,
) -> LazyFrame[Any]:
"""Restrict available API methods to lazy-only ones.
Expand Down
75 changes: 51 additions & 24 deletions narwhals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from narwhals.utils import flatten
from narwhals.utils import parse_version
from narwhals.utils import validate_laziness
from narwhals.utils import validate_native_namespace_and_backend

# Missing type parameters for generic type "DataFrame"
# However, trying to provide one results in mypy still complaining...
Expand Down Expand Up @@ -374,6 +375,7 @@ def from_dict(
data: dict[str, Any],
schema: dict[str, DType] | Schema | None = None,
*,
backend: ModuleType | Implementation | str | None = None,
native_namespace: ModuleType | None = None,
) -> DataFrame[Any]:
"""Instantiate DataFrame from dictionary.
Expand All @@ -388,9 +390,22 @@ def from_dict(
Arguments:
data: Dictionary to create DataFrame from.
schema: The DataFrame schema as Schema or dict of {name: type}.
native_namespace: The native library to use for DataFrame creation. Only
backend: specifies which eager backend to instantiate to. Only
necessary if inputs are not Narwhals Series.
`backend` can be specified in various ways:
- As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
`POLARS`, `MODIN` or `CUDF`.
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
native_namespace: The native library to use for DataFrame creation.
**Deprecated** (v1.26.0):
Please use `backend` instead. Note that `native_namespace` is still available
(and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
see [perfect backwards compatibility policy](../backcompat.md/).
Returns:
A new DataFrame.
Expand All @@ -400,24 +415,20 @@ def from_dict(
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
Let's create a new dataframe of the same class as the dataframe we started with, from a dict of new data:
Let's create a new dataframe and specify the backend argument.
>>> def agnostic_from_dict(df_native: IntoFrameT) -> IntoFrameT:
... new_data = {"c": [5, 2], "d": [1, 4]}
... native_namespace = nw.get_native_namespace(df_native)
... return nw.from_dict(
... new_data, native_namespace=native_namespace
... ).to_native()
>>> def agnostic_from_dict(backend: str) -> IntoFrameT:
... data = {"c": [5, 2], "d": [1, 4]}
... return nw.from_dict(data, backend=backend).to_native()
Let's see what happens when passing pandas, Polars or PyArrow input:
>>> agnostic_from_dict(pd.DataFrame(data))
>>> agnostic_from_dict(backend="pandas")
c d
0 5 1
1 2 4
>>> agnostic_from_dict(pl.DataFrame(data))
>>> agnostic_from_dict(backend="polars")
shape: (2, 2)
┌─────┬─────┐
│ c ┆ d │
Expand All @@ -427,27 +438,30 @@ def from_dict(
│ 5 ┆ 1 │
│ 2 ┆ 4 │
└─────┴─────┘
>>> agnostic_from_dict(pa.table(data))
>>> agnostic_from_dict(backend="pyarrow")
pyarrow.Table
c: int64
d: int64
----
c: [[5,2]]
d: [[1,4]]
"""
backend = validate_native_namespace_and_backend(
backend, native_namespace, emit_deprecation_warning=True
)
return _from_dict_impl(
data,
schema,
native_namespace=native_namespace,
backend=backend,
version=Version.MAIN,
)


def _from_dict_impl(
def _from_dict_impl( # noqa: PLR0915
data: dict[str, Any],
schema: dict[str, DType] | Schema | None = None,
*,
native_namespace: ModuleType | None = None,
backend: ModuleType | Implementation | str | None = None,
version: Version,
) -> DataFrame[Any]:
from narwhals.series import Series
Expand All @@ -456,18 +470,31 @@ def _from_dict_impl(
if not data:
msg = "from_dict cannot be called with empty dictionary"
raise ValueError(msg)
if native_namespace is None:
if backend is None:
for val in data.values():
if isinstance(val, Series):
native_namespace = val.__native_namespace__()
break
else:
msg = "Calling `from_dict` without `native_namespace` is only supported if all input values are already Narwhals Series"
msg = "Calling `from_dict` without `backend` is only supported if all input values are already Narwhals Series"
raise TypeError(msg)
data = {key: to_native(value, pass_through=True) for key, value in data.items()}
implementation = Implementation.from_native_namespace(native_namespace)
eager_backend = Implementation.from_native_namespace(native_namespace)
else:
eager_backend = Implementation.from_backend(backend)
native_namespace = eager_backend.to_native_namespace()

if implementation is Implementation.POLARS:
supported_eager_backends = (
Implementation.POLARS,
Implementation.PANDAS,
Implementation.PYARROW,
Implementation.MODIN,
Implementation.CUDF,
)
if eager_backend is not None and eager_backend not in supported_eager_backends:
msg = f"Unsupported `backend` value.\nExpected one of {supported_eager_backends} or None, got: {eager_backend}."
raise ValueError(msg)
if eager_backend is Implementation.POLARS:
if schema:
from narwhals._polars.utils import (
narwhals_to_native_dtype as polars_narwhals_to_native_dtype,
Expand All @@ -481,11 +508,11 @@ def _from_dict_impl(
schema_pl = None

native_frame = native_namespace.from_dict(data, schema=schema_pl)
elif implementation in {
elif eager_backend in (
Implementation.PANDAS,
Implementation.MODIN,
Implementation.CUDF,
}:
):
aligned_data = {}
left_most_series = None
for key, native_series in data.items():
Expand Down Expand Up @@ -515,16 +542,16 @@ def _from_dict_impl(
schema = {
name: pandas_like_narwhals_to_native_dtype(
dtype=schema[name],
dtype_backend=get_dtype_backend(native_type, implementation),
implementation=implementation,
dtype_backend=get_dtype_backend(native_type, eager_backend),
implementation=eager_backend,
backend_version=backend_version,
version=version,
)
for name, native_type in native_frame.dtypes.items()
}
native_frame = native_frame.astype(schema)

elif implementation is Implementation.PYARROW:
elif eager_backend is Implementation.PYARROW:
if schema:
from narwhals._arrow.utils import (
narwhals_to_native_dtype as arrow_narwhals_to_native_dtype,
Expand Down
23 changes: 20 additions & 3 deletions narwhals/stable/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
from narwhals.utils import maybe_get_index
from narwhals.utils import maybe_reset_index
from narwhals.utils import maybe_set_index
from narwhals.utils import validate_native_namespace_and_backend
from narwhals.utils import validate_strict_and_pass_though

if TYPE_CHECKING:
Expand Down Expand Up @@ -169,7 +170,6 @@ def __getitem__(self: Self, item: Any) -> Any:

def lazy(
self: Self,
*,
backend: ModuleType | Implementation | str | None = None,
) -> LazyFrame[Any]:
"""Restrict available API methods to lazy-only ones.
Expand Down Expand Up @@ -2162,6 +2162,7 @@ def from_dict(
data: dict[str, Any],
schema: dict[str, DType] | Schema | None = None,
*,
backend: ModuleType | Implementation | str | None = None,
native_namespace: ModuleType | None = None,
) -> DataFrame[Any]:
"""Instantiate DataFrame from dictionary.
Expand All @@ -2176,17 +2177,33 @@ def from_dict(
Arguments:
data: Dictionary to create DataFrame from.
schema: The DataFrame schema as Schema or dict of {name: type}.
native_namespace: The native library to use for DataFrame creation. Only
backend: specifies which eager backend to instantiate to. Only
necessary if inputs are not Narwhals Series.
`backend` can be specified in various ways:
- As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
`POLARS`, `MODIN` or `CUDF`.
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
native_namespace: The native library to use for DataFrame creation.
**Deprecated** (v1.26.0):
Please use `backend` instead. Note that `native_namespace` is still available
(and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
see [perfect backwards compatibility policy](../backcompat.md/).
Returns:
A new DataFrame.
"""
backend = validate_native_namespace_and_backend(
backend, native_namespace, emit_deprecation_warning=False
)
return _stableify( # type: ignore[no-any-return]
_from_dict_impl(
data,
schema,
native_namespace=native_namespace,
backend=backend,
version=Version.V1,
)
)
Expand Down
66 changes: 56 additions & 10 deletions narwhals/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,16 +162,41 @@ def to_native_namespace(self: Self) -> ModuleType:
Returns:
Native module.
"""
mapping = {
Implementation.PANDAS: get_pandas(),
Implementation.MODIN: get_modin(),
Implementation.CUDF: get_cudf(),
Implementation.PYARROW: get_pyarrow(),
Implementation.PYSPARK: get_pyspark_sql(),
Implementation.POLARS: get_polars(),
Implementation.DASK: get_dask_dataframe(),
}
return mapping[self] # type: ignore[no-any-return]
if self is Implementation.PANDAS:
import pandas as pd # ignore-banned-import

return pd # type: ignore[no-any-return]
if self is Implementation.MODIN:
import modin.pandas

return modin.pandas # type: ignore[no-any-return]
if self is Implementation.CUDF: # pragma: no cover
import cudf # ignore-banned-import

return cudf # type: ignore[no-any-return]
if self is Implementation.PYARROW:
import pyarrow as pa # ignore-banned-import

return pa # type: ignore[no-any-return]
if self is Implementation.PYSPARK: # pragma: no cover
import pyspark.sql

return pyspark.sql # type: ignore[no-any-return]
if self is Implementation.POLARS:
import polars as pl # ignore-banned-import

return pl
if self is Implementation.DASK:
import dask.dataframe # ignore-banned-import

return dask.dataframe # type: ignore[no-any-return]

if self is Implementation.DUCKDB:
import duckdb # ignore-banned-import

return duckdb # type: ignore[no-any-return]
msg = "Not supported Implementation" # pragma: no cover
raise AssertionError(msg)

def is_pandas(self: Self) -> bool:
"""Return whether implementation is pandas.
Expand Down Expand Up @@ -1042,6 +1067,27 @@ def validate_strict_and_pass_though(
return pass_through


def validate_native_namespace_and_backend(
    backend: ModuleType | Implementation | str | None = None,
    native_namespace: ModuleType | None = None,
    *,
    emit_deprecation_warning: bool,
) -> ModuleType | Implementation | str | None:
    """Reconcile the deprecated `native_namespace` argument with `backend`.

    Arguments:
        backend: Backend requested by the caller, or None if not given.
        native_namespace: Deprecated spelling of `backend`, or None if not given.
        emit_deprecation_warning: Whether to issue a deprecation warning when
            only `native_namespace` is provided.

    Returns:
        `backend` when it was provided; otherwise `native_namespace`
        (which is None when neither argument was provided).

    Raises:
        ValueError: If both `native_namespace` and `backend` are provided.
    """
    if native_namespace is not None and backend is None:  # pragma: no cover
        if emit_deprecation_warning:
            # The replacement for `native_namespace` is `backend`; the message
            # must name the parameter callers should actually switch to.
            msg = (
                "`native_namespace` is deprecated, please use `backend` instead.\n\n"
                "Note: `native_namespace` will remain available in `narwhals.stable.v1`.\n"
                "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n"
            )
            # Version kept in line with the "Deprecated (v1.26.0)" note in the
            # public `from_dict` documentation.
            issue_deprecation_warning(msg, _version="1.26.0")
        # Fall back to the deprecated argument so callers keep working.
        backend = native_namespace
    elif native_namespace is not None and backend is not None:
        msg = "Can't pass both `native_namespace` and `backend`"
        raise ValueError(msg)
    return backend


def _validate_rolling_arguments(
window_size: int, min_samples: int | None
) -> tuple[int, int]:
Expand Down
3 changes: 0 additions & 3 deletions tests/frame/lazy_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,9 @@ def test_lazy_to_default(constructor_eager: ConstructorEager) -> None:
],
)
def test_lazy_backend(
request: pytest.FixtureRequest,
constructor_eager: ConstructorEager,
backend: Implementation | str,
) -> None:
if "modin" in str(constructor_eager):
request.applymarker(pytest.mark.xfail)
if (backend is Implementation.DASK) or backend == "dask":
pytest.importorskip("dask")
if (backend is Implementation.DUCKDB) or backend == "duckdb":
Expand Down
Loading

0 comments on commit 71a5bc5

Please sign in to comment.