From 48f7d805a4d72a009ae9ab45f34a7ce6843ba904 Mon Sep 17 00:00:00 2001 From: anopsy Date: Wed, 21 Aug 2024 12:25:57 +0200 Subject: [PATCH 1/4] dask-quantile-draft --- narwhals/_dask/expr.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index faedb6095..4dd8b9a1b 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING from typing import Any from typing import Callable +from typing import Literal from typing import NoReturn from narwhals._dask.utils import add_row_index @@ -515,6 +516,22 @@ def len(self: Self) -> Self: returns_scalar=True, ) + def quantile( + self: Self, + quantile: float, + interpolation: Literal["nearest", "higher", "lower", "midpoint", "linear"], + ) -> Self: + if interpolation == "nearest": + return self._from_call( + lambda _input, quantile: _input.quantile(q=quantile, method="dask"), + "quantile", + quantile, + returns_scalar=True, + ) + else: + msg = "`higher`, `lower`, `midpoint`, `linear` - interpolation methods are not supported by Dask. Please use `nearest` instead." 
+ raise NotImplementedError(msg) + def is_first_distinct(self: Self) -> Self: def func(_input: Any) -> Any: _name = _input.name From 76d229d83235ffbbd5be152c3b6f75222f7997ff Mon Sep 17 00:00:00 2001 From: anopsy Date: Wed, 21 Aug 2024 14:49:21 +0200 Subject: [PATCH 2/4] dask-expr-quantile with test --- narwhals/_dask/expr.py | 4 ++-- tests/expr_and_series/quantile_test.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index 4dd8b9a1b..84288cf53 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -521,7 +521,7 @@ def quantile( quantile: float, interpolation: Literal["nearest", "higher", "lower", "midpoint", "linear"], ) -> Self: - if interpolation == "nearest": + if interpolation == "linear": return self._from_call( lambda _input, quantile: _input.quantile(q=quantile, method="dask"), "quantile", @@ -529,7 +529,7 @@ def quantile( returns_scalar=True, ) else: - msg = "`higher`, `lower`, `midpoint`, `linear` - interpolation methods are not supported by Dask. Please use `nearest` instead." + msg = "`higher`, `lower`, `midpoint`, `nearest` - interpolation methods are not supported by Dask. Please use `linear` instead." 
raise NotImplementedError(msg) def is_first_distinct(self: Self) -> Self: diff --git a/tests/expr_and_series/quantile_test.py b/tests/expr_and_series/quantile_test.py index 8cb9320c7..d9064541f 100644 --- a/tests/expr_and_series/quantile_test.py +++ b/tests/expr_and_series/quantile_test.py @@ -26,7 +26,7 @@ def test_quantile_expr( expected: dict[str, list[float]], request: Any, ) -> None: - if "dask" in str(constructor): + if "dask" in str(constructor) and interpolation != "linear": request.applymarker(pytest.mark.xfail) q = 0.3 data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} From 8b310fd4ca68d75d75e40378771f318753175e49 Mon Sep 17 00:00:00 2001 From: anopsy Date: Wed, 21 Aug 2024 15:09:49 +0200 Subject: [PATCH 3/4] dask-quantile with test and docs adjusted --- narwhals/expr.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/narwhals/expr.py b/narwhals/expr.py index a8097a2a2..900538b04 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -1539,6 +1539,9 @@ def quantile( Note: pandas and Polars may have implementation differences for a given interpolation method. + Note: + dask has its own method to approximate quantile and it doesn't implement 'nearest', 'higher', 'lower', 'midpoint' as interpolation method - use 'linear' instead + Arguments: quantile : float Quantile between 0.0 and 1.0. From 5b1c94cf62fce0393b7d10e29526061407a2c272 Mon Sep 17 00:00:00 2001 From: anopsy Date: Wed, 21 Aug 2024 17:35:38 +0200 Subject: [PATCH 4/4] dask-quantile with test and docs adjusted --- narwhals/expr.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/narwhals/expr.py b/narwhals/expr.py index 900538b04..cabdfb4d1 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -1537,10 +1537,9 @@ def quantile( r"""Get quantile value. Note: - pandas and Polars may have implementation differences for a given interpolation method. 
- - Note: - dask has its own method to approximate quantile and it doesn't implement 'nearest', 'higher', 'lower', 'midpoint' as interpolation method - use 'linear' instead + * pandas and Polars may have implementation differences for a given interpolation method. + * [dask](https://docs.dask.org/en/stable/generated/dask.dataframe.Series.quantile.html) has its own method to approximate quantile and it doesn't implement 'nearest', 'higher', 'lower', 'midpoint' + as interpolation methods - use 'linear', which is closest to the native 'dask' method. Arguments: quantile : float