diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4b7570b3b..0a203a4cd 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,7 +4,7 @@ Changelog v0.55.0 (unreleased) -------------------- -Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Trevor James Smith (:user:`Zeitsperre`). +Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Trevor James Smith (:user:`Zeitsperre`), Sascha Hofmann (:user:`saschahofmann`). New indicators ^^^^^^^^^^^^^^ @@ -16,12 +16,17 @@ New features and enhancements * New function ``ensemble.partition.general_partition`` (:pull:`2035`) * Added a new ``xclim.indices.generic.bivariate_count_occurrences`` function to count instances where operations and performed and validated for two variables. (:pull:`2030`). * `xclim` now tracks energy usage and carbon emissions ("last run", "average", and "total") during CI workflows using the `eco-ci-energy-estimation` GitHub Action. (:pull:`2046`). +* ``xclim.testing.helpers.test_timeseries`` (:pull:`2019`) now accepts a `calendar` argument thats forwarded to ``xr.cftime_range``. Internal changes ^^^^^^^^^^^^^^^^ * `sphinx-codeautolink` and `pygments` have been temporarily pinned due to breaking API changes. (:pull:`2030`). * Adjusted the ``TestOfficialYaml`` test to use a dynamic method for finding the installed location of `xclim`. (:pull:`2028`). +Bug fixes +^^^^^^^^^ +* Fixed a bug in ``xclim.sdba.Grouper.get_index`` that didn't correctly interpolate seasonal values (:issue:`2014`, :pull:`2019`). + v0.54.0 (2024-12-16) -------------------- Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Éric Dupuis (:user:`coxipi`), Sascha Hofmann (:user:`saschahofmann`). diff --git a/docs/installation.rst b/docs/installation.rst index d83d5d7fd..d045b99fa 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -160,4 +160,4 @@ To create a conda environment including `xclim`'s dependencies and several optio conda env create -n my_xclim_env python=3.10 --file=environment.yml conda activate my_xclim_env - (my_xclim_env) python -m pip install -e --no-deps . + (my_xclim_env) python -m pip install --no-deps -e . diff --git a/src/xclim/sdba/_adjustment.py b/src/xclim/sdba/_adjustment.py index 4398cec0e..291659e12 100644 --- a/src/xclim/sdba/_adjustment.py +++ b/src/xclim/sdba/_adjustment.py @@ -8,6 +8,7 @@ from __future__ import annotations +import warnings from collections.abc import Callable, Sequence import numpy as np @@ -587,6 +588,13 @@ def qdm_adjust(ds: xr.Dataset, *, group, interp, extrapolation, kind) -> xr.Data sim : Data to adjust. """ sim_q = group.apply(u.rank, ds.sim, main_only=True, pct=True) + if group.prop and interp != "nearest": + + msg = ( + f"Using a {interp} interpolation with QuantileDeltaMapping might create sudden jumps between different" + " groups. See discussion https://github.com/Ouranosinc/xclim/discussions/2048 for more information." + ) + warnings.warn(msg) af = u.interp_on_quantiles( sim_q, ds.quantiles, diff --git a/src/xclim/sdba/base.py b/src/xclim/sdba/base.py index ca5f93d02..0c2d3f626 100644 --- a/src/xclim/sdba/base.py +++ b/src/xclim/sdba/base.py @@ -179,7 +179,9 @@ def prop_name(self): """Create a significant name for the grouping.""" return "year" if self.prop == "group" else self.prop - def get_coordinate(self, ds: xr.Dataset | None = None) -> xr.DataArray: + def get_coordinate( + self, ds: xr.Dataset | xr.DataArray | None = None + ) -> xr.DataArray: """Return the coordinate as in the output of group.apply. Currently, only implemented for groupings with prop == `month` or `dayofyear`. @@ -293,21 +295,39 @@ def get_index( return da[self.dim].rename("group") ind = da.indexes[self.dim] - if self.prop == "week": - i = da[self.dim].copy(data=ind.isocalendar().week).astype(int) - elif self.prop == "season": - i = da[self.dim].copy(data=ind.month % 12 // 3) - else: - i = getattr(ind, self.prop) - if not np.issubdtype(i.dtype, np.integer): - raise ValueError( - f"Index {self.name} is not of type int (rather {i.dtype}), " - f"but {self.__class__.__name__} requires integer indexes." - ) + if interp and self.dim == "time": + if self.prop == "month": + i = ind.month - 0.5 + ind.day / ind.days_in_month - if interp and self.dim == "time" and self.prop == "month": - i = ind.month - 0.5 + ind.day / ind.days_in_month + elif self.prop == "season": + calendar = ind.calendar if hasattr(ind, "calendar") else "standard" + length_year = ( + 360 + if calendar == "360_day" + else 365 + (0 if calendar == "noleap" else ind.is_leap_year) + ) + # This is assuming that seasons have the same length. The factor 1/6 comes from the fact that + # the first season is shifted by 1 month the but the middle of the season is shifted in the other direction + # by half a month so -(1/12-1/24)*4 = -1/6 + i = ind.dayofyear / length_year * 4 - 1 / 6 + else: + raise ValueError( + f"Interpolation is not supported for {self.dim}.{self.prop}." + ) + else: + if self.prop == "week": + i = da[self.dim].copy(data=ind.isocalendar().week).astype(int) + elif self.prop == "season": + i = da[self.dim].copy(data=ind.month % 12 // 3) + else: + i = getattr(ind, self.prop) + + if not np.issubdtype(i.dtype, np.integer): + raise ValueError( + f"Index {self.name} is not of type int (rather {i.dtype}), " + f"but {self.__class__.__name__} requires integer indexes." + ) xi = xr.DataArray( i, diff --git a/src/xclim/sdba/nbutils.py b/src/xclim/sdba/nbutils.py index 28fad5647..e96c6e5cd 100644 --- a/src/xclim/sdba/nbutils.py +++ b/src/xclim/sdba/nbutils.py @@ -393,7 +393,7 @@ def _extrapolate_on_quantiles( Arguments are the same as _interp_on_quantiles_2D. """ bnds = _first_and_last_nonnull(oldx) - xp = np.arange(bnds.shape[0]) + xp = oldg[:, 0] toolow = newx < np.interp(newg, xp, bnds[:, 0]) toohigh = newx > np.interp(newg, xp, bnds[:, 1]) if method == "constant": diff --git a/src/xclim/sdba/utils.py b/src/xclim/sdba/utils.py index f8c2a6d2f..45a1c000b 100644 --- a/src/xclim/sdba/utils.py +++ b/src/xclim/sdba/utils.py @@ -219,6 +219,8 @@ def broadcast( sel.update({group.prop: group.get_index(x, interp=interp != "nearest")}) if sel: + if group.prop == "season": + grouped = grouped.assign_coords(season=map_season_to_int(grouped.season)) # Extract the correct mean factor for each time step. if interp == "nearest": # Interpolate both the time group and the quantile. grouped = grouped.sel(sel, method="nearest") @@ -473,11 +475,11 @@ def interp_on_quantiles( output_dtypes=[yq.dtype], ) return out - + prop_coords = group.get_coordinate(newx) if prop not in xq.dims: - xq = xq.expand_dims({prop: group.get_coordinate()}) + xq = xq.expand_dims({prop: prop_coords}) if prop not in yq.dims: - yq = yq.expand_dims({prop: group.get_coordinate()}) + yq = yq.expand_dims({prop: prop_coords}) # Adding the cyclic bounds fails for string coordinates like seasons # That's why we map the seasons to integers diff --git a/src/xclim/testing/helpers.py b/src/xclim/testing/helpers.py index 6b21fba59..7b1df4a90 100644 --- a/src/xclim/testing/helpers.py +++ b/src/xclim/testing/helpers.py @@ -190,6 +190,7 @@ def test_timeseries( freq: str = "D", as_dataset: bool = False, cftime: bool = False, + calendar: str | None = None, ) -> xr.DataArray | xr.Dataset: """ Create a generic timeseries object based on pre-defined dictionaries of existing variables. @@ -210,14 +211,18 @@ def test_timeseries( Whether to return a Dataset or a DataArray. Default is False. cftime : bool Whether to use cftime or not. Default is False. + calendar : str or None + Whether to use a calendar. If a calendar is provided, cftime is used. Returns ------- xr.DataArray or xr.Dataset A DataArray or Dataset with time, lon and lat dimensions. """ - if cftime: - coords = xr.cftime_range(start, periods=len(values), freq=freq) + if calendar or cftime: + coords = xr.cftime_range( + start, periods=len(values), freq=freq, calendar=calendar or "standard" + ) else: coords = pd.date_range(start, periods=len(values), freq=freq) diff --git a/tests/test_bootstrapping.py b/tests/test_bootstrapping.py index 484f7f163..fb98eaf34 100644 --- a/tests/test_bootstrapping.py +++ b/tests/test_bootstrapping.py @@ -23,30 +23,30 @@ class Test_bootstrap: @pytest.mark.slow @pytest.mark.parametrize("use_dask", [True, False]) @pytest.mark.parametrize( - "var,p,index,freq, cftime", + "var,p,index,freq, calendar", ( - ["tas", 98, tg90p, "MS", False], - ["tasmin", 98, tn90p, "YS-JUL", False], - ["tasmax", 98, tx90p, "QS-APR", False], - ["tasmax", 98, tx90p, "QS-APR", True], - ["tasmin", 2, tn10p, "MS", False], - ["tasmax", 2, tx10p, "YS", False], - ["tasmax", 2, tx10p, "YS", True], - ["tas", 2, tg10p, "MS", False], - ["tasmax", 98, warm_spell_duration_index, "MS", False], - ["tasmin", 2, cold_spell_duration_index, "MS", False], - ["pr", 99, days_over_precip_thresh, "MS", False], - ["pr", 98, fraction_over_precip_thresh, "MS", False], - ["pr", 98, fraction_over_precip_thresh, "MS", True], + ["tas", 98, tg90p, "MS", None], + ["tasmin", 98, tn90p, "YS-JUL", None], + ["tasmax", 98, tx90p, "QS-APR", None], + ["tasmax", 98, tx90p, "QS-APR", "standard"], + ["tasmin", 2, tn10p, "MS", None], + ["tasmax", 2, tx10p, "YS", None], + ["tasmax", 2, tx10p, "YS", "standard"], + ["tas", 2, tg10p, "MS", None], + ["tasmax", 98, warm_spell_duration_index, "MS", None], + ["tasmin", 2, cold_spell_duration_index, "MS", None], + ["pr", 99, days_over_precip_thresh, "MS", None], + ["pr", 98, fraction_over_precip_thresh, "MS", None], + ["pr", 98, fraction_over_precip_thresh, "MS", "standard"], ), ) - def test_bootstrap(self, var, p, index, freq, cftime, use_dask, random): + def test_bootstrap(self, var, p, index, freq, calendar, use_dask, random): # -- GIVEN arr = self.ar1( alpha=0.8, n=int(4 * 365.25), random=random, positive_values=(var == "pr") ) climate_var = _test_timeseries( - arr, start="2000-01-01", variable=var, cftime=cftime + arr, start="2000-01-01", variable=var, calendar=calendar ) if use_dask: climate_var = climate_var.chunk(dict(time=50)) diff --git a/tests/test_calendar.py b/tests/test_calendar.py index 58b8d5857..7d40e75c9 100644 --- a/tests/test_calendar.py +++ b/tests/test_calendar.py @@ -482,13 +482,13 @@ def test_convert_doy(): ) -@pytest.mark.parametrize("cftime", [True, False]) +@pytest.mark.parametrize("calendar", ["standard", None]) @pytest.mark.parametrize( "w,s,m,f,ss", [(30, 10, None, "YS", 0), (3, 1, None, "QS-DEC", 60), (6, None, None, "MS", 0)], ) -def test_stack_periods(tas_series, cftime, w, s, m, f, ss): - da = tas_series(np.arange(365 * 50), start="2000-01-01", cftime=cftime) +def test_stack_periods(tas_series, calendar, w, s, m, f, ss): + da = tas_series(np.arange(365 * 50), start="2000-01-01", calendar=calendar) da_stck = stack_periods( da, window=w, stride=s, min_length=m, freq=f, align_days=False @@ -502,7 +502,7 @@ def test_stack_periods(tas_series, cftime, w, s, m, f, ss): def test_stack_periods_special(tas_series): - da = tas_series(np.arange(365 * 48 + 12), cftime=True, start="2004-01-01") + da = tas_series(np.arange(365 * 48 + 12), calendar="standard", start="2004-01-01") with pytest.raises(ValueError, match="unaligned day-of-year"): stack_periods(da) diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 8ac4e262f..064227052 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -179,10 +179,10 @@ def test_resample_map_passthrough(tas_series): assert not uses_dask(out) -@pytest.mark.parametrize("cftime", [False, True]) -def test_make_hourly_temperature(tasmax_series, tasmin_series, cftime): - tasmax = tasmax_series(np.array([20]), units="degC", cftime=cftime) - tasmin = tasmin_series(np.array([0]), units="degC", cftime=cftime).expand_dims( +@pytest.mark.parametrize("calendar", [None, "standard"]) +def test_make_hourly_temperature(tasmax_series, tasmin_series, calendar): + tasmax = tasmax_series(np.array([20]), units="degC", calendar=calendar) + tasmin = tasmin_series(np.array([0]), units="degC", calendar=calendar).expand_dims( lat=[0] ) diff --git a/tests/test_sdba/test_base.py b/tests/test_sdba/test_base.py index 255ea061e..5619b5d47 100644 --- a/tests/test_sdba/test_base.py +++ b/tests/test_sdba/test_base.py @@ -50,11 +50,21 @@ def test_grouper_group(tas_series, group, window, nvals): @pytest.mark.parametrize( - "group,interp,val90", - [("time", False, True), ("time.month", False, 3), ("time.month", True, 3.5)], + "group,interp,val90,calendar", + [ + ("time", False, True, None), + ("time.month", False, 3, None), + ("time.month", True, 3.5, None), + ("time.season", False, 1, None), + ("time.season", True, 0.8278688524590164, None), + ("time.month", True, 3.533333333333333, "360_day"), + ("time.month", True, 3.533333333333333, "noleap"), + ("time.season", True, 0.8444444444444444, "360_day"), + ("time.season", True, 0.8305936073059361, "noleap"), + ], ) -def test_grouper_get_index(tas_series, group, interp, val90): - tas = tas_series(np.ones(366), start="2000-01-01") +def test_grouper_get_index(tas_series, group, interp, val90, calendar): + tas = tas_series(np.ones(366), start="2000-01-01", calendar=calendar) grouper = Grouper(group) indx = grouper.get_index(tas, interp=interp) # 90 is March 31st