Skip to content

Commit

Permalink
Add biasadjust tests (#366)
Browse files Browse the repository at this point in the history
<!-- Please ensure the PR fulfills the following requirements! -->
<!-- If this is your first PR, make sure to add your details to the
AUTHORS.rst! -->
### Pull Request Checklist:
- [ ] This PR addresses an already opened issue (for bug fixes /
features)
    - This PR fixes #xyz
- [ ] (If applicable) Documentation has been added / updated (for bug
fixes / features).
- [x] (If applicable) Tests have been added.
- [x] This PR does not seem to break the templates.
- [x] CHANGES.rst has been updated (with summary of main changes).
- [x] Link to issue (:issue:`number`) and pull request (:pull:`number`)
has been added.

### What kind of change does this PR introduce?

* add bias adjust tests
* fixed a few bugs:
* Removed train from the inside the period for loop. Only need to do it
once and bugged with group if inside loop.
* fix `_add_preprocessing_attr` that was not actually modifying the
attrs
  * Change way of defining default group

### Does this PR introduce a breaking change?
* `adjust` won't fail if dict group and many periods.
* `preprocessing_attrs` will actually be added
* If pass `group=False`, it will actually be False.

### Other information:

1. Is it ok to remove the TODOs ? I'm not sure exactly what they were
referring to, I don't think they are relevant anymore.
2. Should we remove `moving_yearly_window` I am not actually using the
args for info-crue cmip6 and the function has changed name in xclim.
  • Loading branch information
juliettelavoie authored Mar 15, 2024
2 parents 7ca7f56 + db84f19 commit 57f5d85
Show file tree
Hide file tree
Showing 3 changed files with 343 additions and 37 deletions.
7 changes: 6 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,24 @@ Changelog

v0.9.0 (unreleased)
-------------------
Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Gabriel Rondeau-Genesse (:user:`RondeauG`).
Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Gabriel Rondeau-Genesse (:user:`RondeauG`), Juliette Lavoie (:user: `juliettelavoie`).

Internal changes
^^^^^^^^^^^^^^^^
* Updated the `cookiecutter` template to the latest version. (:pull:`358`):
* Addresses a handful of misconfigurations in the GitHub Workflows.
* Added a few free `grep`-based hooks for finding unwanted artifacts in the code base.
* Updated `ruff` to v0.2.0 and `black` to v24.2.0.
* Added tests for biasadjust. (:pull:`366`).

Bug fixes
^^^^^^^^^
* Fix ``unstack_dates`` for the new frequency syntax introduced by pandas v2.2. (:pull:`359`).
* ``subset_warming_level`` will not return partial subsets if the warming level is reached at the end of the timeseries. (:issue:`360`, :pull:`359`).
* Loading of training in `adjust` is now done outside of the periods loop. (:pull:`366`).
* Fixed bug for adding the preprocessing attributes inside the `adjust` function. (:pull:`366`).
* Fixed a bug to accept `group = False` in `adjust` function. (:pull:`366`).


v0.8.3 (2024-02-28)
-------------------
Expand Down
319 changes: 319 additions & 0 deletions tests/test_biasadjust.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,319 @@
import numpy as np
import pytest
import xarray as xr
import xclim as xc
from conftest import notebooks
from xclim.testing.helpers import test_timeseries as timeseries

import xscen as xs

xc.set_options(
sdba_encode_cf=False
) # FIXME: A temporary bug fix waiting for xclim 0.49


class TestTrain:
dref = timeseries(
np.ones(365 * 3), variable="tas", start="2001-01-01", freq="D", as_dataset=True
)

dhist = timeseries(
np.concatenate([np.ones(365 * 2) * 2, np.ones(365) * 3]),
variable="tas",
start="2001-01-01",
freq="D",
as_dataset=True,
)
dhist.attrs["cat:xrfreq"] = "D"
dhist.attrs["cat:domain"] = "one_point"
dhist.attrs["cat:id"] = "fake_id"

@pytest.mark.parametrize(
"var, period",
[("tas", ["2001", "2002"]), (["tas"], ["2003", "2003"])],
)
def test_basic_train(self, var, period):
out = xs.train(self.dref, self.dhist, var=var, period=period)

assert out.attrs["cat:xrfreq"] == "D"
assert out.attrs["cat:domain"] == "one_point"
assert out.attrs["cat:id"] == "fake_id"
assert out.attrs["cat:processing_level"] == "training_tas"

assert "dayofyear" in out
assert "quantiles" in out
result = [-1] * 365 if period == ["2001", "2002"] else [-2] * 365
np.testing.assert_array_equal(out["scaling"], result)

def test_preprocess(self):

dref360 = xc.core.calendar.convert_calendar(
self.dref, "360_day", align_on="year"
)

out = xs.train(
dref360,
self.dhist,
var="tas",
period=["2001", "2002"],
adapt_freq={"thresh": "2 K"},
jitter_over={"upper_bnd": "3 K", "thresh": "2 K"},
jitter_under={"thresh": "2 K"},
)

assert out.attrs["train_params"] == {
"maximal_calendar": "noleap",
"adapt_freq": {"thresh": "2 K"},
"jitter_over": {"upper_bnd": "3 K", "thresh": "2 K"},
"jitter_under": {"thresh": "2 K"},
"var": ["tas"],
}

assert "pth" in out
assert "dP0" in out
assert "dayofyear" in out
assert "quantiles" in out

def test_group(self):
out = xs.train(
self.dref,
self.dhist,
var="tas",
period=["2001", "2002"],
group={"group": "time.month", "window": 1},
)

out1 = xs.train(
self.dref,
self.dhist,
var="tas",
period=["2001", "2002"],
group="time.month",
)

assert "month" in out
assert "quantiles" in out
assert out1.equals(out)

def test_errors(self):
with pytest.raises(ValueError):
xs.train(self.dref, self.dhist, var=["tas", "pr"], period=["2001", "2002"])


class TestAdjust:
dref = timeseries(
np.ones((365 * 3) + 1), # leap year
variable="tas",
start="2001-01-01",
freq="D",
as_dataset=True,
)

dsim = timeseries(
np.concatenate([np.ones(365 * 3) * 2, np.ones((365 * 3) + 1) * 4]),
variable="tas",
start="2001-01-01",
freq="D",
as_dataset=True,
)
dsim.attrs["cat:xrfreq"] = "D"
dsim.attrs["cat:domain"] = "one_point"
dsim.attrs["cat:id"] = "fake_id"

@pytest.mark.parametrize(
"periods, to_level, bias_adjust_institution, bias_adjust_project",
[
(["2001", "2006"], None, None, None),
([["2001", "2001"], ["2006", "2006"]], "test", "i", "p"),
],
)
def test_basic(
self, periods, to_level, bias_adjust_institution, bias_adjust_project
):
dtrain = xs.train(
self.dref,
self.dsim.sel(time=slice("2001", "2003")),
var="tas",
period=["2001", "2003"],
)

out = xs.adjust(
dtrain,
self.dsim,
periods=periods,
to_level=to_level,
bias_adjust_institution=bias_adjust_institution,
bias_adjust_project=bias_adjust_project,
)
assert out.attrs["cat:processing_level"] == to_level or "biasadjusted"
assert out.attrs["cat:variable"] == ("tas",)
assert out.attrs["cat:id"] == "fake_id"
assert (
out["tas"].attrs["bias_adjustment"]
== "DetrendedQuantileMapping(group=Grouper("
"name='time.dayofyear', window=31), kind='+'"
").adjust(sim, )"
)
assert xc.core.calendar.get_calendar(out) == "noleap"

if bias_adjust_institution is not None:
assert out.attrs["cat:bias_adjust_institution"] == "i"
if bias_adjust_project is not None:
assert out.attrs["cat:bias_adjust_project"] == "p"

assert out.time.dt.year.values[0] == 2001
assert out.time.dt.year.values[-1] == 2006

if periods == ["2001", "2006"]:
np.testing.assert_array_equal(
out["tas"].values,
np.concatenate(
[np.ones(365 * 3) * 1, np.ones(365 * 3) * 3]
), # -1 for leap year
)
else: # periods==[['2001','2001'], ['2006','2006']]
np.testing.assert_array_equal(
out["tas"].values,
np.concatenate([np.ones(365 * 1) * 1, np.ones(365 * 1) * 3]),
)

def test_write_train(self):
dtrain = xs.train(
self.dref,
self.dsim.sel(time=slice("2001", "2003")),
var="tas",
period=["2001", "2003"],
adapt_freq={"thresh": "2 K"},
jitter_over={"upper_bnd": "3 K", "thresh": "2 K"},
jitter_under={"thresh": "2 K"},
)

root = str(notebooks / "_data")
xs.save_to_zarr(dtrain, f"{root}/test.zarr", mode="o")
dtrain2 = xr.open_dataset(
f"{root}/test.zarr", chunks={"dayofyear": 365, "quantiles": 15}
)

out = xs.adjust(
dtrain,
self.dsim,
periods=["2001", "2006"],
xclim_adjust_args={
"detrend": {
"LoessDetrend": {"f": 0.2, "niter": 1, "d": 0, "weights": "tricube"}
}
},
)

out2 = xs.adjust(
dtrain2,
self.dsim,
periods=["2001", "2006"],
xclim_adjust_args={
"detrend": {
"LoessDetrend": {"f": 0.2, "niter": 1, "d": 0, "weights": "tricube"}
}
},
)

assert (
out.tas.attrs["bias_adjustment"]
== "DetrendedQuantileMapping(group=Grouper(name='time.dayofyear',"
" window=31), kind='+').adjust(sim, detrend=<LoessDetrend>),"
" ref and hist were prepared with jitter_under_thresh(ref, hist,"
" {'thresh': '2 K'}) and jitter_over_thresh(ref, hist, {'upper_bnd':"
" '3 K', 'thresh': '2 K'}) and adapt_freq(ref, hist, {'thresh': '2 K'})"
)

assert (
out2.tas.attrs["bias_adjustment"]
== "DetrendedQuantileMapping(group=Grouper(name='time.dayofyear',"
" window=31), kind='+').adjust(sim, detrend=<LoessDetrend>), ref and"
" hist were prepared with jitter_under_thresh(ref, hist, {'thresh':"
" '2 K'}) and jitter_over_thresh(ref, hist, {'upper_bnd': '3 K',"
" 'thresh': '2 K'}) and adapt_freq(ref, hist, {'thresh': '2 K'})"
)

assert out.equals(out2)

def test_xclim_vs_xscen(
self,
): # should give the same results using xscen and xclim
dref = (
timeseries(
np.random.randint(0, high=10, size=(365 * 3) + 1),
variable="pr",
start="2001-01-01",
freq="D",
as_dataset=True,
)
.astype("float32")
.chunk({"time": -1})
)

dsim = (
timeseries(
np.random.randint(0, high=10, size=365 * 6 + 1),
variable="pr",
start="2001-01-01",
freq="D",
as_dataset=True,
)
.astype("float32")
.chunk({"time": -1})
)
dhist = dsim.sel(time=slice("2001", "2003")).chunk({"time": -1})

# xscen version
dtrain_xscen = xs.train(
dref,
dhist,
var="pr",
period=["2001", "2003"],
adapt_freq={"thresh": "1 mm d-1"},
xclim_train_args={"kind": "*", "nquantiles": 50},
)

out_xscen = xs.adjust(
dtrain_xscen,
dsim,
periods=["2001", "2006"],
xclim_adjust_args={
"detrend": {
"LoessDetrend": {"f": 0.2, "niter": 1, "d": 0, "weights": "tricube"}
},
"interp": "nearest",
"extrapolation": "constant",
},
)

# xclim version
with xc.set_options(sdba_extra_output=True):
group = xc.sdba.Grouper(group="time.dayofyear", window=31)

drefx = xc.core.calendar.convert_calendar(
dref.sel(time=slice("2001", "2003")), "noleap"
)
dhistx = xc.core.calendar.convert_calendar(
dhist.sel(time=slice("2001", "2003")), "noleap"
)
dsimx = xc.core.calendar.convert_calendar(
dsim.sel(time=slice("2001", "2006")), "noleap"
)

dhist_ad, pth, dP0 = xc.sdba.processing.adapt_freq(
drefx["pr"], dhistx["pr"], group=group, thresh="1 mm d-1"
)

QM = xc.sdba.DetrendedQuantileMapping.train(
drefx["pr"], dhist_ad, group=group, kind="*", nquantiles=50
)

detrend = xc.sdba.detrending.LoessDetrend(
f=0.2, niter=1, d=0, weights="tricube", group=group, kind="*"
)
out_xclim = QM.adjust(
dsimx["pr"], detrend=detrend, interp="nearest", extrapolation="constant"
).rename({"scen": "pr"})

assert out_xscen.equals(out_xclim)
Loading

0 comments on commit 57f5d85

Please sign in to comment.