From 38c3e7da0ca826bf80b791e35126268123b6a61b Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 4 Mar 2024 12:04:18 -0500 Subject: [PATCH 1/8] Fix wl_bounds and unstack_dates --- xscen/extract.py | 8 ++++---- xscen/utils.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/xscen/extract.py b/xscen/extract.py index 0a29e6ad..1c384e39 100644 --- a/xscen/extract.py +++ b/xscen/extract.py @@ -1107,7 +1107,7 @@ def _get_warming_level(model): if isinstance(realization, xr.DataArray): if return_horizon: return xr.DataArray( - out, dims=(realization.dims[0], "bounds"), coords=realization.coords + out, dims=(realization.dims[0], "wl_bounds"), coords=realization.coords ) return xr.DataArray(out, dims=(realization.dims[0],), coords=realization.coords) @@ -1251,7 +1251,7 @@ def subset_warming_level( data.expand_dims(warminglevel=wl_crd).assign_coords( time=fake_time[: data.time.size], warminglevel_bounds=( - ("realization", "warminglevel", "bounds"), + ("realization", "warminglevel", "wl_bounds"), [[bnds_crd]], ), ) @@ -1271,10 +1271,10 @@ def subset_warming_level( # WL not reached or not completely inside ds time if start_yr is None or ds_wl.time.size == 0: ds_wl = ds.isel(time=slice(0, fake_time.size)) * np.NaN - wlbnds = (("warminglevel", "bounds"), [[np.NaN, np.NaN]]) + wlbnds = (("warminglevel", "wl_bounds"), [[np.NaN, np.NaN]]) else: wlbnds = ( - ("warminglevel", "bounds"), + ("warminglevel", "wl_bounds"), [ [ date_cls(int(start_yr), 1, 1), diff --git a/xscen/utils.py b/xscen/utils.py index c629f295..88c3d866 100644 --- a/xscen/utils.py +++ b/xscen/utils.py @@ -1082,7 +1082,7 @@ def unstack_dates( ) # Fast track for annual - if base == "A": + if base in "YA": if seasons: seaname = seasons[first.month] elif anchor == "JAN": From 0759bff96f2ce1ed25f684c2dcfbcb831a040f61 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Thu, 7 Mar 2024 09:47:28 -0500 Subject: [PATCH 2/8] better check for periods --- tests/test_extract.py | 19 +++++++++++++------ xscen/extract.py | 8 ++++---- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/tests/test_extract.py b/tests/test_extract.py index 1eb1b87e..8db10fe7 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -405,7 +405,7 @@ class TestSubsetWarmingLevel: np.tile(np.arange(1, 2), 50), variable="tas", start="2000-01-01", - freq="AS-JAN", + freq="YS-JAN", as_dataset=True, ).assign_attrs( { @@ -440,11 +440,18 @@ def test_kwargs(self): assert ds_sub.warminglevel.attrs["baseline"] == "1981-2010" assert ds_sub.attrs["cat:processing_level"] == "tests" - def test_outofrange(self): - assert xs.subset_warming_level(self.ds, wl=5) is None - - def test_none(self): - assert xs.subset_warming_level(self.ds, wl=20) is None + @pytest.mark.parametrize("wl", [3.5, 5, 20, [2, 3.5, 5, 20], [3.5, 5, 20]]) + def test_outofrange(self, wl): + # 3.5 is only partially covered by ds, 5 is out of range but within the csv, 20 is fully out of range + if not isinstance(wl, list): + assert xs.subset_warming_level(self.ds, wl=wl) is None + else: + ds = xs.subset_warming_level(self.ds, wl=wl) + assert ds.warminglevel.size == len(wl) + if len(wl) == 3: + np.testing.assert_array_equal(ds.tas.isnull().all(), [True]) + else: + np.testing.assert_array_equal(ds.tas.isnull().all(dim="time"), [False, True, True, True]) def test_multireals(self): ds = self.ds.expand_dims( diff --git a/xscen/extract.py b/xscen/extract.py index 1c384e39..d378e43e 100644 --- a/xscen/extract.py +++ b/xscen/extract.py @@ -1263,13 +1263,13 @@ def subset_warming_level( # cut the window selected above and expand dims with wl_crd ds_wl = ds.sel(time=slice(start_yr, end_yr)) if fake_time is None: - # WL not reached or completely outside ds time - if start_yr is None or ds_wl.time.size == 0: + # WL not reached or not fully contained in ds.time + if (start_yr is None) or (ds_wl.time.size == 0) or ((ds_wl.time.dt.year[-1] - ds_wl.time.dt.year[0] + 1) != window): return None ds_wl = ds_wl.expand_dims(warminglevel=wl_crd) else: - # WL not reached or not completely inside ds time - if start_yr is None or ds_wl.time.size == 0: + # WL not reached or not fully contained in ds.time + if (start_yr is None) or (ds_wl.time.size == 0) or ((ds_wl.time.dt.year[-1] - ds_wl.time.dt.year[0] + 1) != window): ds_wl = ds.isel(time=slice(0, fake_time.size)) * np.NaN wlbnds = (("warminglevel", "wl_bounds"), [[np.NaN, np.NaN]]) else: From 428fcf05780b01158b38cb563be14e448311fcf7 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Thu, 7 Mar 2024 09:49:50 -0500 Subject: [PATCH 3/8] better test for period coverage --- tests/test_extract.py | 1 + xscen/extract.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_extract.py b/tests/test_extract.py index 8db10fe7..37f91166 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -453,6 +453,7 @@ def test_outofrange(self, wl): else: np.testing.assert_array_equal(ds.tas.isnull().all(dim="time"), [False, True, True, True]) + def test_multireals(self): ds = self.ds.expand_dims( realization=[ diff --git a/xscen/extract.py b/xscen/extract.py index d378e43e..e7d15ad5 100644 --- a/xscen/extract.py +++ b/xscen/extract.py @@ -1263,12 +1263,12 @@ def subset_warming_level( # cut the window selected above and expand dims with wl_crd ds_wl = ds.sel(time=slice(start_yr, end_yr)) if fake_time is None: - # WL not reached or not fully contained in ds.time + # WL not reached, not in ds, or not fully contained in ds.time if (start_yr is None) or (ds_wl.time.size == 0) or ((ds_wl.time.dt.year[-1] - ds_wl.time.dt.year[0] + 1) != window): return None ds_wl = ds_wl.expand_dims(warminglevel=wl_crd) else: - # WL not reached or not fully contained in ds.time + # WL not reached, not in ds, or not fully contained in ds.time if (start_yr is None) or (ds_wl.time.size == 0) or ((ds_wl.time.dt.year[-1] - ds_wl.time.dt.year[0] + 1) != window): ds_wl = ds.isel(time=slice(0, fake_time.size)) * np.NaN wlbnds = (("warminglevel", "wl_bounds"), [[np.NaN, np.NaN]]) From d012eae3bf888f97b7a5a14e7c8f7f8dd22d66c3 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Thu, 7 Mar 2024 09:56:36 -0500 Subject: [PATCH 4/8] precommit hook --- tests/test_extract.py | 5 +++-- xscen/extract.py | 12 ++++++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/test_extract.py b/tests/test_extract.py index 37f91166..fce18a1b 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -451,8 +451,9 @@ def test_outofrange(self, wl): if len(wl) == 3: np.testing.assert_array_equal(ds.tas.isnull().all(), [True]) else: - np.testing.assert_array_equal(ds.tas.isnull().all(dim="time"), [False, True, True, True]) - + np.testing.assert_array_equal( + ds.tas.isnull().all(dim="time"), [False, True, True, True] + ) def test_multireals(self): ds = self.ds.expand_dims( diff --git a/xscen/extract.py b/xscen/extract.py index e7d15ad5..a0524161 100644 --- a/xscen/extract.py +++ b/xscen/extract.py @@ -1264,12 +1264,20 @@ def subset_warming_level( ds_wl = ds.sel(time=slice(start_yr, end_yr)) if fake_time is None: # WL not reached, not in ds, or not fully contained in ds.time - if (start_yr is None) or (ds_wl.time.size == 0) or ((ds_wl.time.dt.year[-1] - ds_wl.time.dt.year[0] + 1) != window): + if ( + (start_yr is None) + or (ds_wl.time.size == 0) + or ((ds_wl.time.dt.year[-1] - ds_wl.time.dt.year[0] + 1) != window) + ): return None ds_wl = ds_wl.expand_dims(warminglevel=wl_crd) else: # WL not reached, not in ds, or not fully contained in ds.time - if (start_yr is None) or (ds_wl.time.size == 0) or ((ds_wl.time.dt.year[-1] - ds_wl.time.dt.year[0] + 1) != window): + if ( + (start_yr is None) + or (ds_wl.time.size == 0) + or ((ds_wl.time.dt.year[-1] - ds_wl.time.dt.year[0] + 1) != window) + ): ds_wl = ds.isel(time=slice(0, fake_time.size)) * np.NaN wlbnds = (("warminglevel", "wl_bounds"), [[np.NaN, np.NaN]]) else: From e2b16c957b75018ed1f1f3e22cad6a2a70d06ef1 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Thu, 7 Mar 2024 10:14:08 -0500 Subject: [PATCH 5/8] fix tests --- tests/test_extract.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_extract.py b/tests/test_extract.py index fce18a1b..5f7ad21b 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -484,7 +484,7 @@ def test_multilevels(self): ds_sub.warminglevel, ["+1Cvs1850-1900", "+2Cvs1850-1900", "+3Cvs1850-1900", "+20Cvs1850-1900"], ) - np.testing.assert_array_equal(ds_sub.tas.isnull().sum("time"), [10, 0, 1, 20]) + np.testing.assert_array_equal(ds_sub.tas.isnull().sum("time"), [20, 0, 20, 20]) class TestResample: From cd0813d61648cef457d9d09034e6b7ca6aefe269 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 7 Mar 2024 10:38:02 -0500 Subject: [PATCH 6/8] wl not reached for vectorized subsetting - try to lower duplication - fix to doc --- xscen/extract.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/xscen/extract.py b/xscen/extract.py index a0524161..0d703365 100644 --- a/xscen/extract.py +++ b/xscen/extract.py @@ -983,7 +983,7 @@ def get_warming_level( # noqa: C901 ------- dict, list or str If `realization` is not a sequence, the output will follow the format indicated by `return_horizon`. - If `realization` is a sequence, the output will be a list or dictionary depending on `output`, + If `realization` is a sequence, the output will be of the same type, with values following the format indicated by `return_horizon`. """ tas_src = tas_src or Path(__file__).parent / "data" / "IPCC_annual_global_tas.nc" @@ -1235,13 +1235,20 @@ def subset_warming_level( reals = [] for real in bounds.realization.values: start, end = bounds.sel(realization=real).values - if start is not None: - data = ds.sel(realization=[real], time=slice(start, end)) + data = ds.sel(realization=[real], time=slice(start, end)) + wl_not_reached = ( + (start is None) + or (data.time.size == 0) + or ((data.time.dt.year[-1] - data.time.dt.year[0] + 1) != window) + ) + if not wl_not_reached: bnds_crd = [ date_cls(int(start), 1, 1), date_cls(int(end) + 1, 1, 1) - datetime.timedelta(seconds=1), ] else: + # In the case of not reaching the WL, data might be too short + # We create it again with the proper length data = ( ds.sel(realization=[real]).isel(time=slice(0, fake_time.size)) * np.NaN @@ -1262,22 +1269,19 @@ def subset_warming_level( start_yr, end_yr = get_warming_level(ds, wl=wl, return_horizon=True, **kwargs) # cut the window selected above and expand dims with wl_crd ds_wl = ds.sel(time=slice(start_yr, end_yr)) + wl_not_reached = ( + (start_yr is None) + or (ds_wl.time.size == 0) + or ((ds_wl.time.dt.year[-1] - ds_wl.time.dt.year[0] + 1) != window) + ) if fake_time is None: # WL not reached, not in ds, or not fully contained in ds.time - if ( - (start_yr is None) - or (ds_wl.time.size == 0) - or ((ds_wl.time.dt.year[-1] - ds_wl.time.dt.year[0] + 1) != window) - ): + if wl_not_reached: return None ds_wl = ds_wl.expand_dims(warminglevel=wl_crd) else: # WL not reached, not in ds, or not fully contained in ds.time - if ( - (start_yr is None) - or (ds_wl.time.size == 0) - or ((ds_wl.time.dt.year[-1] - ds_wl.time.dt.year[0] + 1) != window) - ): + if wl_not_reached: ds_wl = ds.isel(time=slice(0, fake_time.size)) * np.NaN wlbnds = (("warminglevel", "wl_bounds"), [[np.NaN, np.NaN]]) else: From fdb5058498944b49559807cea289f0afc0b37093 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 7 Mar 2024 10:39:41 -0500 Subject: [PATCH 7/8] add doc mention of new criterion --- xscen/extract.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xscen/extract.py b/xscen/extract.py index 0d703365..85a64465 100644 --- a/xscen/extract.py +++ b/xscen/extract.py @@ -1127,6 +1127,7 @@ def subset_warming_level( r""" Subsets the input dataset with only the window of time over which the requested level of global warming is first reached, using the IPCC Atlas method. + A warming level is considered reached only if the full `window` years are available in the dataset. Parameters ---------- From d70a41d4d1a0b6943aabce5d1e79c568339d5506 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 7 Mar 2024 10:52:06 -0500 Subject: [PATCH 8/8] Fix test --- CHANGES.rst | 7 ++++++- tests/test_extract.py | 4 ++-- xscen/extract.py | 4 ++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 22952443..e79b6373 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,7 +4,7 @@ Changelog v0.9.0 (unreleased) ------------------- -Contributors to this version: Trevor James Smith (:user:`Zeitsperre`). +Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Gabriel Rondeau-Genesse (:user:`RondeauG`). Internal changes ^^^^^^^^^^^^^^^^ @@ -13,6 +13,11 @@ Internal changes * Added a few free `grep`-based hooks for finding unwanted artifacts in the code base. * Updated `ruff` to v0.2.0 and `black` to v24.2.0. +Bug fixes +^^^^^^^^^ +* Fix ``unstack_dates`` for the new frequency syntax introduced by pandas v2.2. (:pull:`359`). +* ``subset_warming_level`` will not return partial subsets if the warming level is reached at the end of the timeseries. (:issue:`360`, :pull:`359`). + v0.8.3 (2024-02-28) ------------------- Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Trevor James Smith (:user:`Zeitsperre`), Gabriel Rondeau-Genesse (:user:`RondeauG`), Pascal Bourgault (:user:`aulemahal`). diff --git a/tests/test_extract.py b/tests/test_extract.py index 5f7ad21b..fc8c0569 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -465,12 +465,12 @@ def test_multireals(self): ) ds_sub = xs.subset_warming_level( ds, - wl=1, + wl=1.5, to_level="tests", ) np.testing.assert_array_equal(ds_sub.time.dt.year, np.arange(1000, 1020)) np.testing.assert_array_equal( - ds_sub.warminglevel_bounds[:2].dt.year, [[[1990, 2009]], [[1990, 2009]]] + ds_sub.warminglevel_bounds[:2].dt.year, [[[2004, 2023]], [[2004, 2023]]] ) assert ds_sub.warminglevel_bounds[2].isnull().all() diff --git a/xscen/extract.py b/xscen/extract.py index 85a64465..8f4fc4ca 100644 --- a/xscen/extract.py +++ b/xscen/extract.py @@ -1252,9 +1252,9 @@ def subset_warming_level( # We create it again with the proper length data = ( ds.sel(realization=[real]).isel(time=slice(0, fake_time.size)) - * np.NaN + * np.nan ) - bnds_crd = [np.NaN, np.NaN] + bnds_crd = [np.nan, np.nan] reals.append( data.expand_dims(warminglevel=wl_crd).assign_coords( time=fake_time[: data.time.size],