Add SeasonGrouper, SeasonResampler #9524

Draft: wants to merge 18 commits into base: main
doc/api.rst (2 additions, 0 deletions)
@@ -1139,6 +1139,8 @@ Grouper Objects
groupers.BinGrouper
groupers.UniqueGrouper
groupers.TimeResampler
groupers.SeasonGrouper
groupers.SeasonResampler


Rolling objects
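The new API entries document SeasonGrouper and SeasonResampler alongside the existing grouper objects. A minimal usage sketch, assuming both accept a list of season strings and plug into groupby/resample the same way UniqueGrouper and TimeResampler do (the constructor signature is an assumption, not shown in this hunk):

import xarray as xr
from xarray.groupers import SeasonGrouper, SeasonResampler

# Hypothetical monthly dataset: two years of values along "time".
ds = xr.Dataset(
    {"t2m": ("time", list(range(24)))},
    coords={"time": xr.date_range("2000-01-01", periods=24, freq="MS")},
)

# Group by custom seasons (the list-of-season-strings argument is assumed).
ds.groupby(time=SeasonGrouper(["DJF", "MAM", "JJA", "SON"])).mean()

# Resample into contiguous seasonal blocks (signature assumed as well).
ds.resample(time=SeasonResampler(["DJF", "MAM", "JJA", "SON"])).mean()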
properties/test_properties.py (31 additions, 0 deletions)
@@ -1,11 +1,15 @@
import itertools

import pytest

pytest.importorskip("hypothesis")

import hypothesis.strategies as st
from hypothesis import given

import xarray as xr
import xarray.testing.strategies as xrst
from xarray.groupers import season_to_month_tuple


@given(attrs=xrst.simple_attrs)
@@ -15,3 +19,30 @@ def test_assert_identical(attrs):

ds = xr.Dataset(attrs=attrs)
xr.testing.assert_identical(ds, ds.copy(deep=True))


@given(
roll=st.integers(min_value=0, max_value=12),
breaks=st.lists(
st.integers(min_value=0, max_value=11), min_size=1, max_size=12, unique=True
),
)
def test_property_season_month_tuple(roll, breaks):
chars = list("JFMAMJJASOND")
months = tuple(range(1, 13))

rolled_chars = chars[roll:] + chars[:roll]
rolled_months = months[roll:] + months[:roll]
breaks = sorted(breaks)
if breaks[0] != 0:
breaks = [0] + breaks
if breaks[-1] != 12:
breaks = breaks + [12]
seasons = tuple(
"".join(rolled_chars[start:stop]) for start, stop in itertools.pairwise(breaks)
)
actual = season_to_month_tuple(seasons)
expected = tuple(
rolled_months[start:stop] for start, stop in itertools.pairwise(breaks)
)
assert expected == actual
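The property above encodes the intended contract of season_to_month_tuple: each season string is translated into the tuple of calendar month numbers it spans. A concrete instance of that expectation, using the standard meteorological seasons:

from xarray.groupers import season_to_month_tuple

# One concrete case of the property tested above.
assert season_to_month_tuple(("DJF", "MAM", "JJA", "SON")) == (
    (12, 1, 2),
    (3, 4, 5),
    (6, 7, 8),
    (9, 10, 11),
)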
xarray/core/dataarray.py (3 additions, 3 deletions)
@@ -6850,7 +6850,7 @@ def groupby(

>>> da.groupby("letters")
<DataArrayGroupBy, grouped over 1 grouper(s), 2 groups in total:
'letters': 2/2 groups present with labels 'a', 'b'>
'letters': UniqueGrouper('letters'), 2/2 groups with labels 'a', 'b'>

Execute a reduction

@@ -6866,8 +6866,8 @@

>>> da.groupby(["letters", "x"])
<DataArrayGroupBy, grouped over 2 grouper(s), 8 groups in total:
'letters': 2/2 groups present with labels 'a', 'b'
'x': 4/4 groups present with labels 10, 20, 30, 40>
'letters': UniqueGrouper('letters'), 2/2 groups with labels 'a', 'b'
'x': UniqueGrouper('x'), 4/4 groups with labels 10, 20, 30, 40>

Use Grouper objects to express more complicated GroupBy operations

xarray/core/dataset.py (3 additions, 3 deletions)
@@ -10448,7 +10448,7 @@ def groupby(

>>> ds.groupby("letters")
<DatasetGroupBy, grouped over 1 grouper(s), 2 groups in total:
'letters': 2/2 groups present with labels 'a', 'b'>
'letters': UniqueGrouper('letters'), 2/2 groups with labels 'a', 'b'>

Execute a reduction

@@ -10465,8 +10465,8 @@

>>> ds.groupby(["letters", "x"])
<DatasetGroupBy, grouped over 2 grouper(s), 8 groups in total:
'letters': 2/2 groups present with labels 'a', 'b'
'x': 4/4 groups present with labels 10, 20, 30, 40>
'letters': UniqueGrouper('letters'), 2/2 groups with labels 'a', 'b'
'x': UniqueGrouper('x'), 4/4 groups with labels 10, 20, 30, 40>

Use Grouper objects to express more complicated GroupBy operations

xarray/core/groupby.py (9 additions, 7 deletions)
@@ -253,6 +253,8 @@ def _ensure_1d(
from xarray.core.dataarray import DataArray

if isinstance(group, DataArray):
for dim in set(group.dims) - set(obj.dims):
obj = obj.expand_dims(dim)
# try to stack the dims of the group into a single dim
orig_dims = group.dims
stacked_dim = "stacked_" + "_".join(map(str, orig_dims))
@@ -750,7 +752,10 @@ def __repr__(self) -> str:
for grouper in self.groupers:
coord = grouper.unique_coord
labels = ", ".join(format_array_flat(coord, 30).split())
text += f"\n {grouper.name!r}: {coord.size}/{grouper.full_index.size} groups present with labels {labels}"
text += (
f"\n {grouper.name!r}: {type(grouper.grouper).__name__}({grouper.group.name!r}), "
f"{coord.size}/{grouper.full_index.size} groups with labels {labels}"
)
return text + ">"

def _iter_grouped(self) -> Iterator[T_Xarray]:
@@ -974,7 +979,7 @@ def _flox_reduce(
parsed_dim_list = list()
# preserve order
for dim_ in itertools.chain(
*(grouper.group.dims for grouper in self.groupers)
*(grouper.codes.dims for grouper in self.groupers)
):
if dim_ not in parsed_dim_list:
parsed_dim_list.append(dim_)
@@ -988,7 +993,7 @@
# Better to control it here than in flox.
for grouper in self.groupers:
if any(
d not in grouper.group.dims and d not in obj.dims for d in parsed_dim
d not in grouper.codes.dims and d not in obj.dims for d in parsed_dim
):
raise ValueError(f"cannot reduce over dimensions {dim}.")

@@ -1232,9 +1237,6 @@ def quantile(
"Sample quantiles in statistical packages,"
The American Statistician, 50(4), pp. 361-365, 1996
"""
if dim is None:
dim = (self._group_dim,)

# Dataset.quantile does this, do it for flox to ensure same output.
q = np.asarray(q, dtype=np.float64)

@@ -1253,7 +1255,7 @@
self._obj.__class__.quantile,
shortcut=False,
q=q,
dim=dim,
dim=dim or self._group_dim,
method=method,
keep_attrs=keep_attrs,
skipna=skipna,
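Two user-visible changes land in this file: the GroupBy repr now names the grouper class for each grouped variable, and quantile no longer pre-computes a default dim, instead deferring to the call site via dim=dim or self._group_dim. A minimal sketch of the default quantile behavior this refactor preserves (the data here is made up for illustration):

import numpy as np
import xarray as xr

da = xr.DataArray(
    np.arange(6.0),
    dims="x",
    coords={"letters": ("x", ["a", "a", "b", "b", "b", "a"])},
)

# With no explicit `dim`, the quantile is still taken over the grouped
# dimension ("x" here), as before this change.
da.groupby("letters").quantile(0.5)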
xarray/core/toolzcompat.py (56 additions, 0 deletions)
@@ -0,0 +1,56 @@
# This file contains functions copied from the toolz library in accordance
# with its license. The original copyright notice is duplicated below.

# Copyright (c) 2013 Matthew Rocklin

# All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:

# a. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# b. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# c. Neither the name of toolz nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.


# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.


def sliding_window(n, seq):
"""A sequence of overlapping subsequences

>>> list(sliding_window(2, [1, 2, 3, 4]))
[(1, 2), (2, 3), (3, 4)]

This function creates a sliding window suitable for transformations like
sliding means / smoothing

>>> mean = lambda seq: float(sum(seq)) / len(seq)
>>> list(map(mean, sliding_window(2, [1, 2, 3, 4])))
[1.5, 2.5, 3.5]
"""
import collections
import itertools

return zip(
*(
collections.deque(itertools.islice(it, i), 0) or it
for i, it in enumerate(itertools.tee(seq, n))
),
strict=False,
)
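A plausible reason for vendoring this helper is building rolling multi-month seasons (for example, every consecutive three-month window over the year); that use is an inference, not something shown in this diff. A quick illustration of the helper's output:

from xarray.core.toolzcompat import sliding_window

# Every consecutive three-month window over a calendar year of month numbers.
list(sliding_window(3, range(1, 13)))
# [(1, 2, 3), (2, 3, 4), ..., (10, 11, 12)]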