Merge branch 'main' of github.com:scitools/iris into cache-dask-arrays

bouweandela · Feb 12, 2025 · fc118e3 · fc118e3
2 parents 0533c05 + ba9a6e2
commit fc118e3
Show file tree

Hide file tree

Showing 80 changed files with 1,923 additions and 1,109 deletions.
diff --git a/.github/workflows/ci-manifest.yml b/.github/workflows/ci-manifest.yml
@@ -23,4 +23,4 @@ concurrency:
 jobs:
   manifest:
     name: "check-manifest"
-    uses: scitools/workflows/.github/workflows/ci-manifest.yml@2024.12.0
+    uses: scitools/workflows/.github/workflows/ci-manifest.yml@2025.02.0
diff --git a/.github/workflows/refresh-lockfiles.yml b/.github/workflows/refresh-lockfiles.yml
@@ -14,5 +14,5 @@ on:
 
 jobs:
   refresh_lockfiles:
-    uses: scitools/workflows/.github/workflows/refresh-lockfiles.yml@2024.12.0
+    uses: scitools/workflows/.github/workflows/refresh-lockfiles.yml@2025.02.0
     secrets: inherit
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -29,7 +29,7 @@ repos:
     -   id: no-commit-to-branch
 
 -   repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.8.2"
+    rev: "v0.9.6"
     hooks:
     -   id: ruff
         types: [file, python]
@@ -38,7 +38,7 @@ repos:
         types: [file, python]
 
 -   repo: https://github.com/codespell-project/codespell
-    rev: "v2.3.0"
+    rev: "v2.4.1"
     hooks:
     -   id: codespell
         types_or: [asciidoc, python, markdown, rst]
@@ -63,7 +63,7 @@ repos:
         types: [file, python]
 
 -   repo: https://github.com/pre-commit/mirrors-mypy
-    rev: 'v1.13.0'
+    rev: 'v1.15.0'
     hooks:
     -   id: mypy
         additional_dependencies:

diff --git a/docs/src/community/iris_xarray.rst b/docs/src/community/iris_xarray.rst
@@ -141,7 +141,7 @@ output is not fully CF compliant (as-per `the cf checker <https://cfchecker.ncas
   approach in Iris, and means that the use of the "coordinates" attribute in output is
   often not CF compliant.
 * dates are converted to datetime-like objects internally.  There are special features
-  providing `support for  non-standard calendars <https://docs.xarray.dev/en/stable/user-guide/weather-climate.html#non-standard-calendars-and-dates-outside-the-nanosecond-precision-range>`_,
+  providing `support for  non-standard calendars <https://docs.xarray.dev/en/stable/user-guide/weather-climate.html#non-standard-calendars-and-dates-outside-the-precision-range>`_,
   however date units may not always be saved correctly.
 * CF-style coordinate bounds variables are not fully understood.  The CF approach
   where bounds variables do not usually define their units or standard_names can cause

diff --git a/docs/src/conf.py b/docs/src/conf.py
@@ -141,7 +141,7 @@ def _dotv(version):
 .. |python_version| replace:: {build_python_version}
 .. |python_support| replace:: {python_support}
 .. |iris_version| replace:: v{version}
-.. |build_date| replace:: ({datetime.datetime.now().strftime('%d %b %Y')})
+.. |build_date| replace:: ({datetime.datetime.now().strftime("%d %b %Y")})
 """
 
 # Add any Sphinx extension module names here, as strings. They can be

diff --git a/docs/src/further_topics/ugrid/other_meshes.rst b/docs/src/further_topics/ugrid/other_meshes.rst
@@ -360,5 +360,5 @@ dimensions into a single mesh dimension.  Since Iris cubes don't support a "resh
 
 
 .. _WAVEWATCH III: https://github.com/NOAA-EMC/WW3
-.. _FESOM 1.4: https://www.fesom.de/models/fesom14/
+.. _FESOM 1.4: https://fesom.de/models/fesom14/
 .. _NEMO: https://www.nemo-ocean.eu/
diff --git a/docs/src/index.rst b/docs/src/index.rst
@@ -125,8 +125,7 @@ For more information see :ref:`why_iris`.
                 Voted Issues
 
 
-Icons made by `FreePik <https://www.freepik.com>`_ from
-`Flaticon <https://www.flaticon.com/>`_
+Icons made by FreePik from `Flaticon <https://www.flaticon.com/>`_
 
 
 .. _iris_support:

diff --git a/docs/src/userguide/interpolation_and_regridding.rst b/docs/src/userguide/interpolation_and_regridding.rst
@@ -29,9 +29,9 @@ The following are the regridding schemes that are currently available in Iris:
 * point in cell regridding (:class:`iris.analysis.PointInCell`) and
 * area-weighted regridding (:class:`iris.analysis.AreaWeighted`, first-order conservative).
 
-The linear, nearest-neighbor, and area-weighted regridding schemes support
-lazy regridding, i.e. if the source cube has lazy data, the resulting cube
-will also have lazy data.
+The linear and nearest-neighbour interpolation schemes, and the linear, nearest-neighbour,
+and area-weighted regridding schemes support lazy evaluation, i.e. if the source cube has
+lazy data, the resulting cube will also have lazy data.
 See :doc:`real_and_lazy_data` for an introduction to lazy data.
 See :doc:`../further_topics/which_regridder_to_use` for a more in-depth overview of the different regridders.
 
@@ -194,46 +194,6 @@ For example, to mask values that lie beyond the range of the original data:
    [-- 494.44451904296875 588.888916015625 683.333251953125 777.77783203125
     872.2222290039062 966.666748046875 1061.111083984375 1155.555419921875 --]
 
-
-.. _caching_an_interpolator:
-
-Caching an Interpolator
-^^^^^^^^^^^^^^^^^^^^^^^
-
-If you need to interpolate a cube on multiple sets of sample points you can
-'cache' an interpolator to be used for each of these interpolations. This can
-shorten the execution time of your code as the most computationally
-intensive part of an interpolation is setting up the interpolator.
-
-To cache an interpolator you must set up an interpolator scheme and call the
-scheme's interpolator method. The interpolator method takes as arguments:
-
-#. a cube to be interpolated, and
-#. an iterable of coordinate names or coordinate instances of the coordinates that are to be interpolated over.
-
-For example:
-
-    >>> air_temp = iris.load_cube(iris.sample_data_path('air_temp.pp'))
-    >>> interpolator = iris.analysis.Nearest().interpolator(air_temp, ['latitude', 'longitude'])
-
-When this cached interpolator is called you must pass it an iterable of sample points
-that have the same form as the iterable of coordinates passed to the constructor.
-So, to use the cached interpolator defined above:
-
-    >>> latitudes = np.linspace(48, 60, 13)
-    >>> longitudes = np.linspace(-11, 2, 14)
-    >>> for lat, lon in zip(latitudes, longitudes):
-    ...     result = interpolator([lat, lon])
-
-In each case ``result`` will be a cube interpolated from the ``air_temp`` cube we
-passed to interpolator.
-
-Note that you must specify the required extrapolation mode when setting up the cached interpolator.
-For example::
-
-    >>> interpolator = iris.analysis.Nearest(extrapolation_mode='nan').interpolator(cube, coords)
-
-
 .. _regridding:
 
 Regridding
@@ -417,24 +377,24 @@ In each case ``result`` will be the input cube regridded to the grid defined by
 the target grid cube (in this case ``rotated_psl``) that we used to define the
 cached regridder.
 
-Regridding Lazy Data
-^^^^^^^^^^^^^^^^^^^^
+Interpolating and Regridding Lazy Data
+--------------------------------------
 
-If you are working with large cubes, especially when you are regridding to a
-high resolution target grid, you may run out of memory when trying to
-regrid a cube. When this happens, make sure the input cube has lazy data
+If you are working with large cubes, you may run out of memory when trying to
+interpolate or regrid a cube. For instance, this might happen when regridding to a
+high resolution target grid. When this happens, make sure the input cube has lazy data
 
     >>> air_temp = iris.load_cube(iris.sample_data_path('A1B_north_america.nc'))
     >>> air_temp
     <iris 'Cube' of air_temperature / (K) (time: 240; latitude: 37; longitude: 49)>
     >>> air_temp.has_lazy_data()
     True
 
-and the regridding scheme supports lazy data. All regridding schemes described
-here support lazy data. If you still run out of memory even while using lazy
-data, inspect the
-`chunks <https://docs.dask.org/en/latest/array-chunks.html>`__
-:
+and the interpolation or regridding scheme supports lazy data. All interpolation and
+regridding schemes described here with the exception of :class:`iris.analysis.PointInCell`
+(point-in-cell regridder) and :class:`iris.analysis.UnstructuredNearest` (nearest-neighbour
+regridder) support lazy data. If you still run out of memory even while using lazy data,
+inspect the `chunks <https://docs.dask.org/en/latest/array-chunks.html>`__ :
 
     >>> air_temp.lazy_data().chunks
     ((240,), (37,), (49,))
@@ -455,6 +415,6 @@ dimension, to regrid it in 8 chunks of 30 timesteps at a time:
 Assuming that Dask is configured such that it processes only a few chunks of
 the data array at a time, this will further reduce memory use.
 
-Note that chunking in the horizontal dimensions is not supported by the
-regridding schemes. Chunks in these dimensions will automatically be combined
+Note that chunking in the horizontal dimensions is not supported by the interpolation
+and regridding schemes. Chunks in these dimensions will automatically be combined
 before regridding.
diff --git a/docs/src/userguide/plotting_examples/cube_brewer_cite_contourf.py b/docs/src/userguide/plotting_examples/cube_brewer_cite_contourf.py
@@ -1,4 +1,4 @@
-"""Addind a citation for a plot using iris.plot.citation()."""
+"""Adding a citation for a plot using iris.plot.citation()."""
 
 import matplotlib.pyplot as plt
 

diff --git a/docs/src/whatsnew/3.11.rst b/docs/src/whatsnew/3.11.rst
@@ -34,6 +34,20 @@ This document explains the changes made to Iris for this release
    And finally, get in touch with us on :issue:`GitHub<new/choose>` if you have
    any issues or feature requests for improving Iris. Enjoy!
 
+v3.11.1 (19 Dec 2024)
+===========================
+
+.. dropdown:: |iris_version| Patches
+   :color: primary
+   :icon: alert
+   :animate: fade-in
+
+   The patches in this release of Iris include:
+
+   #. We added in a :class:`~iris.Future` flag - ``date_microseconds``, which
+      prevents floating point problems arising from :class:`cf_units.Unit` v3.3.
+
+   #. We pinned dask to <2024.9 to prevent an indexing bug.
 
 📢 Announcements
 ================
@@ -73,6 +87,14 @@ This document explains the changes made to Iris for this release
 #. `@ESadek-MO`_ updated to the latest CF Standard Names Table v86
    (5 September 2024). (:pull:`6200`)
 
+#. `@trexfeathers`_ added a new :class:`~iris.Future` flag -
+   ``date_microseconds`` - which sets whether Iris should use the new
+   microsecond-precision units (see :class:`cf_units.Unit`, microseconds
+   introduced in version 3.3) when the unit
+   is a time unit. The previous maximum precision was seconds. You should check
+   your code for new floating point problems if activating this (e.g. when
+   using the :class:`~iris.Constraint` API). (:pull:`6260`)
+
 🐛 Bugs Fixed
 =============
 
@@ -116,6 +138,10 @@ This document explains the changes made to Iris for this release
    the concatenation axis. This issue can be avoided by disabling the
    problematic check. (:pull:`5926` and :pull:`6187`)
 
+#. Note that due to the new ``date_microseconds`` :class:`~iris.Future` flag,
+   the time coordinate categorisation speedup introduced above
+   will only be available when ``iris.FUTURE.date_microseconds == True``.
+
 🔥 Deprecations
 ===============
 
@@ -135,6 +161,9 @@ This document explains the changes made to Iris for this release
 
    * `NumPy v2 changed scalar printing`_
 
+#. `@stephenworsley`_ pinned dask to <2024.9 due to an indexing bug. (:issue:`6251`,
+   :pull:`6255`)
+
 
 📚 Documentation
 ================

diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst
@@ -32,11 +32,21 @@ This document explains the changes made to Iris for this release
 
 #. N/A
 
+#. `@pp-mo`_ added a new utility function :func:`~iris.util.equalise_cubes`, to help
+   with aligning cubes so they can merge / concatenate.
+   (:issue:`6248`, :pull:`6257`)
+
+
+#. `@fnattino`_ added the lazy median aggregator :class:`iris.analysis.MEDIAN`
+   based on the implementation discussed by `@rcomer`_ and `@stefsmeets`_ in
+   :issue:`4039` (:pull:`6167`).
+
 
 🐛 Bugs Fixed
 =============
 
-#. N/A
+#. `@rcomer`_ added handling for string stash codes when saving pp files.
+   (:issue:`6239`, :pull:`6289`)
 
 
 💣 Incompatible Changes
@@ -50,7 +60,15 @@ This document explains the changes made to Iris for this release
 🚀 Performance Enhancements
 ===========================
 
-#. N/A
+#. `@bouweandela`_ made loading :class:`~iris.cube.Cube`\ s from small NetCDF
+   files faster. (:pull:`6229`)
+
+#. `@fnattino`_ enabled lazy cube interpolation using the linear and
+   nearest-neighbour interpolators (:class:`iris.analysis.Linear` and
+   :class:`iris.analysis.Nearest`). Note that this implementation removes
+   performance benefits linked to caching an interpolator object. While this does
+   not break previously suggested code (instantiating and re-using an interpolator
+   object remains possible), this is no longer an advertised feature. (:pull:`6084`)
 
 
 🔥 Deprecations
@@ -88,13 +106,19 @@ This document explains the changes made to Iris for this release
    :doc:`/developers_guide/release_do_nothing` to be more thorough and apply
    lessons learned from recent releases. (:pull:`6062`)
 
+#. `@schlunma`_ made lazy `smart weights
+   <https://github.com/SciTools/iris/pull/5084>`__ used for cube
+   aggregations have the same chunks as their parent cube if broadcasting is
+   necessary. (:issue:`6285`, :pull:`6288`)
+
 
 .. comment
     Whatsnew author names (@github name) in alphabetical order. Note that,
     core dev names are automatically included by the common_links.inc:
 
-
-
+.. _@fnattino: https://github.com/fnattino
+.. _@jrackham-mo: https://github.com/jrackham-mo
+.. _@stefsmeets: https://github.com/stefsmeets
 
 .. comment
-    Whatsnew resources in alphabetical order:
+    Whatsnew resources in alphabetical order:
diff --git a/lib/iris/__init__.py b/lib/iris/__init__.py
@@ -143,7 +143,13 @@ def callback(cube, field, filename):
 class Future(threading.local):
     """Run-time configuration controller."""
 
-    def __init__(self, datum_support=False, pandas_ndim=False, save_split_attrs=False):
+    def __init__(
+        self,
+        datum_support=False,
+        pandas_ndim=False,
+        save_split_attrs=False,
+        date_microseconds=False,
+    ):
         """Container for run-time options controls.
 
         To adjust the values simply update the relevant attribute from
@@ -169,6 +175,13 @@ def __init__(self, datum_support=False, pandas_ndim=False, save_split_attrs=Fals
             different ways :  "global" ones are saved as dataset attributes, where
             possible, while "local" ones are saved as data-variable attributes.
             See :func:`iris.fileformats.netcdf.saver.save`.
+        date_microseconds : bool, default=False
+            Newer versions of cftime and cf-units support microsecond precision
+            for dates, compared to the legacy behaviour that only works with
+            seconds. Enabling microsecond precision will alter core Iris
+            behaviour, such as when using :class:`~iris.Constraint`, and you
+            may need to defend against floating point precision issues where
+            you didn't need to before.
 
         """
         # The flag 'example_future_flag' is provided as a reference for the
@@ -181,6 +194,7 @@ def __init__(self, datum_support=False, pandas_ndim=False, save_split_attrs=Fals
         self.__dict__["datum_support"] = datum_support
         self.__dict__["pandas_ndim"] = pandas_ndim
         self.__dict__["save_split_attrs"] = save_split_attrs
+        self.__dict__["date_microseconds"] = date_microseconds
 
         # TODO: next major release: set IrisDeprecation to subclass
         #  DeprecationWarning instead of UserWarning.
@@ -189,7 +203,12 @@ def __repr__(self):
         # msg = ('Future(example_future_flag={})')
         # return msg.format(self.example_future_flag)
         msg = "Future(datum_support={}, pandas_ndim={}, save_split_attrs={})"
-        return msg.format(self.datum_support, self.pandas_ndim, self.save_split_attrs)
+        return msg.format(
+            self.datum_support,
+            self.pandas_ndim,
+            self.save_split_attrs,
+            self.date_microseconds,
+        )
 
     # deprecated_options = {'example_future_flag': 'warning',}
     deprecated_options: dict[str, Literal["error", "warning"]] = {}

diff --git a/lib/iris/_representation/cube_printout.py b/lib/iris/_representation/cube_printout.py
@@ -66,7 +66,7 @@ def add_row(self, cols, aligns, i_col_unlimited=None):
         """
         n_cols = len(cols)
         if len(aligns) != n_cols:
-            msg = f"Number of aligns ({len(aligns)})" f" != number of cols ({n_cols})"
+            msg = f"Number of aligns ({len(aligns)}) != number of cols ({n_cols})"
             raise ValueError(msg)
         if self.n_columns is not None:
             # For now, all rows must have same number of columns
@@ -106,7 +106,7 @@ def formatted_as_strings(self):
                 elif align == "right":
                     col_text = col.rjust(width)
                 else:
-                    msg = f'Unknown alignment "{align}" ' 'not in ("left", "right")'
+                    msg = f'Unknown alignment "{align}" not in ("left", "right")'
                     raise ValueError(msg)
                 col_texts.append(col_text)