diff --git a/.github/workflows/ci-manifest.yml b/.github/workflows/ci-manifest.yml
index 73977f7d4c..527356ff35 100644
--- a/.github/workflows/ci-manifest.yml
+++ b/.github/workflows/ci-manifest.yml
@@ -23,4 +23,4 @@ concurrency:
jobs:
manifest:
name: "check-manifest"
- uses: scitools/workflows/.github/workflows/ci-manifest.yml@2024.08.1
+ uses: scitools/workflows/.github/workflows/ci-manifest.yml@2024.08.3
diff --git a/.github/workflows/refresh-lockfiles.yml b/.github/workflows/refresh-lockfiles.yml
index dd4d8bdc30..f3755f7709 100644
--- a/.github/workflows/refresh-lockfiles.yml
+++ b/.github/workflows/refresh-lockfiles.yml
@@ -14,5 +14,5 @@ on:
jobs:
refresh_lockfiles:
- uses: scitools/workflows/.github/workflows/refresh-lockfiles.yml@2024.08.1
+ uses: scitools/workflows/.github/workflows/refresh-lockfiles.yml@2024.08.3
secrets: inherit
diff --git a/benchmarks/README.md b/benchmarks/README.md
index 49168e7281..911d5f7833 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -118,6 +118,12 @@ repeats _between_ `setup()` calls using the `repeat` attribute.
`warmup_time = 0` is also advisable since ASV performs independent re-runs to
estimate run-time, and these will still be subject to the original problem.
+### Custom benchmarks
+
+Iris benchmarking implements custom benchmark types, such as a `tracemalloc`
+benchmark to measure memory growth. See [custom_bms/](./custom_bms) for more
+detail.
+
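+A minimal sketch of such a benchmark (the method and its operation are
+hypothetical; the `tracemalloc_` prefix and the optional `number` attribute
+are what matter):
+
+```python
+class Suite:
+    def tracemalloc_my_operation(self):
+        # Memory growth is measured across ALL `number` calls per sample.
+        run_expensive_operation()  # placeholder for the code under test
+
+    # Repeat the operation 3 times per measured sample.
+    tracemalloc_my_operation.number = 3
+```
+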
### Scaling / non-Scaling Performance Differences
**(We no longer advocate the below for benchmarks run during CI, given the
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
index 13e7256b83..2857c90ad7 100644
--- a/benchmarks/asv.conf.json
+++ b/benchmarks/asv.conf.json
@@ -53,9 +53,12 @@
"command_comment": [
"We know that the Nox command takes care of installation in each",
"environment, and in the case of Iris no specialised uninstall or",
- "build commands are needed to get it working."
+ "build commands are needed to get it working.",
+
+ "We do however need to install the custom benchmarks for them to be",
+ "usable."
],
"install_command": [],
"uninstall_command": [],
- "build_command": []
+ "build_command": ["python {conf_dir}/custom_bms/install.py"]
}
diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py
index 378c26332d..30a991a879 100644
--- a/benchmarks/benchmarks/__init__.py
+++ b/benchmarks/benchmarks/__init__.py
@@ -37,111 +37,6 @@ def disable_repeat_between_setup(benchmark_object):
return benchmark_object
-class TrackAddedMemoryAllocation:
- """Measures by how much process resident memory grew, during execution.
-
- Context manager which measures by how much process resident memory grew,
- during execution of its enclosed code block.
-
- Obviously limited as to what it actually measures : Relies on the current
- process not having significant unused (de-allocated) memory when the
- tested codeblock runs, and only reliable when the code allocates a
- significant amount of new memory.
-
- Example:
- with TrackAddedMemoryAllocation() as mb:
- initial_call()
- other_call()
- result = mb.addedmem_mb()
-
- Attributes
- ----------
- RESULT_MINIMUM_MB : float
- The smallest result that should ever be returned, in Mb. Results
- fluctuate from run to run (usually within 1Mb) so if a result is
- sufficiently small this noise will produce a before-after ratio over
- AVD's detection threshold and be treated as 'signal'. Results
- smaller than this value will therefore be returned as equal to this
- value, ensuring fractionally small noise / no noise at all.
- Defaults to 1.0
-
- RESULT_ROUND_DP : int
- Number of decimal places of rounding on result values (in Mb).
- Defaults to 1
-
- """
-
- RESULT_MINIMUM_MB = 0.2
- RESULT_ROUND_DP = 1 # I.E. to nearest 0.1 Mb
-
- def __enter__(self):
- tracemalloc.start()
- return self
-
- def __exit__(self, *_):
- _, peak_mem_bytes = tracemalloc.get_traced_memory()
- tracemalloc.stop()
- # Save peak-memory allocation, scaled from bytes to Mb.
- self._peak_mb = peak_mem_bytes * (2.0**-20)
-
- def addedmem_mb(self):
- """Return measured memory growth, in Mb."""
- result = self._peak_mb
- # Small results are too vulnerable to noise being interpreted as signal.
- result = max(self.RESULT_MINIMUM_MB, result)
- # Rounding makes results easier to read.
- result = np.round(result, self.RESULT_ROUND_DP)
- return result
-
- @staticmethod
- def decorator(decorated_func):
- """Benchmark to track growth in resident memory during execution.
-
- Intended for use on ASV ``track_`` benchmarks. Applies the
- :class:`TrackAddedMemoryAllocation` context manager to the benchmark
- code, sets the benchmark ``unit`` attribute to ``Mb``.
-
- """
-
- def _wrapper(*args, **kwargs):
- assert decorated_func.__name__[:6] == "track_"
- # Run the decorated benchmark within the added memory context
- # manager.
- with TrackAddedMemoryAllocation() as mb:
- decorated_func(*args, **kwargs)
- return mb.addedmem_mb()
-
- decorated_func.unit = "Mb"
- return _wrapper
-
- @staticmethod
- def decorator_repeating(repeats=3):
- """Benchmark to track growth in resident memory during execution.
-
- Tracks memory for repeated calls of decorated function.
-
- Intended for use on ASV ``track_`` benchmarks. Applies the
- :class:`TrackAddedMemoryAllocation` context manager to the benchmark
- code, sets the benchmark ``unit`` attribute to ``Mb``.
-
- """
-
- def decorator(decorated_func):
- def _wrapper(*args, **kwargs):
- assert decorated_func.__name__[:6] == "track_"
- # Run the decorated benchmark within the added memory context
- # manager.
- with TrackAddedMemoryAllocation() as mb:
- for _ in range(repeats):
- decorated_func(*args, **kwargs)
- return mb.addedmem_mb()
-
- decorated_func.unit = "Mb"
- return _wrapper
-
- return decorator
-
-
def on_demand_benchmark(benchmark_object):
"""Disable these benchmark(s) unless ON_DEMAND_BENCHARKS env var is set.
diff --git a/benchmarks/benchmarks/cperf/save.py b/benchmarks/benchmarks/cperf/save.py
index 2d60f920c4..6dcd0b3bcf 100644
--- a/benchmarks/benchmarks/cperf/save.py
+++ b/benchmarks/benchmarks/cperf/save.py
@@ -6,7 +6,7 @@
from iris import save
-from .. import TrackAddedMemoryAllocation, on_demand_benchmark
+from .. import on_demand_benchmark
from ..generate_data.ugrid import make_cube_like_2d_cubesphere, make_cube_like_umfield
from . import _N_CUBESPHERE_UM_EQUIVALENT, _UM_DIMS_YX
@@ -36,6 +36,5 @@ def _save_data(self, cube):
def time_save_data_netcdf(self, data_type):
self._save_data(self.cube)
- @TrackAddedMemoryAllocation.decorator
- def track_addedmem_save_data_netcdf(self, data_type):
+ def tracemalloc_save_data_netcdf(self, data_type):
self._save_data(self.cube)
diff --git a/benchmarks/benchmarks/merge_concat.py b/benchmarks/benchmarks/merge_concat.py
index cac43698a4..2d3738683a 100644
--- a/benchmarks/benchmarks/merge_concat.py
+++ b/benchmarks/benchmarks/merge_concat.py
@@ -11,7 +11,6 @@
from iris.cube import CubeList
from iris.warnings import IrisVagueMetadataWarning
-from . import TrackAddedMemoryAllocation
from .generate_data.stock import realistic_4d_w_everything
@@ -37,10 +36,11 @@ def setup(self):
def time_merge(self):
_ = self.cube_list.merge_cube()
- @TrackAddedMemoryAllocation.decorator_repeating()
- def track_mem_merge(self):
+ def tracemalloc_merge(self):
_ = self.cube_list.merge_cube()
+ tracemalloc_merge.number = 3 # type: ignore[attr-defined]
+
class Concatenate:
# TODO: Improve coverage.
@@ -66,6 +66,7 @@ def setup(self, lazy_run: bool):
def time_concatenate(self, _):
_ = self.cube_list.concatenate_cube()
- @TrackAddedMemoryAllocation.decorator_repeating()
- def track_mem_merge(self, _):
+ def tracemalloc_concatenate(self, _):
_ = self.cube_list.concatenate_cube()
+
+ tracemalloc_concatenate.number = 3 # type: ignore[attr-defined]
diff --git a/benchmarks/benchmarks/mesh/utils/regions_combine.py b/benchmarks/benchmarks/mesh/utils/regions_combine.py
index 1a1a43a622..a61deea56d 100644
--- a/benchmarks/benchmarks/mesh/utils/regions_combine.py
+++ b/benchmarks/benchmarks/mesh/utils/regions_combine.py
@@ -17,7 +17,6 @@
from iris import load, load_cube, save
from iris.mesh.utils import recombine_submeshes
-from ... import TrackAddedMemoryAllocation
from ...generate_data.ugrid import make_cube_like_2d_cubesphere
@@ -169,8 +168,7 @@ def setup(self, n_cubesphere):
def time_create_combined_cube(self, n_cubesphere):
self.recombine()
- @TrackAddedMemoryAllocation.decorator
- def track_addedmem_create_combined_cube(self, n_cubesphere):
+ def tracemalloc_create_combined_cube(self, n_cubesphere):
self.recombine()
@@ -180,8 +178,7 @@ class CombineRegionsComputeRealData(MixinCombineRegions):
def time_compute_data(self, n_cubesphere):
_ = self.recombined_cube.data
- @TrackAddedMemoryAllocation.decorator
- def track_addedmem_compute_data(self, n_cubesphere):
+ def tracemalloc_compute_data(self, n_cubesphere):
_ = self.recombined_cube.data
@@ -199,8 +196,7 @@ def time_save(self, n_cubesphere):
# Save to disk, which must compute data + stream it to file.
save(self.recombined_cube, "tmp.nc")
- @TrackAddedMemoryAllocation.decorator
- def track_addedmem_save(self, n_cubesphere):
+ def tracemalloc_save(self, n_cubesphere):
save(self.recombined_cube, "tmp.nc")
def track_filesize_saved(self, n_cubesphere):
@@ -227,6 +223,5 @@ def time_stream_file2file(self, n_cubesphere):
# Save to disk, which must compute data + stream it to file.
save(self.recombined_cube, "tmp.nc")
- @TrackAddedMemoryAllocation.decorator
- def track_addedmem_stream_file2file(self, n_cubesphere):
+ def tracemalloc_stream_file2file(self, n_cubesphere):
save(self.recombined_cube, "tmp.nc")
diff --git a/benchmarks/benchmarks/regridding.py b/benchmarks/benchmarks/regridding.py
index 4cfda05ad1..e227da0ec6 100644
--- a/benchmarks/benchmarks/regridding.py
+++ b/benchmarks/benchmarks/regridding.py
@@ -14,8 +14,6 @@
from iris.analysis import AreaWeighted, PointInCell
from iris.coords import AuxCoord
-from . import TrackAddedMemoryAllocation
-
class HorizontalChunkedRegridding:
def setup(self) -> None:
@@ -53,20 +51,22 @@ def time_regrid_area_w_new_grid(self) -> None:
# Realise data
out.data
- @TrackAddedMemoryAllocation.decorator_repeating()
- def track_mem_regrid_area_w(self) -> None:
+ def tracemalloc_regrid_area_w(self) -> None:
# Regrid the chunked cube
out = self.cube.regrid(self.template_cube, self.scheme_area_w)
# Realise data
out.data
- @TrackAddedMemoryAllocation.decorator_repeating()
- def track_mem_regrid_area_w_new_grid(self) -> None:
+ tracemalloc_regrid_area_w.number = 3 # type: ignore[attr-defined]
+
+ def tracemalloc_regrid_area_w_new_grid(self) -> None:
# Regrid the chunked cube
out = self.chunked_cube.regrid(self.template_cube, self.scheme_area_w)
# Realise data
out.data
+ tracemalloc_regrid_area_w_new_grid.number = 3 # type: ignore[attr-defined]
+
class CurvilinearRegridding:
def setup(self) -> None:
@@ -110,9 +110,10 @@ def time_regrid_pic(self) -> None:
# Realise the data
out.data
- @TrackAddedMemoryAllocation.decorator_repeating()
- def track_mem_regrid_pic(self) -> None:
+ def tracemalloc_regrid_pic(self) -> None:
# Regrid the cube onto the template.
out = self.cube.regrid(self.template_cube, self.scheme_pic)
# Realise the data
out.data
+
+ tracemalloc_regrid_pic.number = 3 # type: ignore[attr-defined]
diff --git a/benchmarks/benchmarks/save.py b/benchmarks/benchmarks/save.py
index aaa8480d64..4bac1b1450 100644
--- a/benchmarks/benchmarks/save.py
+++ b/benchmarks/benchmarks/save.py
@@ -7,7 +7,6 @@
from iris import save
from iris.mesh import save_mesh
-from . import TrackAddedMemoryAllocation, on_demand_benchmark
from .generate_data.ugrid import make_cube_like_2d_cubesphere
@@ -38,8 +37,7 @@ def time_netcdf_save_mesh(self, n_cubesphere, is_unstructured):
if is_unstructured:
self._save_mesh(self.cube)
- @TrackAddedMemoryAllocation.decorator
- def track_addedmem_netcdf_save(self, n_cubesphere, is_unstructured):
+ def tracemalloc_netcdf_save(self, n_cubesphere, is_unstructured):
# Don't need to copy the cube here since track_ benchmarks don't
# do repeats between self.setup() calls.
self._save_data(self.cube, do_copy=False)
diff --git a/benchmarks/benchmarks/sperf/combine_regions.py b/benchmarks/benchmarks/sperf/combine_regions.py
index b106befcae..591b7bb9be 100644
--- a/benchmarks/benchmarks/sperf/combine_regions.py
+++ b/benchmarks/benchmarks/sperf/combine_regions.py
@@ -12,7 +12,7 @@
from iris import load, load_cube, save
from iris.mesh.utils import recombine_submeshes
-from .. import TrackAddedMemoryAllocation, on_demand_benchmark
+from .. import on_demand_benchmark
from ..generate_data.ugrid import BENCHMARK_DATA, make_cube_like_2d_cubesphere
@@ -175,8 +175,7 @@ def setup(self, n_cubesphere, imaginary_data=True, create_result_cube=False):
def time_create_combined_cube(self, n_cubesphere):
self.recombine()
- @TrackAddedMemoryAllocation.decorator
- def track_addedmem_create_combined_cube(self, n_cubesphere):
+ def tracemalloc_create_combined_cube(self, n_cubesphere):
self.recombine()
@@ -187,8 +186,7 @@ class ComputeRealData(Mixin):
def time_compute_data(self, n_cubesphere):
_ = self.recombined_cube.data
- @TrackAddedMemoryAllocation.decorator
- def track_addedmem_compute_data(self, n_cubesphere):
+ def tracemalloc_compute_data(self, n_cubesphere):
_ = self.recombined_cube.data
@@ -206,8 +204,7 @@ def time_save(self, n_cubesphere):
# Save to disk, which must compute data + stream it to file.
self.save_recombined_cube()
- @TrackAddedMemoryAllocation.decorator
- def track_addedmem_save(self, n_cubesphere):
+ def tracemalloc_save(self, n_cubesphere):
self.save_recombined_cube()
def track_filesize_saved(self, n_cubesphere):
@@ -233,6 +230,5 @@ def time_stream_file2file(self, n_cubesphere):
# Save to disk, which must compute data + stream it to file.
self.save_recombined_cube()
- @TrackAddedMemoryAllocation.decorator
- def track_addedmem_stream_file2file(self, n_cubesphere):
+ def tracemalloc_stream_file2file(self, n_cubesphere):
self.save_recombined_cube()
diff --git a/benchmarks/benchmarks/sperf/save.py b/benchmarks/benchmarks/sperf/save.py
index d8a03798f0..a715ec2424 100644
--- a/benchmarks/benchmarks/sperf/save.py
+++ b/benchmarks/benchmarks/sperf/save.py
@@ -9,7 +9,7 @@
from iris import save
from iris.mesh import save_mesh
-from .. import TrackAddedMemoryAllocation, on_demand_benchmark
+from .. import on_demand_benchmark
from ..generate_data.ugrid import make_cube_like_2d_cubesphere
@@ -36,8 +36,7 @@ def _save_mesh(self, cube):
def time_save_cube(self, n_cubesphere, is_unstructured):
self._save_cube(self.cube)
- @TrackAddedMemoryAllocation.decorator
- def track_addedmem_save_cube(self, n_cubesphere, is_unstructured):
+ def tracemalloc_save_cube(self, n_cubesphere, is_unstructured):
self._save_cube(self.cube)
def time_save_mesh(self, n_cubesphere, is_unstructured):
diff --git a/benchmarks/benchmarks/stats.py b/benchmarks/benchmarks/stats.py
index 1f5262bf4c..fbab12cd4b 100644
--- a/benchmarks/benchmarks/stats.py
+++ b/benchmarks/benchmarks/stats.py
@@ -8,8 +8,6 @@
from iris.analysis.stats import pearsonr
import iris.tests
-from . import TrackAddedMemoryAllocation
-
class PearsonR:
def setup(self):
@@ -32,10 +30,11 @@ def setup(self):
def time_real(self):
pearsonr(self.cube_a, self.cube_b, weights=self.weights)
- @TrackAddedMemoryAllocation.decorator_repeating()
- def track_real(self):
+ def tracemalloc_real(self):
pearsonr(self.cube_a, self.cube_b, weights=self.weights)
+ tracemalloc_real.number = 3 # type: ignore[attr-defined]
+
def time_lazy(self):
for cube in self.cube_a, self.cube_b:
cube.data = cube.lazy_data()
@@ -43,10 +42,11 @@ def time_lazy(self):
result = pearsonr(self.cube_a, self.cube_b, weights=self.weights)
result.data
- @TrackAddedMemoryAllocation.decorator_repeating()
- def track_lazy(self):
+ def tracemalloc_lazy(self):
for cube in self.cube_a, self.cube_b:
cube.data = cube.lazy_data()
result = pearsonr(self.cube_a, self.cube_b, weights=self.weights)
result.data
+
+ tracemalloc_lazy.number = 3 # type: ignore[attr-defined]
diff --git a/benchmarks/benchmarks/trajectory.py b/benchmarks/benchmarks/trajectory.py
index a31552eb9a..77825ef2f2 100644
--- a/benchmarks/benchmarks/trajectory.py
+++ b/benchmarks/benchmarks/trajectory.py
@@ -13,8 +13,6 @@
import iris
from iris.analysis.trajectory import interpolate
-from . import TrackAddedMemoryAllocation
-
class TrajectoryInterpolation:
def setup(self) -> None:
@@ -35,22 +33,24 @@ def time_trajectory_linear(self) -> None:
# Realise the data
out_cube.data
- @TrackAddedMemoryAllocation.decorator_repeating()
- def track_trajectory_linear(self) -> None:
+ def tracemalloc_trajectory_linear(self) -> None:
# Regrid the cube onto the template.
out_cube = interpolate(self.cube, self.sample_points, method="linear")
# Realise the data
out_cube.data
+ tracemalloc_trajectory_linear.number = 3 # type: ignore[attr-defined]
+
def time_trajectory_nearest(self) -> None:
# Regrid the cube onto the template.
out_cube = interpolate(self.cube, self.sample_points, method="nearest")
# Realise the data
out_cube.data
- @TrackAddedMemoryAllocation.decorator_repeating()
- def track_trajectory_nearest(self) -> None:
+ def tracemalloc_trajectory_nearest(self) -> None:
# Regrid the cube onto the template.
out_cube = interpolate(self.cube, self.sample_points, method="nearest")
# Realise the data
out_cube.data
+
+ tracemalloc_trajectory_nearest.number = 3 # type: ignore[attr-defined]
diff --git a/benchmarks/custom_bms/README.md b/benchmarks/custom_bms/README.md
new file mode 100644
index 0000000000..eea85d74fe
--- /dev/null
+++ b/benchmarks/custom_bms/README.md
@@ -0,0 +1,11 @@
+# Iris custom benchmarks
+
+To be recognised by ASV, these benchmarks must be packaged and installed in
+line with the
+[ASV guidelines](https://asv.readthedocs.io/projects/asv-runner/en/latest/development/benchmark_plugins.html).
+This is achieved using the custom build in [install.py](./install.py).
+
+Installation is into the environment where the benchmarks are run (i.e. not
+the environment containing ASV + Nox, but the one built to the same
+specifications as the Tests environment). This is done via `build_command`
+in [asv.conf.json](../asv.conf.json).
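+
+For reference, a benchmark module is detected because it exports its
+benchmark class(es) via the hook described in the ASV guidelines, as
+[tracemallocbench.py](./tracemallocbench.py) does:
+
+```python
+export_as_benchmark = [TracemallocBenchmark]
+```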
diff --git a/benchmarks/custom_bms/install.py b/benchmarks/custom_bms/install.py
new file mode 100644
index 0000000000..59d27a0b43
--- /dev/null
+++ b/benchmarks/custom_bms/install.py
@@ -0,0 +1,55 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the BSD license.
+# See LICENSE in the root of the repository for full licensing details.
+"""Install Iris' custom benchmarks for detection by ASV.
+
+See the requirements for being detected as an ASV plugin:
+https://asv.readthedocs.io/projects/asv-runner/en/latest/development/benchmark_plugins.html
+"""
+
+from pathlib import Path
+import shutil
+from subprocess import run
+from tempfile import TemporaryDirectory
+
+this_dir = Path(__file__).parent
+
+
+def package_files(new_dir: Path) -> None:
+ """Package Iris' custom benchmarks for detection by ASV.
+
+ Parameters
+ ----------
+ new_dir : Path
+ The directory to package the custom benchmarks in.
+ """
+ asv_bench_iris = new_dir / "asv_bench_iris"
+ benchmarks = asv_bench_iris / "benchmarks"
+ benchmarks.mkdir(parents=True)
+ (asv_bench_iris / "__init__.py").touch()
+
+ for py_file in this_dir.glob("*.py"):
+ if py_file != Path(__file__):
+ shutil.copy2(py_file, benchmarks)
+
+ # Create this on the fly, as having multiple pyproject.toml files in one
+ # project causes problems.
+ py_project = new_dir / "pyproject.toml"
+ py_project.write_text(
+ """
+ [project]
+ name = "asv_bench_iris"
+ version = "0.1"
+ """
+ )
+
+
+def main():
+ with TemporaryDirectory() as temp_dir:
+ package_files(Path(temp_dir))
+ run(["python", "-m", "pip", "install", temp_dir])
+
+
+if __name__ == "__main__":
+ main()
diff --git a/benchmarks/custom_bms/tracemallocbench.py b/benchmarks/custom_bms/tracemallocbench.py
new file mode 100644
index 0000000000..486c67aeb9
--- /dev/null
+++ b/benchmarks/custom_bms/tracemallocbench.py
@@ -0,0 +1,196 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the BSD license.
+# See LICENSE in the root of the repository for full licensing details.
+
+"""Benchmark for growth in process resident memory, repeating for accuracy.
+
+Uses a modified version of the repeat logic in
+:class:`asv_runner.benchmarks.time.TimeBenchmark`.
+"""
+
+import re
+from timeit import Timer
+import tracemalloc
+from typing import Callable
+
+from asv_runner.benchmarks.time import TimeBenchmark, wall_timer
+
+
+class TracemallocBenchmark(TimeBenchmark):
+ """Benchmark for growth in process resident memory, repeating for accuracy.
+
+ Obviously limited as to what it actually measures: relies on the current
+ process not having significant unused (de-allocated) memory when the
+ tested code block runs, and is only reliable when the code allocates a
+ significant amount of new memory.
+
+ Benchmark operations prefixed with ``tracemalloc_`` or ``Tracemalloc`` will
+ use this benchmark class.
+
+ Inherits behaviour from :class:`asv_runner.benchmarks.time.TimeBenchmark`,
+ with modifications for memory measurement. See the below Attributes section
+ and https://asv.readthedocs.io/en/stable/writing_benchmarks.html#timing-benchmarks.
+
+ Attributes
+ ----------
+ Mostly identical to :class:`asv_runner.benchmarks.time.TimeBenchmark`. See
+ https://asv.readthedocs.io/en/stable/benchmarks.html#timing-benchmarks.
+ Make sure to use the inherited ``repeat`` attribute if greater accuracy
+ is needed. Below are the attributes where inherited behaviour is
+ overridden.
+
+ number : int
+ The number of times the benchmarked operation will be called per
+ ``repeat``. Memory growth is measured after ALL calls -
+ i.e. `number` should make no difference to the result if the operation
+ has perfect garbage collection. The parent class's intelligent
+ modification of `number` is NOT inherited. A minimum value of ``1`` is
+ enforced.
+ warmup_time, sample_time, min_run_count, timer
+ Not used.
+ type : str = "tracemalloc"
+ The name of this benchmark type.
+ unit : str = "bytes"
+ The units of the measured metric (i.e. the growth in memory).
+
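+ Examples
+ --------
+ A hypothetical benchmark measured by this class::
+
+     class Suite:
+         def tracemalloc_my_operation(self):
+             allocate_a_lot()  # hypothetical memory-allocating operation
+
+         # Take each sample over 3 consecutive calls.
+         tracemalloc_my_operation.number = 3
+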
+ """
+
+ name_regex = re.compile("^(Tracemalloc[A-Z_].+)|(tracemalloc_.+)$")
+
+ param: tuple
+
+ def __init__(self, name: str, func: Callable, attr_sources: list) -> None:
+ """Initialize a new instance of `TracemallocBenchmark`.
+
+ Parameters
+ ----------
+ name : str
+ The name of the benchmark.
+ func : callable
+ The function to benchmark.
+ attr_sources : list
+ A list of objects from which to draw attributes.
+ """
+ super().__init__(name, func, attr_sources)
+ self.type = "tracemalloc"
+ self.unit = "bytes"
+
+ def _load_vars(self):
+ """Load benchmark variables from attribute sources.
+
+ Downstream handling of ``number`` differs from that in the parent class,
+ so we need to ensure it is at least 1.
+ """
+ super()._load_vars()
+ self.number = max(1, self.number)
+
+ def run(self, *param: tuple) -> dict:
+ """Run the benchmark with the given parameters.
+
+ Downstream handling of ``param`` differs from that in the parent class,
+ so we need to store it now.
+
+ Parameters
+ ----------
+ *param : tuple
+ The parameters to pass to the benchmark function.
+
+ Returns
+ -------
+ dict
+ A dictionary with the benchmark results. It contains the samples
+ taken, and "the number of times the function was called in each
+ sample" - for this benchmark that is always ``1`` to avoid the
+ parent class incorrectly modifying the results.
+ """
+ self.param = param
+ return super().run(*param)
+
+ def benchmark_timing(
+ self,
+ timer: Timer,
+ min_repeat: int,
+ max_repeat: int,
+ max_time: float,
+ warmup_time: float,
+ number: int,
+ min_run_count: int,
+ ) -> tuple[list[int], int]:
+ """Benchmark the timing of the function execution.
+
+ Heavily modified from the parent method:
+
+ - Directly performs setup and measurement (the parent used timeit).
+ - ``number`` is used differently (see Parameters).
+ - No warmup phase.
+
+ Parameters
+ ----------
+ timer : timeit.Timer
+ Not used.
+ min_repeat : int
+ The minimum number of times to repeat the function execution.
+ max_repeat : int
+ The maximum number of times to repeat the function execution.
+ max_time : float
+ The maximum total time to spend on the benchmarking.
+ warmup_time : float
+ Not used.
+ number : int
+ The number of times the benchmarked operation will be called per
+ repeat. Memory growth is measured after ALL calls - i.e. `number`
+ should make no difference to the result if the operation
+ has perfect garbage collection. The parent class's intelligent
+ modification of `number` is NOT inherited.
+ min_run_count : int
+ Not used.
+
+ Returns
+ -------
+ list
+ A list of the measured memory growths, in bytes.
+ int = 1
+ Part of the inherited return signature. Must be 1 to avoid
+ the parent incorrectly modifying the results.
+ """
+ start_time = wall_timer()
+ samples: list[int] = []
+
+ def too_slow(num_samples) -> bool:
+ """Stop taking samples if limits exceeded.
+
+ Parameters
+ ----------
+ num_samples : int
+ The number of samples taken so far.
+
+ Returns
+ -------
+ bool
+ True if the benchmark should stop, False otherwise.
+ """
+ if num_samples < min_repeat:
+ return False
+ return wall_timer() > start_time + max_time
+
+ # Collect samples
+ while len(samples) < max_repeat:
+ self.redo_setup()
+ tracemalloc.start()
+ for _ in range(number):
+ __ = self.func(*self.param)
+ _, peak_mem_bytes = tracemalloc.get_traced_memory()
+ tracemalloc.stop()
+
+ samples.append(peak_mem_bytes)
+
+ if too_slow(len(samples)):
+ break
+
+ # ``number`` is not used in the same way as in the parent class. Must
+ # be returned as 1 to avoid parent incorrectly modifying the results.
+ return samples, 1
+
+
+# https://asv.readthedocs.io/projects/asv-runner/en/latest/development/benchmark_plugins.html
+export_as_benchmark = [TracemallocBenchmark]
diff --git a/docs/src/conf.py b/docs/src/conf.py
index 60f760c37f..4c8f59564f 100644
--- a/docs/src/conf.py
+++ b/docs/src/conf.py
@@ -246,6 +246,7 @@ def _dotv(version):
# See https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html
intersphinx_mapping = {
"cartopy": ("https://scitools.org.uk/cartopy/docs/latest/", None),
+ "cftime": ("https://unidata.github.io/cftime/", None),
"dask": ("https://docs.dask.org/en/stable/", None),
"iris-esmf-regrid": ("https://iris-esmf-regrid.readthedocs.io/en/stable/", None),
"matplotlib": ("https://matplotlib.org/stable/", None),
diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst
index ae2c08d588..f76e7fe2b5 100644
--- a/docs/src/whatsnew/latest.rst
+++ b/docs/src/whatsnew/latest.rst
@@ -38,6 +38,9 @@ This document explains the changes made to Iris for this release
#. N/A
+#. `@rcomer`_ enabled partial collapse of multi-dimensional string coordinates,
+ fixing :issue:`3653`. (:pull:`5955`)
+
💣 Incompatible Changes
=======================
@@ -48,6 +51,12 @@ This document explains the changes made to Iris for this release
🚀 Performance Enhancements
===========================
+#. `@bouweandela`_ made the time coordinate categorisation functions in
+ :mod:`~iris.coord_categorisation` faster. Anyone using
+ :func:`~iris.coord_categorisation.add_categorised_coord`
+ with cftime :class:`~cftime.datetime` objects can benefit from the same
+ improvement by adding a type hint to their category function. (:pull:`5999`)
+
#. `@bouweandela`_ made :meth:`iris.cube.CubeList.concatenate` faster if more
than two cubes are concatenated. (:pull:`5926`)
@@ -72,7 +81,9 @@ This document explains the changes made to Iris for this release
💼 Internal
===========
-#. N/A
+#. `@trexfeathers`_ improved the new ``tracemalloc`` benchmarking (introduced
+ in Iris v3.10.0, :pull:`5948`) to use the same statistical repeat strategy
+ as timing benchmarks. (:pull:`5981`)
.. comment
diff --git a/lib/iris/coord_categorisation.py b/lib/iris/coord_categorisation.py
index 7ccee4fca8..770f8327a1 100644
--- a/lib/iris/coord_categorisation.py
+++ b/lib/iris/coord_categorisation.py
@@ -17,13 +17,23 @@
import calendar
import collections
+import inspect
+from typing import Callable
+import cftime
import numpy as np
import iris.coords
+import iris.cube
-def add_categorised_coord(cube, name, from_coord, category_function, units="1"):
+def add_categorised_coord(
+ cube: iris.cube.Cube,
+ name: str,
+ from_coord: iris.coords.Coord | str,
+ category_function: Callable,
+ units: str = "1",
+) -> None:
"""Add a new coordinate to a cube, by categorising an existing one.
Make a new :class:`iris.coords.AuxCoord` from mapped values, and add
@@ -32,31 +42,38 @@ def add_categorised_coord(cube, name, from_coord, category_function, units="1"):
Parameters
----------
cube : :class:`iris.cube.Cube`
- The cube containing 'from_coord'. The new coord will be added into it.
+ The cube containing 'from_coord'. The new coord will be added into it.
name : str
Name of the created coordinate.
from_coord : :class:`iris.coords.Coord` or str
Coordinate in 'cube', or the name of one.
category_function : callable
- Function(coordinate, value), returning a category value for a
- coordinate point-value.
+ Function(coordinate, value), returning a category value for a coordinate
+ point-value. If ``value`` has a type hint :obj:`cftime.datetime`, the
+ coordinate points are translated to :obj:`cftime.datetime` objects before
+ calling ``category_function``.
units : str, default="1"
Units of the category value, typically 'no_unit' or '1'.
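+
+ Examples
+ --------
+ A hypothetical category function exploiting the type hint::
+
+     def get_decade(coord, value: cftime.datetime) -> int:
+         return (value.year // 10) * 10
+
+     add_categorised_coord(cube, "decade", "time", get_decade)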
-
"""
# Interpret coord, if given as a name
- if isinstance(from_coord, str):
- from_coord = cube.coord(from_coord)
+ coord = cube.coord(from_coord) if isinstance(from_coord, str) else from_coord
if len(cube.coords(name)) > 0:
msg = 'A coordinate "%s" already exists in the cube.' % name
raise ValueError(msg)
+ # Translate the coordinate points to cftime datetimes if requested.
+ value_param = list(inspect.signature(category_function).parameters.values())[1]
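+ # An un-annotated ``value`` parameter yields ``inspect.Parameter.empty``,
+ # which is itself a class, so the ``issubclass`` check below simply returns
+ # False and the raw coordinate points are used.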
+ if issubclass(value_param.annotation, cftime.datetime):
+ points = coord.units.num2date(coord.points, only_use_cftime_datetimes=True)
+ else:
+ points = coord.points
+
# Construct new coordinate by mapping values, using numpy.vectorize to
# support multi-dimensional coords.
# Test whether the result contains strings. If it does we must manually
# force the dtype because of a numpy bug (see numpy #3270 on GitHub).
- result = category_function(from_coord, from_coord.points.ravel()[0])
+ result = category_function(coord, points.ravel()[0])
if isinstance(result, str):
str_vectorised_fn = np.vectorize(category_function, otypes=[object])
@@ -67,14 +84,14 @@ def vectorised_fn(*args):
else:
vectorised_fn = np.vectorize(category_function)
new_coord = iris.coords.AuxCoord(
- vectorised_fn(from_coord, from_coord.points),
+ vectorised_fn(coord, points),
units=units,
- attributes=from_coord.attributes.copy(),
+ attributes=coord.attributes.copy(),
)
new_coord.rename(name)
# Add into the cube
- cube.add_aux_coord(new_coord, cube.coord_dims(from_coord))
+ cube.add_aux_coord(new_coord, cube.coord_dims(coord))
# ======================================
@@ -84,78 +101,62 @@ def vectorised_fn(*args):
# coordinates only
#
-
-# Private "helper" function
-def _pt_date(coord, time):
- """Return the datetime of a time-coordinate point.
-
- Parameters
- ----------
- coord : Coord
- Coordinate (must be Time-type).
- time : float
- Value of a coordinate point.
-
- Returns
- -------
- cftime.datetime
-
- """
- # NOTE: All of the currently defined categorisation functions are
- # calendar operations on Time coordinates.
- return coord.units.num2date(time, only_use_cftime_datetimes=True)
-
-
# --------------------------------------------
# Time categorisations : calendar date components
def add_year(cube, coord, name="year"):
"""Add a categorical calendar-year coordinate."""
- add_categorised_coord(cube, name, coord, lambda coord, x: _pt_date(coord, x).year)
+
+ def get_year(_, value: cftime.datetime) -> int:
+ return value.year
+
+ add_categorised_coord(cube, name, coord, get_year)
def add_month_number(cube, coord, name="month_number"):
"""Add a categorical month coordinate, values 1..12."""
- add_categorised_coord(cube, name, coord, lambda coord, x: _pt_date(coord, x).month)
+
+ def get_month_number(_, value: cftime.datetime) -> int:
+ return value.month
+
+ add_categorised_coord(cube, name, coord, get_month_number)
def add_month_fullname(cube, coord, name="month_fullname"):
"""Add a categorical month coordinate, values 'January'..'December'."""
- add_categorised_coord(
- cube,
- name,
- coord,
- lambda coord, x: calendar.month_name[_pt_date(coord, x).month],
- units="no_unit",
- )
+
+ def get_month_fullname(_, value: cftime.datetime) -> str:
+ return calendar.month_name[value.month]
+
+ add_categorised_coord(cube, name, coord, get_month_fullname, units="no_unit")
def add_month(cube, coord, name="month"):
"""Add a categorical month coordinate, values 'Jan'..'Dec'."""
- add_categorised_coord(
- cube,
- name,
- coord,
- lambda coord, x: calendar.month_abbr[_pt_date(coord, x).month],
- units="no_unit",
- )
+
+ def get_month_abbr(_, value: cftime.datetime) -> str:
+ return calendar.month_abbr[value.month]
+
+ add_categorised_coord(cube, name, coord, get_month_abbr, units="no_unit")
def add_day_of_month(cube, coord, name="day_of_month"):
"""Add a categorical day-of-month coordinate, values 1..31."""
- add_categorised_coord(cube, name, coord, lambda coord, x: _pt_date(coord, x).day)
+
+ def get_day_of_month(_, value: cftime.datetime) -> int:
+ return value.day
+
+ add_categorised_coord(cube, name, coord, get_day_of_month)
def add_day_of_year(cube, coord, name="day_of_year"):
"""Add a categorical day-of-year coordinate, values 1..365 (1..366 in leap years)."""
- # Note: cftime.datetime objects return a normal tuple from timetuple(),
- # unlike datetime.datetime objects that return a namedtuple.
- # Index the time tuple (element 7 is day of year) instead of using named
- # element tm_yday.
- add_categorised_coord(
- cube, name, coord, lambda coord, x: _pt_date(coord, x).timetuple()[7]
- )
+
+ def get_day_of_year(_, value: cftime.datetime) -> int:
+ return value.timetuple().tm_yday
+
+ add_categorised_coord(cube, name, coord, get_day_of_year)
# --------------------------------------------
@@ -164,31 +165,29 @@ def add_day_of_year(cube, coord, name="day_of_year"):
def add_weekday_number(cube, coord, name="weekday_number"):
"""Add a categorical weekday coordinate, values 0..6 [0=Monday]."""
- add_categorised_coord(
- cube, name, coord, lambda coord, x: _pt_date(coord, x).dayofwk
- )
+
+ def get_weekday_number(_, value: cftime.datetime) -> int:
+ return value.dayofwk
+
+ add_categorised_coord(cube, name, coord, get_weekday_number)
def add_weekday_fullname(cube, coord, name="weekday_fullname"):
"""Add a categorical weekday coordinate, values 'Monday'..'Sunday'."""
- add_categorised_coord(
- cube,
- name,
- coord,
- lambda coord, x: calendar.day_name[_pt_date(coord, x).dayofwk],
- units="no_unit",
- )
+
+ def get_weekday_fullname(_, value: cftime.datetime) -> str:
+ return calendar.day_name[value.dayofwk]
+
+ add_categorised_coord(cube, name, coord, get_weekday_fullname, units="no_unit")
def add_weekday(cube, coord, name="weekday"):
"""Add a categorical weekday coordinate, values 'Mon'..'Sun'."""
- add_categorised_coord(
- cube,
- name,
- coord,
- lambda coord, x: calendar.day_abbr[_pt_date(coord, x).dayofwk],
- units="no_unit",
- )
+
+ def get_weekday(_, value: cftime.datetime) -> str:
+ return calendar.day_abbr[value.dayofwk]
+
+ add_categorised_coord(cube, name, coord, get_weekday, units="no_unit")
# --------------------------------------------
@@ -197,7 +196,11 @@ def add_weekday(cube, coord, name="weekday"):
def add_hour(cube, coord, name="hour"):
"""Add a categorical hour coordinate, values 0..23."""
- add_categorised_coord(cube, name, coord, lambda coord, x: _pt_date(coord, x).hour)
+
+ def get_hour(_, value: cftime.datetime) -> int:
+ return value.hour
+
+ add_categorised_coord(cube, name, coord, get_hour)
# ----------------------------------------------
@@ -319,9 +322,8 @@ def add_season(cube, coord, name="season", seasons=("djf", "mam", "jja", "son"))
month_season_numbers = _month_season_numbers(seasons)
# Define a categorisation function.
- def _season(coord, value):
- dt = _pt_date(coord, value)
- return seasons[month_season_numbers[dt.month]]
+ def _season(_, value: cftime.datetime) -> str:
+ return seasons[month_season_numbers[value.month]]
# Apply the categorisation.
add_categorised_coord(cube, name, coord, _season, units="no_unit")
@@ -357,9 +359,8 @@ def add_season_number(
month_season_numbers = _month_season_numbers(seasons)
# Define a categorisation function.
- def _season_number(coord, value):
- dt = _pt_date(coord, value)
- return month_season_numbers[dt.month]
+ def _season_number(_, value: cftime.datetime) -> int:
+ return month_season_numbers[value.month]
# Apply the categorisation.
add_categorised_coord(cube, name, coord, _season_number)
@@ -401,10 +402,9 @@ def add_season_year(
)
# Define a categorisation function.
- def _season_year(coord, value):
- dt = _pt_date(coord, value)
- year = dt.year
- year += month_year_adjusts[dt.month]
+ def _season_year(_, value: cftime.datetime) -> int:
+ year = value.year
+ year += month_year_adjusts[value.month]
return year
# Apply the categorisation.
@@ -432,10 +432,7 @@ def add_season_membership(cube, coord, season, name="season_membership"):
"""
months = _months_in_season(season)
- def _season_membership(coord, value):
- dt = _pt_date(coord, value)
- if dt.month in months:
- return True
- return False
+ def _season_membership(_, value: cftime.datetime) -> bool:
+ return value.month in months
add_categorised_coord(cube, name, coord, _season_membership)
diff --git a/lib/iris/coords.py b/lib/iris/coords.py
index d2f5b05f89..8afe9dad41 100644
--- a/lib/iris/coords.py
+++ b/lib/iris/coords.py
@@ -2115,22 +2115,39 @@ def collapsed(self, dims_to_collapse=None):
if np.issubdtype(self.dtype, np.str_):
# Collapse the coordinate by serializing the points and
# bounds as strings.
- def serialize(x):
- return "|".join([str(i) for i in x.flatten()])
+ def serialize(x, axis):
+ if axis is None:
+ return "|".join(str(i) for i in x.flatten())
+
+ # np.apply_along_axis combined with str.join will truncate strings in
+ # some cases (https://github.com/numpy/numpy/issues/8352), so we need to
+ # loop through the array directly. First move the (possibly multiple)
+ # axes of interest to the trailing dim(s), then reshape to a 2D array we
+ # can loop through.
+ work_array = np.moveaxis(x, axis, range(-len(axis), 0))
+ out_shape = work_array.shape[: -len(axis)]
+ work_array = work_array.reshape(np.prod(out_shape, dtype=int), -1)
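+ # E.g. x.shape == (3, 4, 2) with axis == (0,): moveaxis gives shape
+ # (4, 2, 3), out_shape == (4, 2), and the reshape gives (8, 3) - one row
+ # of collapsed values per retained position.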
+
+ joined = []
+ for arr_slice in work_array:
+ joined.append(serialize(arr_slice, None))
+
+ return np.array(joined).reshape(out_shape)
bounds = None
if self.has_bounds():
- shape = self._bounds_dm.shape[1:]
- bounds = []
- for index in np.ndindex(shape):
- index_slice = (slice(None),) + tuple(index)
- bounds.append(serialize(self.bounds[index_slice]))
- dtype = np.dtype("U{}".format(max(map(len, bounds))))
- bounds = np.array(bounds, dtype=dtype).reshape((1,) + shape)
- points = serialize(self.points)
- dtype = np.dtype("U{}".format(len(points)))
+ # Express dims_to_collapse as non-negative integers.
+ if dims_to_collapse is None:
+ dims_to_collapse = range(self.ndim)
+ else:
+ dims_to_collapse = tuple(
+ dim % self.ndim for dim in dims_to_collapse
+ )
+ bounds = serialize(self.bounds, dims_to_collapse)
+
+ points = serialize(self.points, dims_to_collapse)
# Create the new collapsed coordinate.
- coord = self.copy(points=np.array(points, dtype=dtype), bounds=bounds)
+ coord = self.copy(points=np.array(points), bounds=bounds)
else:
# Collapse the coordinate by calculating the bounded extremes.
if self.ndim > 1:
diff --git a/lib/iris/cube.py b/lib/iris/cube.py
index 54e086937d..bc90443a51 100644
--- a/lib/iris/cube.py
+++ b/lib/iris/cube.py
@@ -92,8 +92,8 @@ def from_cubes(cubes, constraints=None):
constraints = iris._constraints.list_of_constraints(constraints)
pairs = [_CubeFilter(constraint) for constraint in constraints]
collection = _CubeFilterCollection(pairs)
- for cube in cubes:
- collection.add_cube(cube)
+ for c in cubes:
+ collection.add_cube(c)
return collection
def __init__(self, pairs):
@@ -132,8 +132,8 @@ def __init__(self, *args, **kwargs):
# Do whatever a list does, to initialise ourself "as a list"
super().__init__(*args, **kwargs)
# Check that all items in the list are cubes.
- for cube in self:
- self._assert_is_cube(cube)
+ for c in self:
+ self._assert_is_cube(c)
def __str__(self):
"""Run short :meth:`Cube.summary` on every cube."""
@@ -308,9 +308,9 @@ def _extract_and_merge(cubes, constraints, strict=False, return_single_cube=Fals
constraint_groups = dict(
[(constraint, CubeList()) for constraint in constraints]
)
- for cube in cubes:
+ for c in cubes:
for constraint, cube_list in constraint_groups.items():
- sub_cube = constraint.extract(cube)
+ sub_cube = constraint.extract(c)
if sub_cube is not None:
cube_list.append(sub_cube)
@@ -394,8 +394,8 @@ def merge_cube(self):
# Register each of our cubes with a single ProtoCube.
proto_cube = iris._merge.ProtoCube(self[0])
- for cube in self[1:]:
- proto_cube.register(cube, error_on_mismatch=True)
+ for c in self[1:]:
+ proto_cube.register(c, error_on_mismatch=True)
# Extract the merged cube from the ProtoCube.
(merged_cube,) = proto_cube.merge()
@@ -471,18 +471,18 @@ def merge(self, unique=True):
"""
# Register each of our cubes with its appropriate ProtoCube.
proto_cubes_by_name = {}
- for cube in self:
- name = cube.standard_name
+ for c in self:
+ name = c.standard_name
proto_cubes = proto_cubes_by_name.setdefault(name, [])
proto_cube = None
for target_proto_cube in proto_cubes:
- if target_proto_cube.register(cube):
+ if target_proto_cube.register(c):
proto_cube = target_proto_cube
break
if proto_cube is None:
- proto_cube = iris._merge.ProtoCube(cube)
+ proto_cube = iris._merge.ProtoCube(c)
proto_cubes.append(proto_cube)
# Emulate Python 2 behaviour.
@@ -3175,8 +3175,33 @@ def create_coords(src_coords, add_coord):
add_coord(result_coord, dims)
coord_mapping[id(src_coord)] = result_coord
+ def create_metadata(src_metadatas, add_metadata, get_metadata):
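+ # Mirrors ``create_coords`` above; ``dim`` and ``chunks`` come from the
+ # enclosing scope. Metadata spanning the affected dimension is
+ # concatenated chunk-by-chunk, and anything else is copied unchanged.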
+ for src_metadata in src_metadatas:
+ dims = src_metadata.cube_dims(self)
+ if dim in dims:
+ dim_within_coord = dims.index(dim)
+ data = np.concatenate(
+ [
+ get_metadata(chunk, src_metadata.name()).core_data()
+ for chunk in chunks
+ ],
+ dim_within_coord,
+ )
+ result_coord = src_metadata.copy(values=data)
+ else:
+ result_coord = src_metadata.copy()
+ add_metadata(result_coord, dims)
+
create_coords(self.dim_coords, result.add_dim_coord)
create_coords(self.aux_coords, result.add_aux_coord)
+ create_metadata(
+ self.cell_measures(), result.add_cell_measure, Cube.cell_measure
+ )
+ create_metadata(
+ self.ancillary_variables(),
+ result.add_ancillary_variable,
+ Cube.ancillary_variable,
+ )
for factory in self.aux_factories:
result.add_aux_factory(factory.updated(coord_mapping))
return result
diff --git a/lib/iris/tests/results/unit/cube/Cube/intersection__Metadata/metadata.cml b/lib/iris/tests/results/unit/cube/Cube/intersection__Metadata/metadata.cml
index f1f37e23b9..54d1f2311b 100644
--- a/lib/iris/tests/results/unit/cube/Cube/intersection__Metadata/metadata.cml
+++ b/lib/iris/tests/results/unit/cube/Cube/intersection__Metadata/metadata.cml
@@ -62,6 +62,32 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/lib/iris/tests/results/unit/cube/Cube/intersection__Metadata/metadata_wrapped.cml b/lib/iris/tests/results/unit/cube/Cube/intersection__Metadata/metadata_wrapped.cml
index 48f0fa1aaa..4b2e03ad30 100644
--- a/lib/iris/tests/results/unit/cube/Cube/intersection__Metadata/metadata_wrapped.cml
+++ b/lib/iris/tests/results/unit/cube/Cube/intersection__Metadata/metadata_wrapped.cml
@@ -65,6 +65,32 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/lib/iris/tests/unit/coords/test_Coord.py b/lib/iris/tests/unit/coords/test_Coord.py
index c63261f95c..97429f58f8 100644
--- a/lib/iris/tests/unit/coords/test_Coord.py
+++ b/lib/iris/tests/unit/coords/test_Coord.py
@@ -16,6 +16,7 @@
import cf_units
import dask.array as da
import numpy as np
+import numpy.ma as ma
import pytest
import iris
@@ -701,6 +702,112 @@ def test_lazy_3_bounds(self):
self.assertArrayAlmostEqual(collapsed_coord.points, da.array([2.0]))
self.assertArrayAlmostEqual(collapsed_coord.bounds, da.array([[0.0, 4.0]]))
+ def test_string_masked(self):
+ points = ma.array(["foo", "bar", "bing"], mask=[0, 1, 0], dtype=str)
+ coord = AuxCoord(points)
+
+ collapsed_coord = coord.collapsed(0)
+
+ expected = "foo|--|bing"
+ self.assertEqual(collapsed_coord.points, expected)
+
+ def test_string_nd_first(self):
+ self.setupTestArrays((3, 4))
+ coord = AuxCoord(self.pts_real.astype(str))
+
+ collapsed_coord = coord.collapsed(0)
+ expected = [
+ "0.0|40.0|80.0",
+ "10.0|50.0|90.0",
+ "20.0|60.0|100.0",
+ "30.0|70.0|110.0",
+ ]
+
+ self.assertArrayEqual(collapsed_coord.points, expected)
+
+ def test_string_nd_second(self):
+ self.setupTestArrays((3, 4))
+ coord = AuxCoord(self.pts_real.astype(str))
+
+ collapsed_coord = coord.collapsed(1)
+ expected = [
+ "0.0|10.0|20.0|30.0",
+ "40.0|50.0|60.0|70.0",
+ "80.0|90.0|100.0|110.0",
+ ]
+
+ self.assertArrayEqual(collapsed_coord.points, expected)
+
+ def test_string_nd_both(self):
+ self.setupTestArrays((3, 4))
+ coord = AuxCoord(self.pts_real.astype(str))
+
+ collapsed_coord = coord.collapsed()
+ expected = ["0.0|10.0|20.0|30.0|40.0|50.0|60.0|70.0|80.0|90.0|100.0|110.0"]
+
+ self.assertArrayEqual(collapsed_coord.points, expected)
+
+ def test_string_nd_bounds_first(self):
+ self.setupTestArrays((3, 4))
+ coord = AuxCoord(self.pts_real.astype(str), bounds=self.bds_real.astype(str))
+
+ collapsed_coord = coord.collapsed(0)
+
+ # Points handling is as for non bounded case. So just check bounds.
+ expected_lower = [
+ "-2.0|38.0|78.0",
+ "8.0|48.0|88.0",
+ "18.0|58.0|98.0",
+ "28.0|68.0|108.0",
+ ]
+
+ expected_upper = [
+ "2.0|42.0|82.0",
+ "12.0|52.0|92.0",
+ "22.0|62.0|102.0",
+ "32.0|72.0|112.0",
+ ]
+
+ self.assertArrayEqual(collapsed_coord.bounds[:, 0], expected_lower)
+ self.assertArrayEqual(collapsed_coord.bounds[:, 1], expected_upper)
+
+ def test_string_nd_bounds_second(self):
+ self.setupTestArrays((3, 4))
+ coord = AuxCoord(self.pts_real.astype(str), bounds=self.bds_real.astype(str))
+
+ collapsed_coord = coord.collapsed(1)
+
+ # Points handling is as for non bounded case. So just check bounds.
+ expected_lower = [
+ "-2.0|8.0|18.0|28.0",
+ "38.0|48.0|58.0|68.0",
+ "78.0|88.0|98.0|108.0",
+ ]
+
+ expected_upper = [
+ "2.0|12.0|22.0|32.0",
+ "42.0|52.0|62.0|72.0",
+ "82.0|92.0|102.0|112.0",
+ ]
+
+ self.assertArrayEqual(collapsed_coord.bounds[:, 0], expected_lower)
+ self.assertArrayEqual(collapsed_coord.bounds[:, 1], expected_upper)
+
+ def test_string_nd_bounds_both(self):
+ self.setupTestArrays((3, 4))
+ coord = AuxCoord(self.pts_real.astype(str), bounds=self.bds_real.astype(str))
+
+ collapsed_coord = coord.collapsed()
+
+ # Points handling is as for non bounded case. So just check bounds.
+ expected_lower = ["-2.0|8.0|18.0|28.0|38.0|48.0|58.0|68.0|78.0|88.0|98.0|108.0"]
+ expected_upper = [
+ "2.0|12.0|22.0|32.0|42.0|52.0|62.0|72.0|82.0|92.0|102.0|112.0"
+ ]
+
+ self.assertArrayEqual(collapsed_coord.bounds[:, 0], expected_lower)
+ self.assertArrayEqual(collapsed_coord.bounds[:, 1], expected_upper)
+
class Test_is_compatible(tests.IrisTest):
def setUp(self):
diff --git a/lib/iris/tests/unit/cube/test_Cube.py b/lib/iris/tests/unit/cube/test_Cube.py
index 878a793448..8c36240fb6 100644
--- a/lib/iris/tests/unit/cube/test_Cube.py
+++ b/lib/iris/tests/unit/cube/test_Cube.py
@@ -1207,6 +1207,22 @@ def create_cube(lon_min, lon_max, bounds=False):
),
2,
)
+ cube.add_cell_measure(
+ iris.coords.CellMeasure(
+ np.arange(0, n_lons * 3).reshape(3, -1),
+ "cell_area",
+ units="m2",
+ ),
+ data_dims=[1, 2],
+ )
+ cube.add_ancillary_variable(
+ iris.coords.AncillaryVariable(
+ np.arange(0, n_lons * 3).reshape(3, -1),
+ "land_area_fraction",
+ units="%",
+ ),
+ data_dims=[1, 2],
+ )
if bounds:
cube.coord("longitude").guess_bounds()
cube.add_aux_coord(