
QA conformance (#50)
* isort fixes

* black fixes

* small black changes

* Fix filespecs in pre-commit config.

* codespell fixes

* flake8 fixes

* Update flake8.

* pydocstyle fixes.
pp-mo authored Jan 22, 2024
1 parent 654f8f6 commit 2188bb7
Showing 24 changed files with 161 additions and 150 deletions.
6 changes: 2 additions & 4 deletions .pre-commit-config.yaml
@@ -3,11 +3,9 @@

files: |
(?x)(
noxfile\.py|
setup\.py|
docs\/.+\.py|
lib\/.+\.py|
benchmarks\/.+\.py
tests\/.+\.py
)
minimum_pre_commit_version: 1.21.0

@@ -43,7 +41,7 @@ repos:
args: [--config=./pyproject.toml, .]

- repo: https://github.com/PyCQA/flake8
rev: 6.0.0
rev: 7.0.0
hooks:
- id: flake8
types: [file, python]
30 changes: 12 additions & 18 deletions README.md
@@ -46,15 +46,15 @@ For example:
from ncdata.iris_xarray import cubes_to_xarray, cubes_from_xarray

# Apply Iris regridder to xarray data
dataset = xarray.open_dataset('file1.nc', chunks='auto')
cube, = cubes_from_xarray(dataset)
dataset = xarray.open_dataset("file1.nc", chunks="auto")
(cube,) = cubes_from_xarray(dataset)
cube2 = cube.regrid(grid_cube, iris.analysis.PointInCell)
dataset2 = cubes_to_xarray(cube2)

# Apply Xarray statistic to Iris data
cubes = iris.load('file1.nc')
cubes = iris.load("file1.nc")
dataset = cubes_to_xarray(cubes)
dataset2 = dataset.group_by('time.dayofyear').argmin()
dataset2 = dataset.group_by("time.dayofyear").argmin()
cubes2 = cubes_from_xarray(dataset2)
```
* data conversion is equivalent to writing to a file with one library, and reading it
@@ -82,26 +82,20 @@ from ncdata.iris import to_iris
from ncdata.netcdf4 import to_nc4, from_nc4

# Rename a dimension in xarray output
dataset = xr.open_dataset('file1.nc')
dataset = xr.open_dataset("file1.nc")
xr_ncdata = from_xarray(dataset)
dim = xr_ncdata.dimensions.pop('dim0')
dim.name = 'newdim'
xr_ncdata.dimensions['newdim'] = dim
dim = xr_ncdata.dimensions.pop("dim0")
dim.name = "newdim"
xr_ncdata.dimensions["newdim"] = dim
for var in xr_ncdata.variables.values():
var.dimensions = [
'newdim' if dim == 'dim0' else dim
for dim in var.dimensions
]
to_nc4(ncdata, 'file_2a.nc')
var.dimensions = ["newdim" if dim == "dim0" else dim for dim in var.dimensions]
to_nc4(ncdata, "file_2a.nc")

# Fix chunking in Iris input
ncdata = from_nc4('file1.nc')
ncdata = from_nc4("file1.nc")
for var in ncdata.variables:
# custom chunking() mimics the file chunks we want
var.chunking = lambda: (
100.e6 if dim == 'dim0' else -1
for dim in var.dimensions
)
var.chunking = lambda: (100.0e6 if dim == "dim0" else -1 for dim in var.dimensions)
cubes = to_iris(ncdata)
```
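As a hedged illustration of the dictionary-style dimension handling in the README snippet above, the rename pattern can be wrapped in a small helper. The `rename_dimension` function below is illustrative only, not part of ncdata; it reuses only operations that appear in the snippet.

```python
from ncdata.netcdf4 import from_nc4, to_nc4


def rename_dimension(ncdata, old, new):
    # Pop the dimension under its old name and re-insert it under the new name.
    dim = ncdata.dimensions.pop(old)
    dim.name = new
    ncdata.dimensions[new] = dim
    # Re-point every variable that referenced the old dimension name.
    for var in ncdata.variables.values():
        var.dimensions = [new if d == old else d for d in var.dimensions]


ncdata = from_nc4("file1.nc")
rename_dimension(ncdata, "dim0", "newdim")
to_nc4(ncdata, "file_2a.nc")
```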

6 changes: 5 additions & 1 deletion lib/ncdata/dataset_like.py
@@ -1,5 +1,8 @@
r"""
An adaptor layer allowing an :class:`~ncdata.NcData` to masquerade as a :class:`netCDF4.Dataset` object.
An adaptor layer making a NcData appear like a :class:`netCDF4.Dataset`.
Allows an :class:`~ncdata.NcData` to masquerade as a
:class:`netCDF4.Dataset` object.
Note:
This is a low-level interface, exposed publicly for extended experimental uses.
@@ -96,6 +99,7 @@ class Nc4DatasetLike(_Nc4DatalikeWithNcattrs):
file_format = "NETCDF4"

def __init__(self, ncdata: NcData = None):
"""Create an Nc4DatasetLike, wrapping an NcData."""
if ncdata is None:
ncdata = NcData() # an empty dataset
#: the contained dataset. If not provided, a new, empty dataset is created.
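The new module docstring above describes the adaptor role of this class. A minimal usage sketch, assuming the import path matches the file path shown in the diff, might look like this; only the constructor and the `file_format` attribute visible above are relied on.

```python
from ncdata import NcData
from ncdata.dataset_like import Nc4DatasetLike  # import path assumed from the file path above

ncdata = NcData()                  # an empty in-memory dataset, as in __init__ above
nc4like = Nc4DatasetLike(ncdata)   # masquerades as a netCDF4.Dataset
print(nc4like.file_format)         # "NETCDF4", the class attribute shown above
```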
2 changes: 2 additions & 0 deletions tests/__init__.py
@@ -13,12 +13,14 @@ class MonitoredArray:
"""

def __init__(self, data):
"""Create a MonitoredArray, containing a numpy array."""
self.dtype = data.dtype
self.shape = data.shape
self.ndim = data.ndim
self._data = data
self._accesses = []

def __getitem__(self, keys):
"""Fetch indexed data section."""
self._accesses.append(keys)
return self._data[keys]
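For context, a hedged usage sketch of the `MonitoredArray` helper documented above; the import path is assumed, and only the attributes shown in the diff are used.

```python
import numpy as np

from tests import MonitoredArray  # import path assumed

data = np.arange(6.0).reshape(2, 3)
wrapped = MonitoredArray(data)
section = wrapped[0, 1:3]     # fetched through __getitem__, so the access is recorded
print(wrapped._accesses)      # [(0, slice(1, 3, None))], the recorded index keys
```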
4 changes: 2 additions & 2 deletions tests/_compare_nc_datasets.py
@@ -108,9 +108,9 @@ def _compare_name_lists(

def _isncdata(obj):
"""
A crude test to distinguish NcData objects from similar netCDF4 ones.
Distinguish NcData objects from similar netCDF4 ones.
Used to support comparisons on either type of data.
A crude test, used to support comparisons on either type of data.
"""
return hasattr(obj, "_print_content")

28 changes: 18 additions & 10 deletions tests/data_testcase_schemas.py
@@ -9,14 +9,11 @@
specs. This enables us to perform various translation tests on standard testfiles from
the Iris and Xarray test suites.
"""
import re
import shutil
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, Tuple, Union
from typing import Dict, Union

import iris.tests
import netCDF4
import netCDF4 as nc
import numpy as np
import pytest
@@ -88,7 +85,6 @@ def _write_nc4_dataset(
Separated for recursion and to keep dataset open/close in separate wrapper routine.
"""

# Convenience function
def objmap(objs):
"""Create a map from a list of dim-specs, indexing by their names."""
return {obj["name"]: obj for obj in objs}
@@ -170,10 +166,11 @@ def objmap(objs):

def make_testcase_dataset(filepath, spec):
"""
Generic routine for converting a test dataset 'spec' into an actual netcdf file.
Convert a test dataset 'spec' into an actual netcdf file.
Rather frustratingly similar to ncdata.to_nc4, but it needs to remain separate as
we use it to test that code (!)
A generic routine interpreting "specs" provided as dictionaries.
This is rather frustratingly similar to ncdata.to_nc4, but it needs to remain
separate as we use it to test that code (!)
specs are just a structure of dicts and lists...
group_spec = {
@@ -302,6 +299,12 @@ def make_testcase_dataset(filepath, spec):
# It automatically **calls** the wrapped function, and adds all the results into the
# global "_Standard_Testcases" dictionary.
def standard_testcases_func(func):
"""
Include the results of the wrapped function in the 'standard testcases'.
A decorator for spec-generating routines. It automatically **calls** the wrapped
function, and adds the results into the global "_Standard_Testcases" dictionary.
"""
global _Standard_Testcases
_Standard_Testcases.update(func())
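For illustration, a spec-generating routine registered through this decorator might look like the following sketch; the function and the testcase it returns are hypothetical, modelled on the "ds__singleattr" spec defined later in this file.

```python
@standard_testcases_func
def _define_extra_testcases():
    # Return a {name: spec} mapping; the decorator calls this function at import
    # time and merges the result into the global _Standard_Testcases dictionary.
    return {"ds__extra_singleattr": {"attrs": {"attr1": 1}}}
```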

@@ -371,7 +374,7 @@ def _define_iris_testdata_testcases():
def _define_unit_singleitem_testcases():
testcases = {}
if ADD_UNIT_TESTS:
# Add selected targetted test datasets.
# Add selected targeted test datasets.

# dataset with a single attribute
testcases["ds__singleattr"] = {"attrs": {"attr1": 1}}
@@ -449,12 +452,15 @@ def _define_unit_dtype_testcases():

@pytest.fixture(scope="session")
def session_testdir(tmp_path_factory):
"""Provide a common temporary-files directory path."""
tmp_dir = tmp_path_factory.mktemp("standard_schema_testfiles")
return tmp_dir


@dataclass
class TestcaseSchema:
"""The type of information object returned by the "standard testcase" fixture."""

name: str = ""
spec: dict = None
filepath: Path = None
@@ -463,7 +469,9 @@ class TestcaseSchema:
@pytest.fixture(params=list(_Standard_Testcases.keys()))
def standard_testcase(request, session_testdir):
"""
A fixture which iterates over a set of "standard" dataset testcases.
Provide a set of "standard" dataset testcases.
A fixture returning a parameterised sequence of TestCaseSchema objects.
Some of these are based on a 'testcase spec', from which it builds an actual netcdf
testfile : these files are created in a temporary directory provided by pytest
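A hedged sketch of a test consuming this fixture, not taken from this commit; the guard on `.filepath` is an assumption, since some testcases may not build a file.

```python
from ncdata.netcdf4 import from_nc4


def test_standard_testcase_loads(standard_testcase):
    # Each parametrisation supplies a TestcaseSchema with .name, .spec and .filepath.
    if standard_testcase.filepath is not None:
        ncdata = from_nc4(standard_testcase.filepath)
        assert ncdata is not None
```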
2 changes: 0 additions & 2 deletions tests/integration/example_scripts/ex_dataset_print.py
@@ -1,6 +1,4 @@
"""Temporary integrational proof-of-concept example for dataset printout."""
from pathlib import Path

import iris

import ncdata.iris as nci
1 change: 1 addition & 0 deletions tests/integration/example_scripts/ex_iris_saveto_ncdata.py
@@ -10,6 +10,7 @@


def example_ncdata_from_iris():
"""Demonstrate loading from iris and printing the NcData object."""
print("")
print("==============")
print("TEMPORARY: iris save-to-ncdata test")
(Next file: an example script converting between Xarray and Iris; filename not captured in this view)
@@ -3,8 +3,8 @@
Showing conversion from Xarray to Iris, and back again.
"""
import iris
import dask.array as da
import iris
import numpy as np
import xarray as xr

(Next file: an example script for netCDF4 load/save round-tripping; filename not captured in this view)
@@ -17,7 +17,7 @@


def example_nc4_load_save_roundtrip(): # noqa: D103
# Load an existing file, save-netcdf4 : check same (with Iris for now)
"""Load an existing file ; save to netcdf4 ; check they are the same."""
print("\n----\nNetcdf4 load-save example.")

filepath = testdata_dir / "toa_brightness_temperature.nc"
@@ -40,7 +40,7 @@ def example_nc4_load_save_roundtrip():  # noqa: D103


def example_nc4_save_reload_unlimited_roundtrip():
# Create arbitrary ncdata, save to netcdf4, re-load and check.
"""Create arbitrary ncdata ; save to netcdf4 ; re-load and check similarities."""
print("\n----\nNetcdf4 save-load example.")

ncdata = NcData()
50 changes: 44 additions & 6 deletions tests/integration/roundtrips_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
"""
Utility routines for conversion equivalence testing.
Used by routines in tests/integration which attempt to show that conversions between
ncdata and other types of data preserve information.
"""
import dask.array as da
import numpy as np
import pytest


def cubes_equal__corrected(c1, c2):
"""
A special cube equality test which works around some equality problems.
Perform a cube equality test, working around some specific equality problems.
(1) If cubes contain string (char) data, replace them with booleans which duplicate
the correct pointwise equivalence.
@@ -107,10 +113,14 @@ def set_tiny_chunks(on, size_spec="20Kib"):
_USE_TINY_CHUNKS = on


# this fixture can be referenced by anything, and will make all chunks small for that
# item, if enabled via the global setting.
@pytest.fixture
def adjust_chunks():
"""
Enable use of "tiny chunks", if enabled.
This fixture can be referenced by any test class or function, and will make all
chunks small for that item, if enabled via the global setting.
"""
import dask.config as dcfg

global _USE_TINY_CHUNKS, _CHUNKSIZE_SPEC
@@ -149,6 +159,7 @@ def nanmask_array(array):


def nanmask_cube(cube):
"""Replace all NaNs with masked points, in cube data and coords."""
cube.data = nanmask_array(cube.core_data())
for coord in cube.coords():
coord.points = nanmask_array(coord.core_points())
@@ -160,9 +171,10 @@ def nanmask_cube(cube):
# Horrible code to list the properties of a netCDF4.Variable object
#
import inspect
from pathlib import Path
import tempfile
import shutil
import tempfile
from pathlib import Path

import netCDF4 as nc

dirpath = Path(tempfile.mkdtemp())
@@ -179,6 +191,12 @@


def prune_attrs_varproperties(attrs):
"""
Remove invalid attributes from an attributes dictionary.
Invalid attributes are any whose names match an attribute of a netCDF.Variable.
Any such attributes are deleted, and a set of all names removed is returned.
"""
names = set()
for propname in _NCVAR_PROPERTY_NAMES:
if propname in attrs:
@@ -188,6 +206,11 @@


def prune_cube_varproperties(cube_or_cubes):
"""
Remove invalid attributes from a cube or cubes.
A set of all names of removed attributes is returned.
"""
if hasattr(cube_or_cubes, "add_aux_coord"):
cube_or_cubes = [cube_or_cubes]

@@ -207,17 +230,27 @@ def prune_cube_varproperties(cube_or_cubes):

#
# Remove any "no-units" units, as these are not SAVED correctly.
# (see
# See : https://github.com/SciTools/iris/issues/5368
#
import cf_units


def remove_element_nounits(obj):
"""
Remove an Iris 'no-unit' unit value.
We replace 'no-unit' with 'unknown unit', since Iris save-and-load confuses them.
"""
if obj.units == cf_units._NO_UNIT_STRING:
obj.units = None


def remove_cube_nounits(cube_or_cubes):
"""
Remove any 'no-units' from a cube or cubes.
Also from all cube components with a unit, i.e. _DimensionalMetadata components.
"""
if hasattr(cube_or_cubes, "add_aux_coord"):
cube_or_cubes = [cube_or_cubes]

@@ -240,4 +273,9 @@ def _cube_metadata_key(cube):


def namesort_cubes(cubes):
"""
Sort an iterable of cubes into name order.
Ordering is by the (name(), long_name, var_name) tuple.
"""
return sorted(cubes, key=_cube_metadata_key)
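Given the ordering named in the docstring, the key function (not shown in this diff) is presumably along these lines; this is a hedged reconstruction, not the committed code.

```python
def _cube_metadata_key(cube):
    # Sort by the tuple the docstring names: (name(), long_name, var_name),
    # mapping None to "" so every element compares as a string.
    return (cube.name(), cube.long_name or "", cube.var_name or "")
```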
4 changes: 0 additions & 4 deletions tests/integration/test_roundtrips_iris.py
@@ -6,12 +6,8 @@
(2) check equivalence of files : iris -> file VS iris->ncdata->file
"""
from subprocess import check_output
from unittest import mock

import dask.array as da
import iris
import iris.fileformats.netcdf._thread_safe_nc as iris_threadsafe
import numpy as np
import pytest

from ncdata.netcdf4 import from_nc4, to_nc4
(Remaining file diffs not loaded in this view)
