
QA conformance (#50)
* isort fixes

* black fixes

* small black changes

* Fix filespecs in pre-commit config.

* codespell fixes

* flake8 fixes

* Update flake8.

* pydocstyle fixes.
pp-mo authored Jan 22, 2024
1 parent 654f8f6 commit 2188bb7
Showing 24 changed files with 161 additions and 150 deletions.
6 changes: 2 additions & 4 deletions .pre-commit-config.yaml
@@ -3,11 +3,9 @@

files: |
(?x)(
noxfile\.py|
setup\.py|
docs\/.+\.py|
lib\/.+\.py|
benchmarks\/.+\.py
tests\/.+\.py
)
minimum_pre_commit_version: 1.21.0

@@ -43,7 +41,7 @@ repos:
args: [--config=./pyproject.toml, .]

- repo: https://github.com/PyCQA/flake8
rev: 6.0.0
rev: 7.0.0
hooks:
- id: flake8
types: [file, python]
30 changes: 12 additions & 18 deletions README.md
@@ -46,15 +46,15 @@ For example:
from ncdata.iris_xarray import cubes_to_xarray, cubes_from_xarray

# Apply Iris regridder to xarray data
dataset = xarray.open_dataset('file1.nc', chunks='auto')
cube, = cubes_from_xarray(dataset)
dataset = xarray.open_dataset("file1.nc", chunks="auto")
(cube,) = cubes_from_xarray(dataset)
cube2 = cube.regrid(grid_cube, iris.analysis.PointInCell)
dataset2 = cubes_to_xarray(cube2)

# Apply Xarray statistic to Iris data
cubes = iris.load('file1.nc')
cubes = iris.load("file1.nc")
dataset = cubes_to_xarray(cubes)
dataset2 = dataset.group_by('time.dayofyear').argmin()
dataset2 = dataset.group_by("time.dayofyear").argmin()
cubes2 = cubes_from_xarray(dataset2)
```
* data conversion is equivalent to writing to a file with one library, and reading it
@@ -82,26 +82,20 @@ from ncdata.iris import to_iris
from ncdata.netcdf4 import to_nc4, from_nc4

# Rename a dimension in xarray output
dataset = xr.open_dataset('file1.nc')
dataset = xr.open_dataset("file1.nc")
xr_ncdata = from_xarray(dataset)
dim = xr_ncdata.dimensions.pop('dim0')
dim.name = 'newdim'
xr_ncdata.dimensions['newdim'] = dim
dim = xr_ncdata.dimensions.pop("dim0")
dim.name = "newdim"
xr_ncdata.dimensions["newdim"] = dim
for var in xr_ncdata.variables.values():
var.dimensions = [
'newdim' if dim == 'dim0' else dim
for dim in var.dimensions
]
to_nc4(ncdata, 'file_2a.nc')
var.dimensions = ["newdim" if dim == "dim0" else dim for dim in var.dimensions]
to_nc4(ncdata, "file_2a.nc")

# Fix chunking in Iris input
ncdata = from_nc4('file1.nc')
ncdata = from_nc4("file1.nc")
for var in ncdata.variables:
# custom chunking() mimics the file chunks we want
var.chunking = lambda: (
100.e6 if dim == 'dim0' else -1
for dim in var.dimensions
)
var.chunking = lambda: (100.0e6 if dim == "dim0" else -1 for dim in var.dimensions)
cubes = to_iris(ncdata)
```
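As a hedged illustration of the dictionary-style dimension handling in the README snippet above, the rename pattern can be wrapped in a small helper. The `rename_dimension` function below is illustrative only, not part of ncdata; it reuses only operations that appear in the snippet.

```python
from ncdata.netcdf4 import from_nc4, to_nc4


def rename_dimension(ncdata, old, new):
    # Pop the dimension under its old name and re-insert it under the new name.
    dim = ncdata.dimensions.pop(old)
    dim.name = new
    ncdata.dimensions[new] = dim
    # Re-point every variable that referenced the old dimension name.
    for var in ncdata.variables.values():
        var.dimensions = [new if d == old else d for d in var.dimensions]


ncdata = from_nc4("file1.nc")
rename_dimension(ncdata, "dim0", "newdim")
to_nc4(ncdata, "file_2a.nc")
```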

6 changes: 5 additions & 1 deletion lib/ncdata/dataset_like.py
@@ -1,5 +1,8 @@
r"""
An adaptor layer allowing an :class:`~ncdata.NcData` to masquerade as a :class:`netCDF4.Dataset` object.
An adaptor layer making a NcData appear like a :class:`netCDF4.Dataset`.
Allows an :class:`~ncdata.NcData` to masquerade as a
:class:`netCDF4.Dataset` object.
Note:
This is a low-level interface, exposed publicly for extended experimental uses.
@@ -96,6 +99,7 @@ class Nc4DatasetLike(_Nc4DatalikeWithNcattrs):
file_format = "NETCDF4"

def __init__(self, ncdata: NcData = None):
"""Create an Nc4DatasetLike, wrapping an NcData."""
if ncdata is None:
ncdata = NcData() # an empty dataset
#: the contained dataset. If not provided, a new, empty dataset is created.
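The new module docstring above describes the adaptor role of this class. A minimal usage sketch, assuming the import path matches the file path shown in the diff, might look like this; only the constructor and the `file_format` attribute visible above are relied on.

```python
from ncdata import NcData
from ncdata.dataset_like import Nc4DatasetLike  # import path assumed from the file path above

ncdata = NcData()                  # an empty in-memory dataset, as in __init__ above
nc4like = Nc4DatasetLike(ncdata)   # masquerades as a netCDF4.Dataset
print(nc4like.file_format)         # "NETCDF4", the class attribute shown above
```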
2 changes: 2 additions & 0 deletions tests/__init__.py
@@ -13,12 +13,14 @@ class MonitoredArray:
"""

def __init__(self, data):
"""Create a MonitoredArray, containing a numpy array."""
self.dtype = data.dtype
self.shape = data.shape
self.ndim = data.ndim
self._data = data
self._accesses = []

def __getitem__(self, keys):
"""Fetch indexed data section."""
self._accesses.append(keys)
return self._data[keys]
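For context, a hedged usage sketch of the `MonitoredArray` helper documented above; the import path is assumed, and only the attributes shown in the diff are used.

```python
import numpy as np

from tests import MonitoredArray  # import path assumed

data = np.arange(6.0).reshape(2, 3)
wrapped = MonitoredArray(data)
section = wrapped[0, 1:3]     # fetched through __getitem__, so the access is recorded
print(wrapped._accesses)      # [(0, slice(1, 3, None))], the recorded index keys
```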
4 changes: 2 additions & 2 deletions tests/_compare_nc_datasets.py
@@ -108,9 +108,9 @@ def _compare_name_lists(

def _isncdata(obj):
"""
A crude test to distinguish NcData objects from similar netCDF4 ones.
Distinguish NcData objects from similar netCDF4 ones.
Used to support comparisons on either type of data.
A crude test, used to support comparisons on either type of data.
"""
return hasattr(obj, "_print_content")

28 changes: 18 additions & 10 deletions tests/data_testcase_schemas.py
@@ -9,14 +9,11 @@
specs. This enables us to perform various translation tests on standard testfiles from
the Iris and Xarray test suites.
"""
import re
import shutil
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, Tuple, Union
from typing import Dict, Union

import iris.tests
import netCDF4
import netCDF4 as nc
import numpy as np
import pytest
@@ -88,7 +85,6 @@ def _write_nc4_dataset(
Separated for recursion and to keep dataset open/close in separate wrapper routine.
"""

# Convenience function
def objmap(objs):
"""Create a map from a list of dim-specs, indexing by their names."""
return {obj["name"]: obj for obj in objs}
@@ -170,10 +166,11 @@ def objmap(objs):

def make_testcase_dataset(filepath, spec):
"""
Generic routine for converting a test dataset 'spec' into an actual netcdf file.
Convert a test dataset 'spec' into an actual netcdf file.
Rather frustratingly similar to ncdata.to_nc4, but it needs to remain separate as
we use it to test that code (!)
A generic routine interpreting "specs" provided as dictionaries.
This is rather frustratingly similar to ncdata.to_nc4, but it needs to remain
separate as we use it to test that code (!)
specs are just a structure of dicts and lists...
group_spec = {
@@ -302,6 +299,12 @@ def make_testcase_dataset(filepath, spec):
# It automatically **calls** the wrapped function, and adds all the results into the
# global "_Standard_Testcases" dictionary.
def standard_testcases_func(func):
"""
Include the results of the wrapped function in the 'standard testcases'.
A decorator for spec-generating routines. It automatically **calls** the wrapped
function, and adds the results into the global "_Standard_Testcases" dictionary.
"""
global _Standard_Testcases
_Standard_Testcases.update(func())
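For illustration, a spec-generating routine registered through this decorator might look like the following sketch; the function and the testcase it returns are hypothetical, modelled on the "ds__singleattr" spec defined later in this file.

```python
@standard_testcases_func
def _define_extra_testcases():
    # Return a {name: spec} mapping; the decorator calls this function at import
    # time and merges the result into the global _Standard_Testcases dictionary.
    return {"ds__extra_singleattr": {"attrs": {"attr1": 1}}}
```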

@@ -371,7 +374,7 @@ def _define_iris_testdata_testcases():
def _define_unit_singleitem_testcases():
testcases = {}
if ADD_UNIT_TESTS:
# Add selected targetted test datasets.
# Add selected targeted test datasets.

# dataset with a single attribute
testcases["ds__singleattr"] = {"attrs": {"attr1": 1}}
@@ -449,12 +452,15 @@ def _define_unit_dtype_testcases():

@pytest.fixture(scope="session")
def session_testdir(tmp_path_factory):
"""Provide a common temporary-files directory path."""
tmp_dir = tmp_path_factory.mktemp("standard_schema_testfiles")
return tmp_dir


@dataclass
class TestcaseSchema:
"""The type of information object returned by the "standard testcase" fixture."""

name: str = ""
spec: dict = None
filepath: Path = None
@@ -463,7 +469,9 @@ class TestcaseSchema:
@pytest.fixture(params=list(_Standard_Testcases.keys()))
def standard_testcase(request, session_testdir):
"""
A fixture which iterates over a set of "standard" dataset testcases.
Provide a set of "standard" dataset testcases.
A fixture returning a parameterised sequence of TestCaseSchema objects.
Some of these are based on a 'testcase spec', from which it builds an actual netcdf
testfile : these files are created in a temporary directory provided by pytest
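A hedged sketch of a test consuming this fixture, not taken from this commit; the guard on `.filepath` is an assumption, since some testcases may not build a file.

```python
from ncdata.netcdf4 import from_nc4


def test_standard_testcase_loads(standard_testcase):
    # Each parametrisation supplies a TestcaseSchema with .name, .spec and .filepath.
    if standard_testcase.filepath is not None:
        ncdata = from_nc4(standard_testcase.filepath)
        assert ncdata is not None
```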
2 changes: 0 additions & 2 deletions tests/integration/example_scripts/ex_dataset_print.py
@@ -1,6 +1,4 @@
"""Temporary integrational proof-of-concept example for dataset printout."""
from pathlib import Path

import iris

import ncdata.iris as nci
1 change: 1 addition & 0 deletions tests/integration/example_scripts/ex_iris_saveto_ncdata.py
@@ -10,6 +10,7 @@


def example_ncdata_from_iris():
"""Demonstrate loading from iris and printing the NcData object."""
print("")
print("==============")
print("TEMPORARY: iris save-to-ncdata test")
(Next file: an example script converting between Xarray and Iris; filename not captured in this view)
@@ -3,8 +3,8 @@
Showing conversion from Xarray to Iris, and back again.
"""
import iris
import dask.array as da
import iris
import numpy as np
import xarray as xr

(Next file: an example script for netCDF4 load/save round-tripping; filename not captured in this view)
@@ -17,7 +17,7 @@


def example_nc4_load_save_roundtrip(): # noqa: D103
# Load an existing file, save-netcdf4 : check same (with Iris for now)
"""Load an existing file ; save to netcdf4 ; check they are the same."""
print("\n----\nNetcdf4 load-save example.")

filepath = testdata_dir / "toa_brightness_temperature.nc"
@@ -40,7 +40,7 @@ def example_nc4_load_save_roundtrip():  # noqa: D103


def example_nc4_save_reload_unlimited_roundtrip():
# Create arbitrary ncdata, save to netcdf4, re-load and check.
"""Create arbitrary ncdata ; save to netcdf4 ; re-load and check similarities."""
print("\n----\nNetcdf4 save-load example.")

ncdata = NcData()
50 changes: 44 additions & 6 deletions tests/integration/roundtrips_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
"""
Utility routines for conversion equivalence testing.
Used by routines in tests/integration which attempt to show that conversions between
ncdata and other types of data preserve information.
"""
import dask.array as da
import numpy as np
import pytest


def cubes_equal__corrected(c1, c2):
"""
A special cube equality test which works around some equality problems.
Perform a cube equality test, working around some specific equality problems.
(1) If cubes contain string (char) data, replace them with booleans which duplicate
the correct pointwise equivalence.
@@ -107,10 +113,14 @@ def set_tiny_chunks(on, size_spec="20Kib"):
_USE_TINY_CHUNKS = on


# this fixture can be referenced by anything, and will make all chunks small for that
# item, if enabled via the global setting.
@pytest.fixture
def adjust_chunks():
"""
Enable use of "tiny chunks", if enabled.
This fixture can be referenced by any test class or function, and will make all
chunks small for that item, if enabled via the global setting.
"""
import dask.config as dcfg

global _USE_TINY_CHUNKS, _CHUNKSIZE_SPEC
@@ -149,6 +159,7 @@ def nanmask_array(array):


def nanmask_cube(cube):
"""Replace all NaNs with masked points, in cube data and coords."""
cube.data = nanmask_array(cube.core_data())
for coord in cube.coords():
coord.points = nanmask_array(coord.core_points())
@@ -160,9 +171,10 @@ def nanmask_cube(cube):
# Horrible code to list the properties of a netCDF4.Variable object
#
import inspect
from pathlib import Path
import tempfile
import shutil
import tempfile
from pathlib import Path

import netCDF4 as nc

dirpath = Path(tempfile.mkdtemp())
@@ -179,6 +191,12 @@


def prune_attrs_varproperties(attrs):
"""
Remove invalid attributes from an attributes dictionary.
Invalid attributes are any whose names match an attribute of a netCDF.Variable.
Any such attributes are deleted, and a set of all names removed is returned.
"""
names = set()
for propname in _NCVAR_PROPERTY_NAMES:
if propname in attrs:
@@ -188,6 +206,11 @@


def prune_cube_varproperties(cube_or_cubes):
"""
Remove invalid attributes from a cube or cubes.
A set of all names of removed attributes is returned.
"""
if hasattr(cube_or_cubes, "add_aux_coord"):
cube_or_cubes = [cube_or_cubes]

@@ -207,17 +230,27 @@ def prune_cube_varproperties(cube_or_cubes):

#
# Remove any "no-units" units, as these are not SAVED correctly.
# (see
# See : https://github.com/SciTools/iris/issues/5368
#
import cf_units


def remove_element_nounits(obj):
"""
Remove an Iris 'no-unit' unit value.
We replace 'no-unit' with 'unknown unit', since Iris save-and-load confuses them.
"""
if obj.units == cf_units._NO_UNIT_STRING:
obj.units = None


def remove_cube_nounits(cube_or_cubes):
"""
Remove any 'no-units' from a cube or cubes.
Also from all cube components with a unit, i.e. _DimensionalMetadata components.
"""
if hasattr(cube_or_cubes, "add_aux_coord"):
cube_or_cubes = [cube_or_cubes]

@@ -240,4 +273,9 @@ def _cube_metadata_key(cube):


def namesort_cubes(cubes):
"""
Sort an iterable of cubes into name order.
Ordering is by the (name(), long_name, var_name) tuple.
"""
return sorted(cubes, key=_cube_metadata_key)
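Given the ordering named in the docstring, the key function (not shown in this diff) is presumably along these lines; this is a hedged reconstruction, not the committed code.

```python
def _cube_metadata_key(cube):
    # Sort by the tuple the docstring names: (name(), long_name, var_name),
    # mapping None to "" so every element compares as a string.
    return (cube.name(), cube.long_name or "", cube.var_name or "")
```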
4 changes: 0 additions & 4 deletions tests/integration/test_roundtrips_iris.py
@@ -6,12 +6,8 @@
(2) check equivalence of files : iris -> file VS iris->ncdata->file
"""
from subprocess import check_output
from unittest import mock

import dask.array as da
import iris
import iris.fileformats.netcdf._thread_safe_nc as iris_threadsafe
import numpy as np
import pytest

from ncdata.netcdf4 import from_nc4, to_nc4
(Remaining file diffs not loaded in this view)
