diff --git a/test/test_um2netcdf.py b/test/test_um2netcdf.py
index ee7c9c7..b764cfe 100644
--- a/test/test_um2netcdf.py
+++ b/test/test_um2netcdf.py
@@ -11,6 +11,7 @@
 import pytest
 import numpy as np
+import netCDF4
 
 import mule
 import mule.ff
 
@@ -1231,3 +1232,36 @@ def test_fix_forecast_reference_time_proleptic_gregorian(forecast_cube,
                                                          time_coord):
     msg = "Is time.units.calendar == 'proleptic_gregorian' branch & testing required?"
     raise NotImplementedError(msg)
+
+
+@pytest.mark.parametrize(
+    "cube_data, expected_fill_val",
+    [
+        (np.array([1.1, 2.1], dtype="float32"),
+         np.float32(um2nc.DEFAULT_FILL_VAL_FLOAT)),
+        (np.array([1.1, 2.1], dtype="float64"),
+         np.float64(um2nc.DEFAULT_FILL_VAL_FLOAT)),
+        (np.array([1.1, 2.1], dtype="complex64"),
+         np.complex64(netCDF4.default_fillvals["c8"])),
+        (np.array([1, 2], dtype="int32"),
+         np.int32(netCDF4.default_fillvals["i4"])),
+        (np.array([1, 2], dtype="int64"),
+         np.int64(netCDF4.default_fillvals["i8"]))
+    ]
+)
+def test_fix_fill_value_defaults(cube_data, expected_fill_val):
+    """
+    Check that correct default fill values are found based
+    on a cube's data's type.
+    """
+    fake_cube = DummyCube(12345, "fake_var")
+    fake_cube.data = cube_data
+
+    fill_value = um2nc.fix_fill_value(fake_cube)
+
+    assert fill_value == expected_fill_val
+    # Check new fill value type matches cube's data's type
+    assert fill_value.dtype == cube_data.dtype
+
+    # Check that missing value attribute set to expected fill_value
+    assert fake_cube.attributes["missing_value"][0] == expected_fill_val
diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py
index f0c9cca..4028403 100644
--- a/umpost/um2netcdf.py
+++ b/umpost/um2netcdf.py
@@ -22,7 +22,7 @@
 import numpy as np
 import cftime
 import cf_units
-from netCDF4 import default_fillvals
+import netCDF4
 
 import iris.util
 import iris.exceptions
@@ -83,6 +83,8 @@
 FORECAST_PERIOD = "forecast_period"
 TIME = "time"
 
+DEFAULT_FILL_VAL_FLOAT = 1.e20
+
 
 class PostProcessingError(Exception):
     """Generic class for um2nc specific errors."""
@@ -304,6 +306,70 @@ def fix_latlon_coords(cube, grid_type, dlat, dlon):
     fix_lon_coord_name(longitude_coordinate, grid_type, dlon)
 
 
+def get_default_fill_value(cube):
+    """
+    Get the default fill value for a cube's data's type.
+
+    Parameters
+    ----------
+    cube: Iris cube object.
+
+    Returns
+    -------
+    fill_value: numpy scalar with type matching cube.data
+    """
+    if cube.data.dtype.kind == 'f':
+        fill_value = DEFAULT_FILL_VAL_FLOAT
+
+    else:
+        fill_value = netCDF4.default_fillvals[
+            f"{cube.data.dtype.kind:s}{cube.data.dtype.itemsize:1d}"
+        ]
+
+    # NB: the `_FillValue` attribute appears to be converted to match the
+    # cube data's type externally (likely in the netCDF4 library). It's not
+    # strictly necessary to do the conversion here. However, we need
+    # the separate `missing_value` attribute to have the correct type
+    # and so it's cleaner to set the type for both here.
+
+    # NB: The following type conversion approach is similar to the netCDF4
+    # package:
+    # https://github.com/Unidata/netcdf4-python/blob/5ccb3bb67ebf2cc803744707ff654b17ac506d99/src/netCDF4/_netCDF4.pyx#L4413
+    # Update if a cleaner method is found.
+    return np.array([fill_value], dtype=cube.data.dtype)[0]
+
+
+def fix_fill_value(cube, custom_fill_value=None):
+    """
+    Set a cube's missing_value attribute and return the value
+    for later use.
+
+    Parameters
+    ----------
+    cube: Iris cube object (modified in place).
+    custom_fill_value: Fill value to use in place of
+        defaults (not implemented).
+
+    Returns
+    -------
+    fill_value: Fill value to use when writing to netCDF.
+    """
+    if custom_fill_value is not None:
+        msg = "Custom fill values are not currently implemented."
+        raise NotImplementedError(msg)
+
+    # NB: If custom fill values are added, we should check that
+    # their type matches the cube.data.dtype
+
+    fill_value = get_default_fill_value(cube)
+
+    # TODO: Is placing the fill value in an array still necessary,
+    # given the type conversion in get_default_fill_value()
+    cube.attributes['missing_value'] = np.array([fill_value],
+                                                cube.data.dtype)
+    return fill_value
+
+
 # TODO: split cube ops into functions, this will likely increase process() workflow steps
 def cubewrite(cube, sman, compression, use64bit, verbose):
     # TODO: move into process() AND if a new cube is returned, swap into filtered cube list
@@ -313,15 +379,7 @@ def cubewrite(cube, sman, compression, use64bit, verbose):
     if not use64bit:
         convert_32_bit(cube)
 
-    # Set the missing_value attribute. Use an array to force the type to match
-    # the data type
-    if cube.data.dtype.kind == 'f':
-        fill_value = 1.e20
-    else:
-        # Use netCDF defaults
-        fill_value = default_fillvals['%s%1d' % (cube.data.dtype.kind, cube.data.dtype.itemsize)]
-
-    cube.attributes['missing_value'] = np.array([fill_value], cube.data.dtype)
+    fill_value = fix_fill_value(cube)
 
     fix_forecast_reference_time(cube)
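
For reviewers, a minimal standalone sketch (not part of the patch) of the lookup that `get_default_fill_value()` performs: floats get the `1.e20` convention, other kinds fall back to `netCDF4.default_fillvals` keyed by dtype kind plus itemsize, and the result is cast to the data's exact dtype. The dtypes below are illustrative only.

```python
# Illustrative sketch only: how a default fill value is chosen for a dtype
# and cast back to that dtype, mirroring get_default_fill_value().
import numpy as np
import netCDF4

DEFAULT_FILL_VAL_FLOAT = 1.e20  # mirrors the new module-level constant

for dtype in (np.dtype("float32"), np.dtype("int32"), np.dtype("int64")):
    if dtype.kind == "f":
        # Floats keep the long-standing 1e20 convention rather than
        # the netCDF4 float defaults.
        fill = DEFAULT_FILL_VAL_FLOAT
    else:
        # Other kinds use netCDF4's defaults, keyed by kind + itemsize,
        # e.g. "i4" for int32, "i8" for int64.
        fill = netCDF4.default_fillvals[f"{dtype.kind}{dtype.itemsize}"]

    # Indexing a 1-element array returns a numpy scalar whose dtype
    # matches the cube data, as the helper does.
    typed_fill = np.array([fill], dtype=dtype)[0]
    print(dtype, typed_fill, typed_fill.dtype)
```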