Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract fill value fixes from cubewrite() #147

Merged
merged 1 commit into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions test/test_um2netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import pytest
import numpy as np
import netCDF4

import mule
import mule.ff
Expand Down Expand Up @@ -1231,3 +1232,36 @@ def test_fix_forecast_reference_time_proleptic_gregorian(forecast_cube,
time_coord):
msg = "Is time.units.calendar == 'proleptic_gregorian' branch & testing required?"
raise NotImplementedError(msg)


@pytest.mark.parametrize(
"cube_data, expected_fill_val",
[
(np.array([1.1, 2.1], dtype="float32"),
np.float32(um2nc.DEFAULT_FILL_VAL_FLOAT)),
(np.array([1.1, 2.1], dtype="float64"),
np.float64(um2nc.DEFAULT_FILL_VAL_FLOAT)),
(np.array([1.1, 2.1], dtype="complex64"),
np.complex64(netCDF4.default_fillvals["c8"])),
(np.array([1, 2], dtype="int32"),
np.int32(netCDF4.default_fillvals["i4"])),
(np.array([1, 2], dtype="int64"),
np.int64(netCDF4.default_fillvals["i8"]))
]
)
def test_fix_fill_value_defaults(cube_data, expected_fill_val):
"""
Check that correct default fill values are found based
on a cube's data's type.
"""
fake_cube = DummyCube(12345, "fake_var")
fake_cube.data = cube_data

fill_value = um2nc.fix_fill_value(fake_cube)

assert fill_value == expected_fill_val
# Check new fill value type matches cube's data's type
assert fill_value.dtype == cube_data.dtype

# Check that missing value attribute set to expected fill_value
assert fake_cube.attributes["missing_value"][0] == expected_fill_val
78 changes: 68 additions & 10 deletions umpost/um2netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import numpy as np
import cftime
import cf_units
from netCDF4 import default_fillvals
import netCDF4

import iris.util
import iris.exceptions
Expand Down Expand Up @@ -83,6 +83,8 @@
FORECAST_PERIOD = "forecast_period"
TIME = "time"

DEFAULT_FILL_VAL_FLOAT = 1.e20


class PostProcessingError(Exception):
"""Generic class for um2nc specific errors."""
Expand Down Expand Up @@ -304,6 +306,70 @@ def fix_latlon_coords(cube, grid_type, dlat, dlon):
fix_lon_coord_name(longitude_coordinate, grid_type, dlon)


def get_default_fill_value(cube):
"""
Get the default fill value for a cube's data's type.

Parameters
----------
cube: Iris cube object.

Returns
-------
fill_value: numpy scalar with type matching cube.data
"""
if cube.data.dtype.kind == 'f':
fill_value = DEFAULT_FILL_VAL_FLOAT

else:
fill_value = netCDF4.default_fillvals[
f"{cube.data.dtype.kind:s}{cube.data.dtype.itemsize:1d}"
]

# NB: the `_FillValue` attribute appears to be converted to match the
# cube data's type externally (likely in the netCDF4 library). It's not
# strictly necessary to do the conversion here. However, we need
# the separate `missing_value` attribute to have the correct type
# and so it's cleaner to set the type for both here.

# NB: The following type conversion approach is similar to the netCDF4
# package:
# https://github.com/Unidata/netcdf4-python/blob/5ccb3bb67ebf2cc803744707ff654b17ac506d99/src/netCDF4/_netCDF4.pyx#L4413
# Update if a cleaner method is found.
return np.array([fill_value], dtype=cube.data.dtype)[0]


def fix_fill_value(cube, custom_fill_value=None):
"""
Set a cube's missing_value attribute and return the value
for later use.

Parameters
----------
cube: Iris cube object (modified in place).
custom_fill_value: Fill value to use in place of
defaults (not implemented).

Returns
-------
fill_value: Fill value to use when writing to netCDF.
"""
if custom_fill_value is not None:
msg = "Custom fill values are not currently implemented."
raise NotImplementedError(msg)

# NB: If custom fill values are added, we should check that
# their type matches the cube.data.dtype

fill_value = get_default_fill_value(cube)

# TODO: Is placing the fill value in an array still necessary,
# given the type conversion in get_default_fill_value()
cube.attributes['missing_value'] = np.array([fill_value],
cube.data.dtype)
return fill_value


# TODO: split cube ops into functions, this will likely increase process() workflow steps
def cubewrite(cube, sman, compression, use64bit, verbose):
# TODO: move into process() AND if a new cube is returned, swap into filtered cube list
Expand All @@ -313,15 +379,7 @@ def cubewrite(cube, sman, compression, use64bit, verbose):
if not use64bit:
convert_32_bit(cube)

# Set the missing_value attribute. Use an array to force the type to match
# the data type
if cube.data.dtype.kind == 'f':
fill_value = 1.e20
else:
# Use netCDF defaults
fill_value = default_fillvals['%s%1d' % (cube.data.dtype.kind, cube.data.dtype.itemsize)]

cube.attributes['missing_value'] = np.array([fill_value], cube.data.dtype)
fill_value = fix_fill_value(cube)

fix_forecast_reference_time(cube)

Expand Down
Loading