Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean, test & refactor 64 to 32 bit conversion step. #96

Merged
merged 11 commits into from
Sep 12, 2024
26 changes: 26 additions & 0 deletions test/test_um2netcdf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import unittest.mock as mock
from dataclasses import dataclass
from collections import namedtuple
import operator

import umpost.um2netcdf as um2nc

Expand Down Expand Up @@ -397,6 +398,7 @@ def __init__(self, item_code, var_name=None, attributes=None, units=None):
self.standard_name = None
self.long_name = None
self.coord = {}
self.data = None

def name(self):
# mimic iris API
Expand Down Expand Up @@ -740,3 +742,27 @@ def test_fix_level_coord_skipped_if_no_levels(z_sea_rho_data, z_sea_theta_data):
m_cube = mock.Mock(iris.cube.Cube)
m_cube.coord.side_effect = iris.exceptions.CoordinateNotFoundError
um2nc.fix_level_coord(m_cube, z_sea_rho_data, z_sea_theta_data)


# int64 to int32 data conversion tests
# NB: skip float64 to float32 overflow as float32 min/max is huge: -/+ 3.40e+38
@pytest.mark.parametrize("array,_operator,bound",
                         [([100, 10, 1, 0, -10], None, None),
                          ([3000000000], operator.gt, np.iinfo(np.int32).max),
                          ([-3000000000], operator.lt, np.iinfo(np.int32).min)])
def test_convert_32_bit(ua_plev_cube, array, _operator, bound):
    """
    Check int64 cube data is narrowed to int32 by convert_32_bit().

    Parameters
    ----------
    ua_plev_cube : cube fixture whose data attribute is replaced by the test.
    array : list of ints used to build the int64 input data.
    _operator : comparison from the operator module for the out of range
        cases, or None when every value fits in 32 bits.
    bound : the int32 limit the first array value is compared against, or
        None for the in range case.
    """
    ua_plev_cube.data = np.array(array, dtype=np.int64)

    if _operator:
        # sanity check the parametrized input really is outside int32 range
        assert _operator(array[0], bound)

        # out of range data must be reported, not silently wrapped
        with pytest.warns(RuntimeWarning):
            um2nc.convert_32_bit(ua_plev_cube)
    else:
        um2nc.convert_32_bit(ua_plev_cube)

        # in range values must survive the narrowing unchanged
        assert np.array_equal(ua_plev_cube.data, array)

    assert ua_plev_cube.data.dtype == np.int32


# float64 case is kept out of the parametrize block above to keep it readable
def test_convert_32_bit_with_float64(ua_plev_cube):
    """Check float64 cube data comes out of convert_32_bit() as float32."""
    values = [300.33, 30.456, 3.04, 0.0, -30.667]
    ua_plev_cube.data = np.array(values, dtype=np.float64)

    um2nc.convert_32_bit(ua_plev_cube)

    result_dtype = ua_plev_cube.data.dtype
    assert result_dtype == np.float32
truth-quark marked this conversation as resolved.
Show resolved Hide resolved
40 changes: 36 additions & 4 deletions umpost/um2netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,11 +159,9 @@ def cubewrite(cube, sman, compression, use64bit, verbose):
except iris.exceptions.CoordinateNotFoundError:
pass

# TODO: flag warnings as an error for the driver script?
if not use64bit:
if cube.data.dtype == 'float64':
cube.data = cube.data.astype(np.float32)
elif cube.data.dtype == 'int64':
cube.data = cube.data.astype(np.int32)
convert_32_bit(cube)

# Set the missing_value attribute. Use an array to force the type to match
# the data type
Expand Down Expand Up @@ -736,6 +734,40 @@ def fix_level_coord(cube, z_rho, z_theta, tol=1e-6):
c_sigma.var_name = 'sigma_theta'


# Inclusive limits of numpy's signed 32 bit integer type
MAX_NP_INT32 = np.iinfo(np.int32).max
MIN_NP_INT32 = np.iinfo(np.int32).min


def convert_32_bit(cube):
    """
    Convert 64 bit int/float data to 32 bit (in place).

    float64 data is cast to float32 and int64 data is cast to int32. Data of
    any other dtype is left untouched. Zero-size int64 data is converted
    without a range check as there are no values to inspect.

    Parameters
    ----------
    cube : iris.cube object to modify.

    Warns
    -----
    RuntimeWarning : if the cube has data over 32-bit limits, causing an overflow.
        A single warning is issued even if both limits are breached.
    """
    # read the data once & reuse it for the dtype test, range check & cast
    data = cube.data

    if data.dtype == 'float64':
        cube.data = data.astype(np.float32)
    elif data.dtype == 'int64':
        # np.max()/np.min() raise ValueError on zero-size arrays, so only
        # range check when there are values to inspect
        if data.size and (np.max(data) > MAX_NP_INT32
                          or np.min(data) < MIN_NP_INT32):
            msg = (f"32 bit under/overflow converting {cube.var_name}! Output data "
                   f"likely invalid. Use '--64' option to retain data integrity.")
            warnings.warn(msg, category=RuntimeWarning)

        cube.data = data.astype(np.int32)


def parse_args():
parser = argparse.ArgumentParser(description="Convert UM fieldsfile to netcdf")
parser.add_argument('-k', dest='nckind', required=False, type=int,
Expand Down
Loading