From f30df231cf691d18710a678bd75cf0209d75b184 Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Fri, 6 Sep 2024 12:34:49 +1000 Subject: [PATCH 01/11] Extract 64 --> 32 bit data conversion. --- umpost/um2netcdf.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py index 80f55ef..bb1c70e 100644 --- a/umpost/um2netcdf.py +++ b/umpost/um2netcdf.py @@ -160,10 +160,7 @@ def cubewrite(cube, sman, compression, use64bit, verbose): pass if not use64bit: - if cube.data.dtype == 'float64': - cube.data = cube.data.astype(np.float32) - elif cube.data.dtype == 'int64': - cube.data = cube.data.astype(np.int32) + convert_32_bit(cube) # Set the missing_value attribute. Use an array to force the type to match # the data type @@ -736,6 +733,13 @@ def fix_level_coord(cube, z_rho, z_theta, tol=1e-6): c_sigma.var_name = 'sigma_theta' +def convert_32_bit(cube): + if cube.data.dtype == 'float64': + cube.data = cube.data.astype(np.float32) + elif cube.data.dtype == 'int64': + cube.data = cube.data.astype(np.int32) + + def parse_args(): parser = argparse.ArgumentParser(description="Convert UM fieldsfile to netcdf") parser.add_argument('-k', dest='nckind', required=False, type=int, From df74000dc51bf83c72757c5f02bc3b98c7db969d Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Fri, 6 Sep 2024 12:44:12 +1000 Subject: [PATCH 02/11] Add basic 64 --> 32 bit data conversion tests. --- test/test_um2netcdf.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/test/test_um2netcdf.py b/test/test_um2netcdf.py index c1a86f0..ecb3fd4 100644 --- a/test/test_um2netcdf.py +++ b/test/test_um2netcdf.py @@ -397,6 +397,7 @@ def __init__(self, item_code, var_name=None, attributes=None, units=None): self.standard_name = None self.long_name = None self.coord = {} + self.data = None def name(self): # mimic iris API @@ -740,3 +741,19 @@ def test_fix_level_coord_skipped_if_no_levels(z_sea_rho_data, z_sea_theta_data): m_cube = mock.Mock(iris.cube.Cube) m_cube.coord.side_effect = iris.exceptions.CoordinateNotFoundError um2nc.fix_level_coord(m_cube, z_sea_rho_data, z_sea_theta_data) + + +# 64 to 32 bit data conversion tests + +def test_64_to_32_int(ua_plev_cube): + array = np.array([100, 10, 1, 0, -10], dtype=np.int64) + ua_plev_cube.data = array + um2nc.convert_32_bit(ua_plev_cube) + assert ua_plev_cube.data.dtype == np.int32 + + +def test_64_to_32_float(ua_plev_cube): + array = np.array([300.33, 30.456, 3.04, 0.0, -30.667], dtype=np.float64) + ua_plev_cube.data = array + um2nc.convert_32_bit(ua_plev_cube) + assert ua_plev_cube.data.dtype == np.float32 From 9fc3c4d5e04e494080f86d9a094a783387aa12fd Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Fri, 6 Sep 2024 12:54:44 +1000 Subject: [PATCH 03/11] Add warnings for integer conversion over/under flow. --- test/test_um2netcdf.py | 24 +++++++++++++++++++++++- umpost/um2netcdf.py | 14 ++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/test/test_um2netcdf.py b/test/test_um2netcdf.py index ecb3fd4..94da4bb 100644 --- a/test/test_um2netcdf.py +++ b/test/test_um2netcdf.py @@ -745,13 +745,35 @@ def test_fix_level_coord_skipped_if_no_levels(z_sea_rho_data, z_sea_theta_data): # 64 to 32 bit data conversion tests -def test_64_to_32_int(ua_plev_cube): +def test_convert_32_bit_with_int64(ua_plev_cube): array = np.array([100, 10, 1, 0, -10], dtype=np.int64) ua_plev_cube.data = array um2nc.convert_32_bit(ua_plev_cube) assert ua_plev_cube.data.dtype == np.int32 +def test_convert_32_bit_overflow_with_int64(ua_plev_cube): + array = np.array([3000000000], dtype=np.int64) + assert array[0] > np.iinfo(np.int32).max + ua_plev_cube.data = array + + with pytest.warns(): + um2nc.convert_32_bit(ua_plev_cube) + + assert ua_plev_cube.data.dtype == np.int32 + + +def test_convert_32_bit_underflow_with_int64(ua_plev_cube): + array = np.array([-3000000000], dtype=np.int64) + assert array[0] < np.iinfo(np.int32).max + ua_plev_cube.data = array + + with pytest.warns(): + um2nc.convert_32_bit(ua_plev_cube) + + assert ua_plev_cube.data.dtype == np.int32 + + def test_64_to_32_float(ua_plev_cube): array = np.array([300.33, 30.456, 3.04, 0.0, -30.667], dtype=np.float64) ua_plev_cube.data = array diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py index bb1c70e..f1a4e76 100644 --- a/umpost/um2netcdf.py +++ b/umpost/um2netcdf.py @@ -733,10 +733,24 @@ def fix_level_coord(cube, z_rho, z_theta, tol=1e-6): c_sigma.var_name = 'sigma_theta' +MAX_NP_INT32 = np.iinfo(np.int32).max +MIN_NP_INT32 = np.iinfo(np.int32).min + + def convert_32_bit(cube): if cube.data.dtype == 'float64': cube.data = cube.data.astype(np.float32) elif cube.data.dtype == 'int64': + _max = np.max(cube.data) + _min = np.min(cube.data) + + if _max > MAX_NP_INT32: + msg = f"Converting {cube.var_name} causes a 32 bit overflow!" + warnings.warn(msg) + elif _min < MIN_NP_INT32: + msg = f"Converting {cube.var_name} causes a 32 bit underflow!" + warnings.warn(msg) + cube.data = cube.data.astype(np.int32) From 0cf8fd4d0fc153ac8b94e4ac4b4c59981ebab44d Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Fri, 6 Sep 2024 14:20:51 +1000 Subject: [PATCH 04/11] Fix test function name & add explanation for skipping float32 overflow. --- test/test_um2netcdf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_um2netcdf.py b/test/test_um2netcdf.py index 94da4bb..386f7e7 100644 --- a/test/test_um2netcdf.py +++ b/test/test_um2netcdf.py @@ -744,6 +744,7 @@ def test_fix_level_coord_skipped_if_no_levels(z_sea_rho_data, z_sea_theta_data): # 64 to 32 bit data conversion tests +# NB: skip float63 to float32 overflow as float32 min/max is -/+ 3.40e+38 def test_convert_32_bit_with_int64(ua_plev_cube): array = np.array([100, 10, 1, 0, -10], dtype=np.int64) @@ -774,7 +775,7 @@ def test_convert_32_bit_underflow_with_int64(ua_plev_cube): assert ua_plev_cube.data.dtype == np.int32 -def test_64_to_32_float(ua_plev_cube): +def test_convert_32_bit_with_float64(ua_plev_cube): array = np.array([300.33, 30.456, 3.04, 0.0, -30.667], dtype=np.float64) ua_plev_cube.data = array um2nc.convert_32_bit(ua_plev_cube) From 05238cd1fe43ce0c767ab19a5557d54ff9ff08a0 Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Fri, 6 Sep 2024 14:23:20 +1000 Subject: [PATCH 05/11] Add conversion docstring. --- umpost/um2netcdf.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py index f1a4e76..9515fc1 100644 --- a/umpost/um2netcdf.py +++ b/umpost/um2netcdf.py @@ -738,6 +738,13 @@ def fix_level_coord(cube, z_rho, z_theta, tol=1e-6): def convert_32_bit(cube): + """ + Convert 64 bit int/float data to 32 bit (in place). + + Parameters + ---------- + cube : iris.cube object to modify. + """ if cube.data.dtype == 'float64': cube.data = cube.data.astype(np.float32) elif cube.data.dtype == 'int64': From b21698af3f7c4136cdd9e5ff7a018ef3a455aadd Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Tue, 10 Sep 2024 11:47:46 +1000 Subject: [PATCH 06/11] Refactor 64 to 32 bit testing with parametrize. --- test/test_um2netcdf.py | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/test/test_um2netcdf.py b/test/test_um2netcdf.py index 386f7e7..82860d0 100644 --- a/test/test_um2netcdf.py +++ b/test/test_um2netcdf.py @@ -1,6 +1,7 @@ import unittest.mock as mock from dataclasses import dataclass from collections import namedtuple +import operator import umpost.um2netcdf as um2nc @@ -743,38 +744,23 @@ def test_fix_level_coord_skipped_if_no_levels(z_sea_rho_data, z_sea_theta_data): um2nc.fix_level_coord(m_cube, z_sea_rho_data, z_sea_theta_data) -# 64 to 32 bit data conversion tests -# NB: skip float63 to float32 overflow as float32 min/max is -/+ 3.40e+38 - -def test_convert_32_bit_with_int64(ua_plev_cube): - array = np.array([100, 10, 1, 0, -10], dtype=np.int64) - ua_plev_cube.data = array +# int64 to int32 data conversion tests +# NB: skip float64 to float32 overflow as float32 min/max is huge: -/+ 3.40e+38 +@pytest.mark.parametrize("array,_operator,bound", + [([100, 10, 1, 0, -10], None, None), + ([3000000000], operator.gt, np.iinfo(np.int32).max), + ([-3000000000], operator.lt, np.iinfo(np.int32).min)]) +def test_convert_32_bit(ua_plev_cube, array, _operator, bound): + ua_plev_cube.data = np.array(array, dtype=np.int64) um2nc.convert_32_bit(ua_plev_cube) - assert ua_plev_cube.data.dtype == np.int32 - - -def test_convert_32_bit_overflow_with_int64(ua_plev_cube): - array = np.array([3000000000], dtype=np.int64) - assert array[0] > np.iinfo(np.int32).max - ua_plev_cube.data = array - - with pytest.warns(): - um2nc.convert_32_bit(ua_plev_cube) - - assert ua_plev_cube.data.dtype == np.int32 - -def test_convert_32_bit_underflow_with_int64(ua_plev_cube): - array = np.array([-3000000000], dtype=np.int64) - assert array[0] < np.iinfo(np.int32).max - ua_plev_cube.data = array - - with pytest.warns(): - um2nc.convert_32_bit(ua_plev_cube) + if _operator: + assert _operator(array[0], bound) assert ua_plev_cube.data.dtype == np.int32 +# test float conversion separately, otherwise parametrize block is ugly def test_convert_32_bit_with_float64(ua_plev_cube): array = np.array([300.33, 30.456, 3.04, 0.0, -30.667], dtype=np.float64) ua_plev_cube.data = array From 15274011a250ec1cea59193a28df59bccc03589b Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Tue, 10 Sep 2024 12:00:08 +1000 Subject: [PATCH 07/11] Fix convert_32_bit() to emit RuntimeWarning. --- umpost/um2netcdf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py index 9515fc1..b9f1e39 100644 --- a/umpost/um2netcdf.py +++ b/umpost/um2netcdf.py @@ -753,10 +753,10 @@ def convert_32_bit(cube): if _max > MAX_NP_INT32: msg = f"Converting {cube.var_name} causes a 32 bit overflow!" - warnings.warn(msg) + warnings.warn(msg, category=RuntimeWarning) elif _min < MIN_NP_INT32: msg = f"Converting {cube.var_name} causes a 32 bit underflow!" - warnings.warn(msg) + warnings.warn(msg, category=RuntimeWarning) cube.data = cube.data.astype(np.int32) From d282d45a7f9fbdd75691439af61fbcbc2a8c5f56 Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Tue, 10 Sep 2024 12:01:42 +1000 Subject: [PATCH 08/11] Fix convert_32_bit() logic for under & overflow checks. --- umpost/um2netcdf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py index b9f1e39..c680b6c 100644 --- a/umpost/um2netcdf.py +++ b/umpost/um2netcdf.py @@ -754,7 +754,8 @@ def convert_32_bit(cube): if _max > MAX_NP_INT32: msg = f"Converting {cube.var_name} causes a 32 bit overflow!" warnings.warn(msg, category=RuntimeWarning) - elif _min < MIN_NP_INT32: + + if _min < MIN_NP_INT32: msg = f"Converting {cube.var_name} causes a 32 bit underflow!" warnings.warn(msg, category=RuntimeWarning) From 9bd6f270005518060b37d0bb77e4fbf30c4bf672 Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Tue, 10 Sep 2024 12:17:35 +1000 Subject: [PATCH 09/11] Update docstring for warning. --- umpost/um2netcdf.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py index c680b6c..115d792 100644 --- a/umpost/um2netcdf.py +++ b/umpost/um2netcdf.py @@ -744,6 +744,10 @@ def convert_32_bit(cube): Parameters ---------- cube : iris.cube object to modify. + + Warns + ----- + RuntimeWarning : if the cube has data over 32-bit limits, causing an overflow. """ if cube.data.dtype == 'float64': cube.data = cube.data.astype(np.float32) From 6c02f7d944b8fc31dd02e5fa5568c1db6fdbfb01 Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Tue, 10 Sep 2024 14:51:47 +1000 Subject: [PATCH 10/11] Update warnings, recommend --64 option to prevent integer under/overflows. --- umpost/um2netcdf.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py index 115d792..35a47f2 100644 --- a/umpost/um2netcdf.py +++ b/umpost/um2netcdf.py @@ -755,12 +755,13 @@ def convert_32_bit(cube): _max = np.max(cube.data) _min = np.min(cube.data) + msg = (f"32 bit under/overflow converting {cube.var_name}! Output data " + f"likely invalid. Use '--64' option to retain data integrity.") + if _max > MAX_NP_INT32: - msg = f"Converting {cube.var_name} causes a 32 bit overflow!" warnings.warn(msg, category=RuntimeWarning) if _min < MIN_NP_INT32: - msg = f"Converting {cube.var_name} causes a 32 bit underflow!" warnings.warn(msg, category=RuntimeWarning) cube.data = cube.data.astype(np.int32) From 02bc2317db2b0d1abab077652f4439fa484c3f28 Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Tue, 10 Sep 2024 14:53:13 +1000 Subject: [PATCH 11/11] Add task for future work. --- umpost/um2netcdf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py index 35a47f2..999844a 100644 --- a/umpost/um2netcdf.py +++ b/umpost/um2netcdf.py @@ -159,6 +159,7 @@ def cubewrite(cube, sman, compression, use64bit, verbose): except iris.exceptions.CoordinateNotFoundError: pass + # TODO: flag warnings as an error for the driver script? if not use64bit: convert_32_bit(cube)