ACCESS-NRI · truth-quark · Sep 12, 2024 · Sep 6, 2024 · Sep 6, 2024 · Sep 6, 2024
diff --git a/test/test_um2netcdf.py b/test/test_um2netcdf.py
@@ -397,6 +397,7 @@ def __init__(self, item_code, var_name=None, attributes=None, units=None):
         self.standard_name = None
         self.long_name = None
         self.coord = {}
+        self.data = None
 
     def name(self):
         # mimic iris API
@@ -740,3 +741,42 @@ def test_fix_level_coord_skipped_if_no_levels(z_sea_rho_data, z_sea_theta_data):
     m_cube = mock.Mock(iris.cube.Cube)
     m_cube.coord.side_effect = iris.exceptions.CoordinateNotFoundError
     um2nc.fix_level_coord(m_cube, z_sea_rho_data, z_sea_theta_data)
+
+
+# 64 to 32 bit data conversion tests
+# NB: skip float64 to float32 overflow as float32 min/max is -/+ 3.40e+38
+
+def test_convert_32_bit_with_int64(ua_plev_cube):
+    # in-range int64 values should simply be narrowed to int32
+    ua_plev_cube.data = np.array([100, 10, 1, 0, -10], dtype=np.int64)
+    um2nc.convert_32_bit(ua_plev_cube)
+    assert ua_plev_cube.data.dtype == np.int32
+
+
+def test_convert_32_bit_overflow_with_int64(ua_plev_cube):
+    array = np.array([3000000000], dtype=np.int64)
+    assert array[0] > np.iinfo(np.int32).max
+    ua_plev_cube.data = array
+    # require the specific overflow warning, not just any warning
+    with pytest.warns(UserWarning, match="overflow"):
+        um2nc.convert_32_bit(ua_plev_cube)
+
+    assert ua_plev_cube.data.dtype == np.int32
+
+
+def test_convert_32_bit_underflow_with_int64(ua_plev_cube):
+    array = np.array([-3000000000], dtype=np.int64)
+    assert array[0] < np.iinfo(np.int32).min
+    ua_plev_cube.data = array
+    # require the specific underflow warning, not just any warning
+    with pytest.warns(UserWarning, match="underflow"):
+        um2nc.convert_32_bit(ua_plev_cube)
+
+    assert ua_plev_cube.data.dtype == np.int32
+
+
+def test_convert_32_bit_with_float64(ua_plev_cube):
+    # float64 input should come back as float32
+    ua_plev_cube.data = np.array([300.33, 30.456, 3.04, 0.0, -30.667], dtype=np.float64)
+    um2nc.convert_32_bit(ua_plev_cube)
+    assert ua_plev_cube.data.dtype == np.float32
diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py
@@ -160,10 +160,7 @@ def cubewrite(cube, sman, compression, use64bit, verbose):
         pass
 
     if not use64bit:
-        if cube.data.dtype == 'float64':
-            cube.data = cube.data.astype(np.float32)
-        elif cube.data.dtype == 'int64':
-            cube.data = cube.data.astype(np.int32)
+        convert_32_bit(cube)
 
     # Set the missing_value attribute. Use an array to force the type to match
     # the data type
@@ -736,6 +733,34 @@ def fix_level_coord(cube, z_rho, z_theta, tol=1e-6):
                 c_sigma.var_name = 'sigma_theta'
 
 
+MAX_NP_INT32 = np.iinfo(np.int32).max  # upper int32 bound for range checks
+MIN_NP_INT32 = np.iinfo(np.int32).min  # lower int32 bound for range checks
+
+
+def convert_32_bit(cube):
+    """
+    Convert 64 bit int/float data to 32 bit (in place).
+
+    Warns if int64 values fall outside the int32 range (cast still applied).
+
+    Parameters
+    ----------
+    cube : iris.cube object to modify.
+    """
+    if cube.data.dtype == 'float64':
+        cube.data = cube.data.astype(np.float32)
+    elif cube.data.dtype == 'int64':
+        # independent checks (data may break both bounds); skip empty arrays
+        if cube.data.size and np.max(cube.data) > MAX_NP_INT32:
+            msg = f"Converting {cube.var_name} causes a 32 bit overflow!"
+            warnings.warn(msg)
+        if cube.data.size and np.min(cube.data) < MIN_NP_INT32:
+            msg = f"Converting {cube.var_name} causes a 32 bit underflow!"
+            warnings.warn(msg)
+
+        cube.data = cube.data.astype(np.int32)
+
+
 def parse_args():
     parser = argparse.ArgumentParser(description="Convert UM fieldsfile to netcdf")
     parser.add_argument('-k', dest='nckind', required=False, type=int,