From abc89d68376b2194e01f8c3d2f8ea32862848411 Mon Sep 17 00:00:00 2001
From: Jacob Mims <122570226+jtmims@users.noreply.github.com>
Date: Wed, 18 Dec 2024 11:11:07 -0600
Subject: [PATCH] Fix various pp issues related to running seaice_suite (#721)

* fix pp issues for seaice_suite

* fix arg issue

* rename functions

* add default return for conversion function
---
 data/fieldlist_GFDL.jsonc                 |  8 ++-
 .../seaice_suite_sic_mean_sigma.py        |  2 +-
 src/data_sources.py                       | 47 +++++++++++++-
 src/preprocessor.py                       | 64 +++++++++++--------
 src/units.py                              |  2 +
 src/xr_parser.py                          | 19 +++---
 6 files changed, 101 insertions(+), 41 deletions(-)

diff --git a/data/fieldlist_GFDL.jsonc b/data/fieldlist_GFDL.jsonc
index 49f024d3f..16c225a38 100644
--- a/data/fieldlist_GFDL.jsonc
+++ b/data/fieldlist_GFDL.jsonc
@@ -163,7 +163,13 @@
       "realm": "atmos",
       "units": "1",
       "ndim": 3
-    },
+    },
+    "siconc": {
+      "standard_name": "sea_ice_area_fraction",
+      "realm": "seaIce",
+      "units": "0-1",
+      "ndim": 3
+    },
     "IWP": {
       "standard_name": "atmosphere_mass_content_of_cloud_ice",
       "long_name": "Ice water path",
diff --git a/diagnostics/seaice_suite/seaice_suite_sic_mean_sigma.py b/diagnostics/seaice_suite/seaice_suite_sic_mean_sigma.py
index 043581b0b..6ef11127f 100644
--- a/diagnostics/seaice_suite/seaice_suite_sic_mean_sigma.py
+++ b/diagnostics/seaice_suite/seaice_suite_sic_mean_sigma.py
@@ -91,7 +91,7 @@ def readindata(file, varname='siconc', firstyr='1979', lastyr='2014'):
 
 # 1) Loading model data files:
 
-input_file = "{DATADIR}/mon/{CASENAME}.{siconc_var}.mon.nc".format(**os.environ)
+input_file = os.environ['SICONC_FILE']
 obsoutput_dir = "{WORK_DIR}/obs/".format(**os.environ)
 modoutput_dir = "{WORK_DIR}/model/".format(**os.environ)
 figures_dir = "{WORK_DIR}/model/".format(**os.environ)
diff --git a/src/data_sources.py b/src/data_sources.py
index e862c0a0e..a8db76c1e 100644
--- a/src/data_sources.py
+++ b/src/data_sources.py
@@ -64,6 +64,36 @@ def read_varlist(self, parent, append_vars: bool=False):
 
     def set_date_range(self, startdate: str, enddate: str):
         self.date_range = util.DateRange(start=startdate, end=enddate)
+
+    def set_query(self, var: varlist_util.VarlistEntry, path_regex: str):
+        realm_regex = var.realm + '*'
+        date_range = var.T.range
+        var_id = var.name
+        standard_name = var.standard_name
+        if var.translation.convention is not None:
+            var_id = var.translation.name
+            standard_name = var.translation.standard_name
+            if any(var.translation.alternate_standard_names):
+                standard_name = [var.translation.standard_name] + var.translation.alternate_standard_names
+            date_range = var.translation.T.range
+        if var.is_static:
+            date_range = None
+            freq = "fx"
+        else:
+            freq = var.T.frequency
+            if not isinstance(freq, str):
+                freq = freq.format_local()
+            if freq == 'hr':
+                freq = '1hr'
+
+        # define initial query dictionary with variable settings requirements that do not change if
+        # the variable is translated
+        self.query['frequency'] = freq
+        self.query['path'] = path_regex
+        self.query['realm'] = realm_regex
+        self.query['standard_name'] = standard_name
+        self.query['variable_id'] = var_id
+
 
     def translate_varlist(self,
                           var: varlist_util.VarlistEntry,
@@ -94,7 +124,10 @@ class CMIPDataSource(DataSourceBase):
     # col_spec = sampleLocalFileDataSource_col_spec
     # varlist = diagnostic.varlist
     convention: str = "CMIP"
-
+
+    def set_query(self, var: varlist_util.VarlistEntry, path_regex: str):
+        super().set_query(var, path_regex)
+        return
 
 @data_source.maker
 class CESMDataSource(DataSourceBase):
@@ -105,7 +138,10 @@ class CESMDataSource(DataSourceBase):
     # col_spec = sampleLocalFileDataSource_col_spec
     # varlist = diagnostic.varlist
     convention: str = "CESM"
-
+
+    def set_query(self, var: varlist_util.VarlistEntry, path_regex: str):
+        super().set_query(var, path_regex)
+        return
 
 @data_source.maker
 class GFDLDataSource(DataSourceBase):
@@ -116,3 +152,10 @@ class GFDLDataSource(DataSourceBase):
     # col_spec = sampleLocalFileDataSource_col_spec
     # varlist = diagnostic.varlist
     convention: str = "GFDL"
+
+    def set_query(self, var: varlist_util.VarlistEntry, path_regex: str):
+        super().set_query(var, path_regex)
+        # this is hacky, but prevents the framework from grabbing from ice_1x1deg
+        if self.query['realm'] == 'seaIce*':
+            self.query['realm'] = 'ice'
+        return
diff --git a/src/preprocessor.py b/src/preprocessor.py
index aac1b8ca7..c15fc5c3d 100644
--- a/src/preprocessor.py
+++ b/src/preprocessor.py
@@ -97,6 +97,36 @@ def execute(self, var: varlist_util.VarlistEntry,
         pass
 
 
+class PercentConversionFunction(PreprocessorFunctionBase):
+    """A PreprocessorFunction which converts the dependent variable's units and values,
+    for the specific case of percentages. The unit ``0-1`` is not defined in the UDUNITS-2
+    library, so this function handles the case where we have to convert between
+    ``0-1`` and ``%``.
+    """
+
+    _std_name_tuple = ('0-1', '%')
+
+    def execute(self, var, ds, **kwargs):
+        var_unit = getattr(var, "units", "")
+        tv = var.translation  # abbreviate
+        tv_unit = getattr(tv, "units", "")
+        # 0-1 to %
+        if str(tv_unit) == self._std_name_tuple[0] and str(var_unit) == self._std_name_tuple[1]:
+            ds[tv.name].attrs['units'] = '%'
+            ds[tv.name].values = ds[tv.name].values*100
+            return ds
+        # % to 0-1
+        if str(tv_unit) == self._std_name_tuple[1] and str(var_unit) == self._std_name_tuple[0]:
+            ds[tv.name].attrs['units'] = '0-1'
+            # sometimes % is [0,1] already
+            if ds[tv.name].values[:, :, 3].max() < 1.5:
+                return ds
+            else:
+                ds[tv.name].values = ds[tv.name].values/100
+                return ds
+
+        return ds
+
 class PrecipRateToFluxFunction(PreprocessorFunctionBase):
     """A PreprocessorFunction which converts the dependent variable's units,
     for the specific case of precipitation. Flux and precip rate differ by a factor
@@ -694,7 +724,7 @@ def _functions(self):
         """
 
         # normal operation: run all functions
         return [
-            AssociatedVariablesFunction,
+            AssociatedVariablesFunction, PercentConversionFunction,
             PrecipRateToFluxFunction, ConvertUnitsFunction, ExtractLevelFunction, RenameVariablesFunction
         ]
@@ -1012,33 +1042,11 @@ def query_catalog(self,
             path_regex = [re.compile(r'({})'.format(case_name))]
 
             for var in case_d.varlist.iter_vars():
-                realm_regex = var.realm + '*'
                 date_range = var.T.range
-                var_id = var.name
-                standard_name = var.standard_name
-                if var.translation.convention is not None:
-                    var_id = var.translation.name
-                    standard_name = var.translation.standard_name
-                    if any(var.translation.alternate_standard_names):
-                        standard_name = [var.translation.standard_name] + var.translation.alternate_standard_names
-                    date_range = var.translation.T.range
-                if var.is_static:
-                    date_range = None
-                    freq = "fx"
-                else:
-                    freq = var.T.frequency
-                    if not isinstance(freq, str):
-                        freq = freq.format_local()
-                    if freq == 'hr':
-                        freq = '1hr'
-
+
                 # define initial query dictionary with variable settings requirements that do not change if
                 # the variable is translated
-                case_d.query['frequency'] = freq
-                case_d.query['path'] = path_regex
-                case_d.query['realm'] = realm_regex
-                case_d.query['standard_name'] = standard_name
-                case_d.query['variable_id'] = var_id
+                case_d.set_query(var, path_regex)
 
                 # change realm key name if necessary
                 if cat.df.get('modeling_realm', None) is not None:
@@ -1047,7 +1055,7 @@ def query_catalog(self,
                 # search catalog for convention specific query object
                 var.log.info("Querying %s for variable %s for case %s.",
                              data_catalog,
-                             var_id,
+                             case_d.query['variable_id'],
                              case_name)
                 cat_subset = cat.search(**case_d.query)
                 if cat_subset.df.empty:
@@ -1086,7 +1094,7 @@ def query_catalog(self,
                                            f"configuration file.")
                 else:
                     raise util.DataRequestError(
-                        f"Unable to find match or alternate for {var_id}"
+                        f"Unable to find match or alternate for {case_d.query['variable_id']}"
                         f" for case {case_name} in {data_catalog}")
 
             # Get files in specified date range
@@ -1162,7 +1170,7 @@ def query_catalog(self,
             # check that the trimmed variable data in the merged dataset matches the desired date range
             if not var.is_static:
                 try:
-                    self.check_time_bounds(cat_dict[case_name], var.translation, freq)
+                    self.check_time_bounds(cat_dict[case_name], var.translation, var.T.frequency)
                 except LookupError:
-                    var.log.error(f'Time bounds in trimmed dataset for {var_id} in case {case_name} do not match'
-                                  f'requested date_range.')
+                    var.log.error(f"Time bounds in trimmed dataset for {case_d.query['variable_id']} in case"
+                                  f" {case_name} do not match requested date_range.")
diff --git a/src/units.py b/src/units.py
index 7929110ca..57d35334e 100644
--- a/src/units.py
+++ b/src/units.py
@@ -135,6 +135,8 @@ def conversion_factor(source_unit, dest_unit):
 
     *source_unit*, *dest_unit* are coerced to :class:`Units` objects via
     :func:`to_cfunits`.
     """
+    if str(source_unit) == str(dest_unit):
+        return 1.0  # bypass conversion if the unit strings match, allowing non-UDUNITS units like '0-1' to be used
     source_unit, dest_unit = to_equivalent_units(source_unit, dest_unit)
     return Units.conform(1.0, source_unit, dest_unit)
diff --git a/src/xr_parser.py b/src/xr_parser.py
index 371f746b3..f7f20a878 100644
--- a/src/xr_parser.py
+++ b/src/xr_parser.py
@@ -194,7 +194,7 @@ def _old_axes_dict(self, var_name=None):
             if len(v) > 1 and var_name is not None:
                 ax = [c for c in v if c in itertools.chain.from_iterable(axes_obj.cf.coordinates.values())]
                 del_ax = [d for d in v if d not in itertools.chain.from_iterable(axes_obj.cf.coordinates.values())]
-                if del_ax is not None: # remove the entries that are not in the cf.coordinates.values dict
+                if del_ax is not None and len(del_ax) > 0: # remove the entries that are not in the cf.coordinates.values dict
                     # append entries that are in the cf.coordinates.values dict if they are missing in coords_list
                     # and dims_list
                     if del_ax[0] in coords_list:
@@ -208,14 +208,15 @@ def _old_axes_dict(self, var_name=None):
                 if ax is not None:
                     vardict[k] = ax
-                    if ax[0] not in coords_list:
-                        _log.warning(("cf_xarray fix: %s axis %s not in dimensions "
-                                      "for %s; dropping."), k, ax[0], var_name)
-                        delete_keys.append(k)
-                    else:
-                        coords_list.remove(ax[0])
-                        if ax[0] in dims_list:
-                            dims_list.remove(ax[0])
+                    for a in ax:
+                        if a not in coords_list:
+                            _log.warning(("cf_xarray fix: %s axis %s not in dimensions "
+                                          "for %s; dropping."), k, a, var_name)
+                            delete_keys.append(k)
+                        else:
+                            coords_list.remove(a)
+                            if a in dims_list:
+                                dims_list.remove(a)
             elif len(v) == 1:
                 if v[0] not in coords_list:
                     _log.warning(("cf_xarray fix: %s axis %s not in dimensions "