diff --git a/doc/sphinx/start_config.rst b/doc/sphinx/start_config.rst index 21b98a403..e402e96a1 100644 --- a/doc/sphinx/start_config.rst +++ b/doc/sphinx/start_config.rst @@ -147,7 +147,11 @@ Options for workflow control * **run_pp**: (boolean) Set to *true* to run the preprocessor; default *true* -* **translate_data**: (boolean) Set to *true* to perform data translation; default *true* +* **translate_data**: (boolean) Set to *true* to perform data translation. If *false*, the preprocessor query + automatically uses the convention for each case in the input dataset for the query, and skips translating the + variable names and attributes to the POD convention. Note that this means that the precipRateToFluxConversion is not + applied. This option is best if you know that the input dataset has variable attributes that exactly match the + POD variable attributes; default *true* * **save_ps**: (boolean) Set to *true* to have PODs save postscript figures in addition to bitmaps; default *false* diff --git a/src/pod_setup.py b/src/pod_setup.py index 8b04577d8..5cd188984 100644 --- a/src/pod_setup.py +++ b/src/pod_setup.py @@ -299,13 +299,12 @@ def setup_pod(self, runtime_config: util.NameSpace, for case_name, case_dict in runtime_config.case_list.items(): cases[case_name].read_varlist(self, append_vars=append_vars) - # Translate the varlistEntries from the POD convention to the data convention if desired and the pod - # convention does not match the case convention + # Translate the varlistEntries from the POD convention to the data convention for the query if desired data_convention = case_dict.convention.lower() if not runtime_config.translate_data: - data_convention = 'no_translation' - self.log.info(f'Runtime option translate_data is set to .false.' + self.log.info(f'Runtime option translate_data is set to .false. 
' f'No data translation will be performed for case {case_name}.') + data_convention = 'no_translation' else: if pod_convention != data_convention: self.log.info(f'Translating POD variables from {pod_convention} to {data_convention}') diff --git a/src/preprocessor.py b/src/preprocessor.py index 69b4ee617..53dff151d 100644 --- a/src/preprocessor.py +++ b/src/preprocessor.py @@ -1093,8 +1093,6 @@ def query_catalog(self, # check that start and end times include runtime startdate and enddate if not var.is_static: var_obj = var.translation - if var.translation.convention == 'no_translation': - var_obj = var try: self.check_time_bounds(cat_dict[case_name], var_obj, freq) except LookupError: @@ -1133,8 +1131,6 @@ def execute_pp_functions(self, v: varlist_util.VarlistEntry, return xarray_ds - return xarray_ds - def setup(self, pod): """Method to do additional configuration immediately before :meth:`process` is called on each variable for *pod*. Implements metadata cleaning via @@ -1441,8 +1437,6 @@ def write_pp_catalog(self, ds_match = input_catalog_ds[case_name] for var in case_dict.varlist.iter_vars(): var_name = var.translation.name - if var.translation.convention == 'no_translation': - var_name = var.name ds_var = ds_match.data_vars.get(var_name, None) if ds_var is None: log.error(f'No var {var_name}') diff --git a/src/translation.py b/src/translation.py index ce090372f..9590763b0 100644 --- a/src/translation.py +++ b/src/translation.py @@ -341,7 +341,7 @@ def translate_coord(self, coord, class_dict=None, log=_log) -> dict: coord_name = new_coord['name'] elif hasattr(new_coord, 'out_name'): coord_name = new_coord['out_name'] - else: + else: # TODO add more robust check for key name == 'plev' (or whatever the coordinate name in the lut should be based on fieldlist) coord_name = [k for k in lut1.keys()][0] coord_copy = copy.deepcopy(new_coord) @@ -435,7 +435,7 @@ def translate(self, var, from_convention: str): ) -class NoTranslationFieldlist(metaclass=util.Singleton): 
+class NoTranslationFieldlist: """Class which partially implements the :class:`Fieldlist` interface but does no variable translation. :class:`~diagnostic.VarlistEntry` objects from the POD are passed through to create :class:`TranslatedVarlistEntry` objects. """ @@ -476,30 +476,49 @@ def translate_coord(self, coord, log=_log) -> TranslatedVarlistEntry: # should never get here - not called externally raise NotImplementedError - def translate(self, var, from_convention: str): + def translate(self, var, data_convention: str): """Returns :class:`TranslatedVarlistEntry` instance, populated with contents of input :class:`~diagnostic.VarlistEntry` instance. - .. note:: + .. note:: We return a copy of the :class:`~diagnostic.VarlistEntry` because logic in :class:`~xr_parser.DefaultDatasetParser` alters the translation based on the file's actual contents. """ coords_copy = copy.deepcopy(var.dims) + copy.deepcopy(var.scalar_coords) - # TODO: coerce_to_dataclass runs into recursion limit on var; fix that + fieldlist_obj = VariableTranslator().get_convention(data_convention) + fieldlist_entry = dict() + var_id = "" + for variable_id, variable_id_dict in fieldlist_obj.lut.items(): + if variable_id_dict.get('standard_name', None) == var.standard_name \ + or var.standard_name in variable_id_dict.get('alternate_standard_names'): + if variable_id_dict.get('realm', None) == var.realm \ + and variable_id_dict.get('units', None) == var.units.units: + fieldlist_entry = variable_id_dict + var_id = variable_id + break + if len(fieldlist_entry.keys()) < 1: + var.log.error(f'No {data_convention} fieldlist entry found for variable {var.name}') + return None + alt_standard_names = fieldlist_entry.get('alternate_standard_names') return TranslatedVarlistEntry( - name=var.name, + name=var_id, standard_name=var.standard_name, units=var.units, - convention=_NO_TRANSLATION_CONVENTION, + convention=var.convention, coords=coords_copy, modifier=var.modifier, + alternate_standard_names=alt_standard_names, + 
realm=var.realm, log=var.log ) class VariableTranslator(metaclass=util.Singleton): - """:class:`~util.Singleton` containing information for different variable + """The use of the :class:`~util.Singleton` metaclass means that only one + VariableTranslator instance is created (in the mdtf_framework.py driver script) + to hold all of the information from the fieldlist tables that is later shared. + The SUBCLASSES of the VariableTranslator are customized information for different variable naming conventions. These are defined in the ``data/fieldlist_*.jsonc`` files. """