From 6bc27606f8490cca5faec88413aecf2fd8f8603b Mon Sep 17 00:00:00 2001 From: AndrewEichmann-NOAA <58948505+AndrewEichmann-NOAA@users.noreply.github.com> Date: Tue, 12 Nov 2024 19:17:54 -0500 Subject: [PATCH] Changes prepoceanobs to using newly refactored BUFR converters (#1352) Just like the title. This mainly changes the converters used to the new ones that use yaml files, the templates for which are included, along with a little script that does most of the work of converting the earlier JSON templates to yaml. Also removes from `ocean.py` some methods for plotting, but can't go into global-workflow because of lack of the necessary python modules in that environment. --- .../bufr2ioda_insitu_profile_argo.yaml | 11 ++++++ .../bufr2ioda_insitu_profile_bathy.yaml | 11 ++++++ .../bufr2ioda_insitu_profile_glider.yaml | 11 ++++++ .../bufr2ioda_insitu_profile_tesac.yaml | 11 ++++++ .../bufr2ioda_insitu_profile_xbtctd.yaml | 11 ++++++ .../bufr2ioda_insitu_surface_trkob.yaml | 11 ++++++ parm/ioda/bufr2ioda/j2y.py | 24 ++++++++++++ parm/soca/obs/obs_list.yaml | 10 ++--- .../marine/b2i/b2iconverter/ioda_variables.py | 1 - ush/soca/prep_ocean_obs.py | 38 ++++++++++++------- ush/soca/prep_ocean_obs_utils.py | 7 ++-- 11 files changed, 122 insertions(+), 24 deletions(-) create mode 100644 parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_argo.yaml create mode 100644 parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_bathy.yaml create mode 100644 parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_glider.yaml create mode 100644 parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_tesac.yaml create mode 100644 parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_xbtctd.yaml create mode 100644 parm/ioda/bufr2ioda/bufr2ioda_insitu_surface_trkob.yaml create mode 100644 parm/ioda/bufr2ioda/j2y.py diff --git a/parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_argo.yaml b/parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_argo.yaml new file mode 100644 index 000000000..7dd28f21c --- /dev/null +++ b/parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_argo.yaml @@ -0,0 +1,11 @@ +cycle_datetime: '{{ current_cycle | to_YMDH }}' +cycle_type: '{{ RUN }}' +data_description: 6-hrly in situ ARGO profiles +data_format: subpfl +data_provider: U.S. NOAA +data_type: argo +dump_directory: '{{ DMPDIR }}' +ioda_directory: '{{ COM_OBS }}' +source: NCEP data tank +subsets: SUBPFL +ocean_basin: '{{ OCEAN_BASIN_FILE }}' diff --git a/parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_bathy.yaml b/parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_bathy.yaml new file mode 100644 index 000000000..f26d341ea --- /dev/null +++ b/parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_bathy.yaml @@ -0,0 +1,11 @@ +cycle_datetime: '{{ current_cycle | to_YMDH }}' +cycle_type: '{{ RUN }}' +data_description: 6-hrly in situ Bathythermal profiles +data_format: bathy +data_provider: U.S. NOAA +data_type: bathy +dump_directory: '{{ DMPDIR }}' +ioda_directory: '{{ COM_OBS }}' +source: NCEP data tank +subsets: BATHY +ocean_basin: '{{ OCEAN_BASIN_FILE }}' diff --git a/parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_glider.yaml b/parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_glider.yaml new file mode 100644 index 000000000..cc732df94 --- /dev/null +++ b/parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_glider.yaml @@ -0,0 +1,11 @@ +cycle_datetime: '{{ current_cycle | to_YMDH }}' +cycle_type: '{{ RUN }}' +data_description: 6-hrly in situ GLIDER profiles +data_format: subpfl +data_provider: U.S. NOAA +data_type: glider +dump_directory: '{{ DMPDIR }}' +ioda_directory: '{{ COM_OBS }}' +source: NCEP data tank +subsets: SUBPFL +ocean_basin: '{{ OCEAN_BASIN_FILE }}' diff --git a/parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_tesac.yaml b/parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_tesac.yaml new file mode 100644 index 000000000..01dc441ca --- /dev/null +++ b/parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_tesac.yaml @@ -0,0 +1,11 @@ +cycle_datetime: '{{ current_cycle | to_YMDH }}' +cycle_type: '{{ RUN }}' +data_description: 6-hrly in situ TESAC profiles +data_format: tesac +data_provider: U.S. NOAA +data_type: tesac +dump_directory: '{{ DMPDIR }}' +ioda_directory: '{{ COM_OBS }}' +source: NCEP data tank +subsets: TESAC +ocean_basin: '{{ OCEAN_BASIN_FILE }}' diff --git a/parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_xbtctd.yaml b/parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_xbtctd.yaml new file mode 100644 index 000000000..49d7f13d2 --- /dev/null +++ b/parm/ioda/bufr2ioda/bufr2ioda_insitu_profile_xbtctd.yaml @@ -0,0 +1,11 @@ +cycle_datetime: '{{ current_cycle | to_YMDH }}' +cycle_type: '{{ RUN }}' +data_description: 6-hrly in situ XBT/XCTD profiles +data_format: xbtctd +data_provider: U.S. NOAA +data_type: xbtctd +dump_directory: '{{ DMPDIR }}' +ioda_directory: '{{ COM_OBS }}' +source: NCEP data tank +subsets: XBTCTD +ocean_basin: '{{ OCEAN_BASIN_FILE }}' diff --git a/parm/ioda/bufr2ioda/bufr2ioda_insitu_surface_trkob.yaml b/parm/ioda/bufr2ioda/bufr2ioda_insitu_surface_trkob.yaml new file mode 100644 index 000000000..1385920c1 --- /dev/null +++ b/parm/ioda/bufr2ioda/bufr2ioda_insitu_surface_trkob.yaml @@ -0,0 +1,11 @@ +cycle_datetime: '{{ current_cycle | to_YMDH }}' +cycle_type: '{{ RUN }}' +data_description: 6-hrly in situ TRACKOB surface +data_format: trkob +data_provider: U.S. NOAA +data_type: trackob +dump_directory: '{{ DMPDIR }}' +ioda_directory: '{{ COM_OBS }}' +source: NCEP data tank +subsets: TRACKOB +ocean_basin: '{{ OCEAN_BASIN_FILE }}' diff --git a/parm/ioda/bufr2ioda/j2y.py b/parm/ioda/bufr2ioda/j2y.py new file mode 100644 index 000000000..c8b158517 --- /dev/null +++ b/parm/ioda/bufr2ioda/j2y.py @@ -0,0 +1,24 @@ +import json +import yaml +import argparse + +def convert_json_to_yaml(input_file, output_file): + # Load the JSON data from the input file + with open(input_file, 'r') as json_file: + json_data = json.load(json_file) + + # Convert and save as YAML in the output file + with open(output_file, 'w') as yaml_file: + yaml.dump(json_data, yaml_file, default_flow_style=False) + +if __name__ == '__main__': + # Set up argument parser + parser = argparse.ArgumentParser(description='Convert JSON to YAML.') + parser.add_argument('input_file', help='Path to the input JSON file') + parser.add_argument('output_file', help='Path to the output YAML file') + + args = parser.parse_args() + + # Perform the conversion + convert_json_to_yaml(args.input_file, args.output_file) + diff --git a/parm/soca/obs/obs_list.yaml b/parm/soca/obs/obs_list.yaml index c11dc1ace..0ac8ab5af 100644 --- a/parm/soca/obs/obs_list.yaml +++ b/parm/soca/obs/obs_list.yaml @@ -25,15 +25,15 @@ observers: #- !INC ${MARINE_OBS_YAML_DIR}/icec_ssmis_f17_l2.yaml # in situ: monthly -#- !INC ${MARINE_OBS_YAML_DIR}/insitu_profile_bathy.yaml +- !INC ${MARINE_OBS_YAML_DIR}/insitu_profile_bathy.yaml - !INC ${MARINE_OBS_YAML_DIR}/insitu_profile_argo.yaml -#- !INC ${MARINE_OBS_YAML_DIR}/insitu_profile_glider.yaml -#- !INC ${MARINE_OBS_YAML_DIR}/insitu_profile_tesac.yaml +- !INC ${MARINE_OBS_YAML_DIR}/insitu_profile_glider.yaml +- !INC ${MARINE_OBS_YAML_DIR}/insitu_profile_tesac.yaml #- !INC ${MARINE_OBS_YAML_DIR}/insitu_profile_tesac_salinity.yaml #- !INC ${MARINE_OBS_YAML_DIR}/insitu_profile_marinemammal.yaml -#- !INC ${MARINE_OBS_YAML_DIR}/insitu_profile_xbtctd.yaml +- !INC ${MARINE_OBS_YAML_DIR}/insitu_profile_xbtctd.yaml #- !INC ${MARINE_OBS_YAML_DIR}/insitu_surface_altkob.yaml -#- !INC ${MARINE_OBS_YAML_DIR}/insitu_surface_trkob.yaml +- !INC ${MARINE_OBS_YAML_DIR}/insitu_surface_trkob.yaml #- !INC ${MARINE_OBS_YAML_DIR}/insitu_surface_trkob_salinity.yaml # in situ: daily diff --git a/ush/ioda/bufr2ioda/marine/b2i/b2iconverter/ioda_variables.py b/ush/ioda/bufr2ioda/marine/b2i/b2iconverter/ioda_variables.py index 5319e0ff0..83a07771e 100644 --- a/ush/ioda/bufr2ioda/marine/b2i/b2iconverter/ioda_variables.py +++ b/ush/ioda/bufr2ioda/marine/b2i/b2iconverter/ioda_variables.py @@ -1,6 +1,5 @@ import numpy as np from pyiodaconv import bufr -from .ocean import OceanBasin from .util import * from .ioda_metadata import IODAMetadata from .ioda_addl_vars import IODAAdditionalVariables diff --git a/ush/soca/prep_ocean_obs.py b/ush/soca/prep_ocean_obs.py index be80bfcc5..da7b2da6a 100644 --- a/ush/soca/prep_ocean_obs.py +++ b/ush/soca/prep_ocean_obs.py @@ -10,6 +10,7 @@ from wxflow import (chdir, FileHandler, logit, + parse_j2yaml, save_as_yaml, Task, YAMLFile) @@ -74,6 +75,7 @@ def initialize(self): SOCA_INPUT_FIX_DIR = self.task_config['SOCA_INPUT_FIX_DIR'] ocean_mask_src = os.path.join(SOCA_INPUT_FIX_DIR, 'RECCAP2_region_masks_all_v20221025.nc') ocean_mask_dest = os.path.join(self.task_config.DATA, 'RECCAP2_region_masks_all_v20221025.nc') + self.task_config['OCEAN_BASIN_FILE'] = ocean_mask_dest try: FileHandler({'copy': [[ocean_mask_src, ocean_mask_dest]]}).sync() @@ -90,11 +92,15 @@ def initialize(self): logger.critical(f"OBSPREP_YAML file {OBSPREP_YAML} does not exist") raise FileNotFoundError - JSON_TMPL_DIR = self.task_config.JSON_TMPL_DIR - BUFR2IODA_PY_DIR = self.task_config.BUFR2IODA_PY_DIR + # TODO (AFE): this should be in the task config file in g-w + BUFR2IODA_TMPL_DIR = os.path.join(self.task_config.HOMEgfs, 'parm/gdas/ioda/bufr2ioda') + # TODO (AFE): this should be in the task config file in g-w, and reaches into GDASApp + # in order to avoid touching the g-w until we know this will remain a task + BUFR2IODA_PY_DIR = os.path.join(self.task_config.HOMEgfs, 'sorc/gdas.cd/ush/ioda/bufr2ioda/marine/b2i') COMIN_OBS = self.task_config.COMIN_OBS COMOUT_OBS = self.task_config['COMOUT_OBS'] + OCEAN_BASIN_FILE = self.task_config['OCEAN_BASIN_FILE'] if not os.path.exists(COMOUT_OBS): os.makedirs(COMOUT_OBS) @@ -146,32 +152,34 @@ def initialize(self): obsprep_space['window end'] = self.window_end ioda_filename = f"{RUN}.t{cyc:02d}z.{obs_space_name}.{cdatestr}.nc4" obsprep_space['output file'] = ioda_filename + ioda_config_file = obtype + '2ioda.yaml' # set up the config file for conversion to IODA for bufr and # netcdf files respectively if obsprep_space['type'] == 'bufr': - gen_bufr_json_config = {'RUN': RUN, - 'current_cycle': cdate, - 'DMPDIR': COMIN_OBS, - 'COM_OBS': COMIN_OBS} - json_config_file = os.path.join(COMIN_OBS, - f"{obtype}_{cdatestr}.json") - obsprep_space['conversion config file'] = json_config_file + bufrconv_config = { + 'RUN': RUN, + 'current_cycle': cdate, + 'DMPDIR': COMIN_OBS, + 'COM_OBS': COMIN_OBS, + 'OCEAN_BASIN_FILE': OCEAN_BASIN_FILE} + obsprep_space['conversion config file'] = ioda_config_file bufr2iodapy = BUFR2IODA_PY_DIR + '/bufr2ioda_' + obtype + '.py' obsprep_space['bufr2ioda converter'] = bufr2iodapy - tmpl_filename = 'bufr2ioda_' + obtype + '.json' - template = os.path.join(JSON_TMPL_DIR, tmpl_filename) + tmpl_filename = 'bufr2ioda_' + obtype + '.yaml' + bufrconv_template = os.path.join(BUFR2IODA_TMPL_DIR, tmpl_filename) + try: - gen_bufr_json(gen_bufr_json_config, template, json_config_file) + bufrconv = parse_j2yaml(bufrconv_template, bufrconv_config) + bufrconv.save(ioda_config_file) except Exception as e: - logger.warning(f"An exeception {e} occured while trying to run gen_bufr_json") + logger.warning(f"An exeception {e} occured while trying to create BUFR2IODA config") logger.warning(f"obtype {obtype} will be skipped") break # go to next observer in OBS_YAML obsspaces_to_convert.append({"obs space": obsprep_space}) elif obsprep_space['type'] == 'nc': - ioda_config_file = obtype + '2ioda.yaml' obsprep_space['conversion config file'] = ioda_config_file save_as_yaml(obsprep_space, ioda_config_file) @@ -260,6 +268,8 @@ def finalize(self): try: FileHandler({'copy': [[output_file, output_file_dest]]}).sync() FileHandler({'copy': [[conv_config_file, conv_config_file_dest]]}).sync() + except Exception as e: + logger.warning(f"An exeception {e} occured while trying to run gen_bufr_json") except OSError: logger.warning(f"Obs file not found, possible IODA converter failure)") continue diff --git a/ush/soca/prep_ocean_obs_utils.py b/ush/soca/prep_ocean_obs_utils.py index 11b18fd37..9ecb06464 100755 --- a/ush/soca/prep_ocean_obs_utils.py +++ b/ush/soca/prep_ocean_obs_utils.py @@ -68,13 +68,12 @@ def run_netcdf_to_ioda(obsspace_to_convert, OCNOBS2IODAEXEC): def run_bufr_to_ioda(obsspace_to_convert): logger.info(f"running run_bufr_to_ioda on {obsspace_to_convert['name']}") - json_output_file = obsspace_to_convert['conversion config file'] + bufrconv_yaml = obsspace_to_convert['conversion config file'] bufr2iodapy = obsspace_to_convert['bufr2ioda converter'] try: - subprocess.run(['python', bufr2iodapy, '-c', json_output_file, '-v'], check=True) - logger.info(f"ran ioda converter on obs space {obsspace_to_convert['name']} successfully") + subprocess.run(['python', bufr2iodapy, '-c', bufrconv_yaml], check=True) return 0 except subprocess.CalledProcessError as e: - logger.warning(f"bufr2ioda converter failed with error {e}, \ + logger.warning(f"bufr2ioda converter failed with error >{e}<, \ return code {e.returncode}") return e.returncode