diff --git a/test/test_conversion_driver_esm1p5.py b/test/test_conversion_driver_esm1p5.py
index cc1af8d..b004971 100644
--- a/test/test_conversion_driver_esm1p5.py
+++ b/test/test_conversion_driver_esm1p5.py
@@ -220,6 +220,48 @@ def test_convert_esm1p5_output_dir_error():
         )
 
 
+@pytest.mark.parametrize(
+    "input_output_pairs, expected_pairs",
+    [(  # input_output_pairs
+        [(Path("/output000/atmosphere/aiihca.pea1120"),
+          Path("/output000/atmosphere/netCDF/aiihca.pe-010101_dai.nc")),
+         (Path("/output000/atmosphere/aiihca.pea1130"),
+          Path("/output000/atmosphere/netCDF/aiihca.pe-010101_dai.nc")),
+         (Path("/output000/atmosphere/aiihca.pea1140"),
+          Path("/output000/atmosphere/netCDF/aiihca.pe-010101_dai.nc")),
+         (Path("/output000/atmosphere/aiihca.pea1150"),
+          Path("/output000/atmosphere/netCDF/aiihca.pe-010101_dai.nc")),
+         (Path("/output000/atmosphere/aiihca.aiihca.paa1jan"),
+          Path("/output000/atmosphere/netCDF/aiihca.pa-010101_mon.nc")),
+         (Path("/output000/atmosphere/aiihca.aiihca.paa1feb"),
+          Path("/output000/atmosphere/netCDF/aiihca.pa-010102_mon.nc"))],
+        # Expected pairs
+        [(Path("/output000/atmosphere/aiihca.aiihca.paa1jan"),
+          Path("/output000/atmosphere/netCDF/aiihca.pa-010101_mon.nc")),
+         (Path("/output000/atmosphere/aiihca.aiihca.paa1feb"),
+          Path("/output000/atmosphere/netCDF/aiihca.pa-010102_mon.nc"))]
+    ),
+     (  # input_output_pairs
+        [(Path("/output000/atmosphere/aiihca.pea1120"),
+          Path("/dir_1/dir_2/../aiihca.pe-010101_dai.nc")),
+         (Path("/output000/atmosphere/aiihca.pea1130"),
+          Path("/dir_1/aiihca.pe-010101_dai.nc"))],
+        # Expected pairs
+        []
+    )]
+)
+def test_filter_naming_collisions(input_output_pairs, expected_pairs):
+    """
+    Test that inputs with overlapping output paths are removed.
+    """
+    with pytest.warns(UserWarning, match="Multiple inputs have same output path"):
+        filtered_paths = list(
+            esm1p5_convert.filter_name_collisions(input_output_pairs)
+        )
+
+    assert filtered_paths == expected_pairs
+
+
 def test_format_successes():
     succeeded_inputs = [
         Path("dir_1/fake_file_1"),
diff --git a/umpost/conversion_driver_esm1p5.py b/umpost/conversion_driver_esm1p5.py
index 2a96f9c..b6b226e 100755
--- a/umpost/conversion_driver_esm1p5.py
+++ b/umpost/conversion_driver_esm1p5.py
@@ -268,6 +268,55 @@ def format_failures(failed, quiet):
             yield failure_report
 
 
+def _resolve_path(path):
+    """
+    Resolve path for use in comparison. Ensure that symlinks, relative paths,
+    and home directories are expanded.
+    """
+    return os.path.realpath(os.path.expanduser(path))
+
+
+def filter_name_collisions(input_output_pairs):
+    """
+    Remove input/output pairs which have overlapping output paths.
+
+    Every pair whose resolved output path is shared with another pair is
+    dropped, and a warning is issued for each dropped pair.
+
+    Parameters
+    ----------
+    input_output_pairs: iterator of tuples (input_path, output_path).
+
+    Yields
+    ------
+    filtered_pairs: (input_path, output_path) tuples with unique
+                    output_path values.
+    """
+    # Convert to list to allow repeated traversal.
+    input_output_pairs = list(input_output_pairs)
+
+    # Resolve each output path exactly once, so that the key which was
+    # counted is guaranteed to be the key which is looked up below. An
+    # unseen key has a count of 0 and would be dropped by the != 1 check.
+    resolved_outputs = [
+        _resolve_path(output) for _, output in input_output_pairs
+    ]
+    output_counts = collections.Counter(resolved_outputs)
+
+    for (input_path, output_path), resolved_output in zip(
+        input_output_pairs, resolved_outputs
+    ):
+        if output_counts[resolved_output] != 1:
+            msg = (
+                f"Multiple inputs have same output path {output_path}.\n"
+                f"{input_path} will not be converted."
+            )
+            warnings.warn(msg)
+            continue
+
+        yield input_path, output_path
+
+
 def convert_esm1p5_output_dir(esm1p5_output_dir):
     """
     Driver function for converting ESM1.5 atmospheric outputs during a simulation.
@@ -321,6 +370,7 @@ def convert_esm1p5_output_dir(esm1p5_output_dir):
     output_paths = [get_nc_write_path(path, nc_write_dir, get_ff_date(path))
                     for path in atm_dir_fields_files]
     input_output_pairs = zip(atm_dir_fields_files, output_paths)
+    input_output_pairs = filter_name_collisions(input_output_pairs)
 
     succeeded, failed = convert_fields_file_list(input_output_pairs)
 