Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Guard against ESM1.5 file naming collisions #133

Merged
merged 2 commits into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions test/test_conversion_driver_esm1p5.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,48 @@ def test_convert_esm1p5_output_dir_error():
)


@pytest.mark.parametrize(
    "input_output_pairs, expected_pairs",
    [
        # Case 1: four inputs map to one identical output path and must all
        # be dropped; the two remaining inputs have distinct output paths
        # and must be kept in order.
        (
            [
                (
                    Path("/output000/atmosphere/aiihca.pea1120"),
                    Path("/output000/atmosphere/netCDF/aiihca.pe-010101_dai.nc"),
                ),
                (
                    Path("/output000/atmosphere/aiihca.pea1130"),
                    Path("/output000/atmosphere/netCDF/aiihca.pe-010101_dai.nc"),
                ),
                (
                    Path("/output000/atmosphere/aiihca.pea1140"),
                    Path("/output000/atmosphere/netCDF/aiihca.pe-010101_dai.nc"),
                ),
                (
                    Path("/output000/atmosphere/aiihca.pea1150"),
                    Path("/output000/atmosphere/netCDF/aiihca.pe-010101_dai.nc"),
                ),
                (
                    Path("/output000/atmosphere/aiihca.aiihca.paa1jan"),
                    Path("/output000/atmosphere/netCDF/aiihca.pa-010101_mon.nc"),
                ),
                (
                    Path("/output000/atmosphere/aiihca.aiihca.paa1feb"),
                    Path("/output000/atmosphere/netCDF/aiihca.pa-010102_mon.nc"),
                ),
            ],
            [
                (
                    Path("/output000/atmosphere/aiihca.aiihca.paa1jan"),
                    Path("/output000/atmosphere/netCDF/aiihca.pa-010101_mon.nc"),
                ),
                (
                    Path("/output000/atmosphere/aiihca.aiihca.paa1feb"),
                    Path("/output000/atmosphere/netCDF/aiihca.pa-010102_mon.nc"),
                ),
            ],
        ),
        # Case 2: the output paths are spelled differently (one contains a
        # ".." component) but are expected to be detected as a collision,
        # leaving nothing to convert.
        (
            [
                (
                    Path("/output000/atmosphere/aiihca.pea1120"),
                    Path("/dir_1/dir_2/../aiihca.pe-010101_dai.nc"),
                ),
                (
                    Path("/output000/atmosphere/aiihca.pea1130"),
                    Path("/dir_1/aiihca.pe-010101_dai.nc"),
                ),
            ],
            [],
        ),
    ],
)
def test_filter_naming_collisions(input_output_pairs, expected_pairs):
    """
    Check that pairs sharing an output path are dropped with a warning,
    while pairs with a unique output path are passed through unchanged.
    """
    collision_warning = "Multiple inputs have same output path"

    with pytest.warns(match=collision_warning):
        # The filter is consumed inside the context manager so that any
        # warnings it raises while iterating are captured.
        surviving_pairs = list(
            esm1p5_convert.filter_name_collisions(input_output_pairs)
        )

    assert surviving_pairs == expected_pairs


def test_format_successes():
succeeded_inputs = [
Path("dir_1/fake_file_1"),
Expand Down
40 changes: 40 additions & 0 deletions umpost/conversion_driver_esm1p5.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,45 @@ def format_failures(failed, quiet):
yield failure_report


def _resolve_path(path):
    """
    Return a canonical form of ``path`` suitable for equality comparison.

    A leading ``~`` is expanded to the user's home directory first; the
    result is then passed through ``os.path.realpath`` so that symlinks and
    relative components are resolved.
    """
    expanded = os.path.expanduser(path)
    canonical = os.path.realpath(expanded)
    return canonical

blimlim marked this conversation as resolved.
Show resolved Hide resolved

def filter_name_collisions(input_output_pairs):
    """
    Remove input/output pairs which have overlapping output paths.

    Output paths are compared after being passed through ``_resolve_path``,
    so differently spelled paths that resolve to the same location count as
    a collision. Every pair involved in a collision is dropped (none of the
    colliding inputs is kept), and a warning is issued for each dropped pair.

    Parameters
    ----------
    input_output_pairs: iterable of tuples (input_path, output_path).

    Yields
    ------
    filtered_pairs: (input_path, output_path) tuples with unique
                    output_path values, in their original order.
    """
    # Convert to list to allow repeated traversal; the argument may be a
    # one-shot iterator.
    input_output_pairs = list(input_output_pairs)

    # Resolve each output path once and reuse the result below, rather than
    # resolving the same path again inside the loop.
    resolved_outputs = [
        _resolve_path(output) for _, output in input_output_pairs
    ]
    output_counts = collections.Counter(resolved_outputs)

    for (input_path, output_path), resolved in zip(input_output_pairs,
                                                   resolved_outputs):
        if output_counts[resolved] != 1:
            msg = (
                f"Multiple inputs have same output path {output_path}.\n"
                f"{input_path} will not be converted."
            )
            warnings.warn(msg)
            continue

        yield input_path, output_path


def convert_esm1p5_output_dir(esm1p5_output_dir):
"""
Driver function for converting ESM1.5 atmospheric outputs during a simulation.
Expand Down Expand Up @@ -321,6 +360,7 @@ def convert_esm1p5_output_dir(esm1p5_output_dir):

output_paths = [get_nc_write_path(path, nc_write_dir, get_ff_date(path)) for path in atm_dir_fields_files]
input_output_pairs = zip(atm_dir_fields_files, output_paths)
input_output_pairs = filter_name_collisions(input_output_pairs)

succeeded, failed = convert_fields_file_list(input_output_pairs)

Expand Down
Loading