From 4d4ba0c8dfbb34984b3ad5f8ab4339faf65f23d7 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Sun, 17 Mar 2024 15:35:42 +0000 Subject: [PATCH 1/4] WIP tests etc --- tests/data_testcase_schemas.py | 2 +- .../test_xarray_load_and_save_equivalence.py | 53 +++++++++++++++++-- 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/tests/data_testcase_schemas.py b/tests/data_testcase_schemas.py index f44bb22..dfbc830 100644 --- a/tests/data_testcase_schemas.py +++ b/tests/data_testcase_schemas.py @@ -511,6 +511,6 @@ def standard_testcase(request, session_testdir): # We think Xarray can load ~anything (maybe returning nothing) "load": [], # Xarray can save ~anything - "save": [], + "save": [r"testdata__\testing\test_monotonic_coordinate"], }, } diff --git a/tests/integration/test_xarray_load_and_save_equivalence.py b/tests/integration/test_xarray_load_and_save_equivalence.py index 8f4d340..7ce1982 100644 --- a/tests/integration/test_xarray_load_and_save_equivalence.py +++ b/tests/integration/test_xarray_load_and_save_equivalence.py @@ -5,6 +5,7 @@ (1) check equivalence of cubes : xarray.load(file) VS xarray.load(ncdata(file)) (2) check equivalence of files : xarray -> file VS xarray->ncdata->file """ +import numpy as np from subprocess import check_output import pytest @@ -52,7 +53,9 @@ def test_load_direct_vs_viancdata( source_filepath = standard_testcase.filepath ncdata = from_nc4(source_filepath) - if standard_testcase.name in BAD_LOADSAVE_TESTCASES["xarray"]["load"]: + excluded_cases = BAD_LOADSAVE_TESTCASES["xarray"]["load"] + excluded_cases.extend(BAD_LOADSAVE_TESTCASES["xarray"]["save"]) + if standard_testcase.name in excluded_cases: pytest.skip("excluded testcase (xarray cannot load)") # _Debug = True @@ -69,15 +72,56 @@ def test_load_direct_vs_viancdata( # Load the testcase with Xarray. xr_ds = xarray.open_dataset(source_filepath, chunks=-1) + # Load same, via ncdata xr_ncdata_ds = to_xarray(ncdata) - # Xarray dataset (variable) comparison is problematic - # result = xr_ncdata_ds.identical(xr_ds) + testvar_names = None + if (standard_testcase.name == "ds_testdata1") or ("toa_brightness" in standard_testcase.name): + testvar_names = ['time'] + elif standard_testcase.name == r"testdata__\lambert_azimuthal_equal_area\euro_air_temp": + testvar_names = ["time", "forecast_reference_time"] + # testvar_names = ["time"] + # testvar_names = ["forecast_reference_time"] + elif "theta_nodal" in standard_testcase.name: + testvar_names = ["Mesh0"] + + if testvar_names: + # print("XR ds") + # print(xr_ds) + # + # print("") + # print("xrds == xrncds ?", xr_ds.identical(xr_ncdata_ds)) + + for testvar_name in testvar_names: + print(f"\nxr_ds['{testvar_name}']:\n", xr_ds[testvar_name]) + print(f"xr_ncdata_ds['{testvar_name}']:\n", xr_ncdata_ds[testvar_name]) + print(f"xr_ds['{testvar_name}'].encoding['units']:\n", xr_ds[testvar_name].encoding.get('units')) + print(f"xr_ncdata_ds['{testvar_name}'].encoding['units']:\n", xr_ncdata_ds[testvar_name].encoding.get('units')) + # Xarray dataset (variable) comparison is problematic + # result = xr_ncdata_ds.identical(xr_ds) + + # do_fix = "none" + # do_fix = "fix_xrds" + # do_fix = "fix_xrncds" + do_fix = "fix_origshape" + if do_fix == "fix_xrncds": + print('\nOLD xrncds data:\n', xr_ncdata_ds[testvar_name].data) + xr_ncdata_ds[testvar_name].data = xr_ncdata_ds[testvar_name].data.compute() + print('NEW xrncds data:\n', xr_ncdata_ds[testvar_name].data) + elif do_fix == "fix_xrds": + import dask.array as da + print('\nOLD xrds data:\n', xr_ds[testvar_name].data) + data = xr_ds[testvar_name].data + xr_ds[testvar_name].data = da.from_array(data, meta=np.ndarray((), dtype=data.dtype), chunks=-1) + print('NEW xrds data:\n', xr_ds[testvar_name].data) + elif do_fix == "fix_origshape": + xr_ncdata_ds[testvar_name].encoding['original_shape'] = () # So for now, save Xarray datasets to disk + compare that way. temp_xr_path = tmp_path / "tmp_out_xr.nc" temp_xr_ncdata_path = tmp_path / "tmp_out_xr_ncdata.nc" + xr_ds.to_netcdf(temp_xr_path) xr_ncdata_ds.to_netcdf(temp_xr_ncdata_path) @@ -110,6 +154,9 @@ def test_save_direct_vs_viancdata(standard_testcase, tmp_path): ncdata = from_nc4(source_filepath) excluded_testcases = BAD_LOADSAVE_TESTCASES["xarray"]["load"] + excluded_testcases.extend(BAD_LOADSAVE_TESTCASES["xarray"]["save"]) + for excl in excluded_testcases: + print(' ', excl) if any(key in standard_testcase.name for key in excluded_testcases): pytest.skip("excluded testcase") From ab006a82a53826e66cbb753773c097a003266dac Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Sun, 17 Mar 2024 16:21:04 +0000 Subject: [PATCH 2/4] Check xrload == xrncload with xr.identical(). --- .../test_xarray_load_and_save_equivalence.py | 73 +------------------ 1 file changed, 2 insertions(+), 71 deletions(-) diff --git a/tests/integration/test_xarray_load_and_save_equivalence.py b/tests/integration/test_xarray_load_and_save_equivalence.py index 7ce1982..f3a877a 100644 --- a/tests/integration/test_xarray_load_and_save_equivalence.py +++ b/tests/integration/test_xarray_load_and_save_equivalence.py @@ -76,77 +76,8 @@ def test_load_direct_vs_viancdata( # Load same, via ncdata xr_ncdata_ds = to_xarray(ncdata) - testvar_names = None - if (standard_testcase.name == "ds_testdata1") or ("toa_brightness" in standard_testcase.name): - testvar_names = ['time'] - elif standard_testcase.name == r"testdata__\lambert_azimuthal_equal_area\euro_air_temp": - testvar_names = ["time", "forecast_reference_time"] - # testvar_names = ["time"] - # testvar_names = ["forecast_reference_time"] - elif "theta_nodal" in standard_testcase.name: - testvar_names = ["Mesh0"] - - if testvar_names: - # print("XR ds") - # print(xr_ds) - # - # print("") - # print("xrds == xrncds ?", xr_ds.identical(xr_ncdata_ds)) - - for testvar_name in testvar_names: - print(f"\nxr_ds['{testvar_name}']:\n", xr_ds[testvar_name]) - print(f"xr_ncdata_ds['{testvar_name}']:\n", xr_ncdata_ds[testvar_name]) - print(f"xr_ds['{testvar_name}'].encoding['units']:\n", xr_ds[testvar_name].encoding.get('units')) - print(f"xr_ncdata_ds['{testvar_name}'].encoding['units']:\n", xr_ncdata_ds[testvar_name].encoding.get('units')) - # Xarray dataset (variable) comparison is problematic - # result = xr_ncdata_ds.identical(xr_ds) - - # do_fix = "none" - # do_fix = "fix_xrds" - # do_fix = "fix_xrncds" - do_fix = "fix_origshape" - if do_fix == "fix_xrncds": - print('\nOLD xrncds data:\n', xr_ncdata_ds[testvar_name].data) - xr_ncdata_ds[testvar_name].data = xr_ncdata_ds[testvar_name].data.compute() - print('NEW xrncds data:\n', xr_ncdata_ds[testvar_name].data) - elif do_fix == "fix_xrds": - import dask.array as da - print('\nOLD xrds data:\n', xr_ds[testvar_name].data) - data = xr_ds[testvar_name].data - xr_ds[testvar_name].data = da.from_array(data, meta=np.ndarray((), dtype=data.dtype), chunks=-1) - print('NEW xrds data:\n', xr_ds[testvar_name].data) - elif do_fix == "fix_origshape": - xr_ncdata_ds[testvar_name].encoding['original_shape'] = () - - # So for now, save Xarray datasets to disk + compare that way. - temp_xr_path = tmp_path / "tmp_out_xr.nc" - temp_xr_ncdata_path = tmp_path / "tmp_out_xr_ncdata.nc" - - xr_ds.to_netcdf(temp_xr_path) - xr_ncdata_ds.to_netcdf(temp_xr_ncdata_path) - - if _Debug: - print("\n\n-----\nResult ncdump : 'DIRECT' nc4 -> xr -> nc4 ... ") - txt = check_output([f"ncdump {temp_xr_path}"], shell=True).decode() - print(txt) - print( - "\n\n-----\nResult ncdump : 'INDIRECT'' nc4 -> ncdata-> xr -> nc4 ... " - ) - txt = check_output( - [f"ncdump {temp_xr_ncdata_path}"], shell=True - ).decode() - print(txt) - - # FOR NOW: compare with experimental ncdata comparison. - # I know this is a bit circular, but it is useful for debugging, for now ... - result = compare_nc_datasets( - temp_xr_path, - temp_xr_ncdata_path, - check_dims_order=False, - suppress_warnings=True, - ) - if result != []: - assert result == [] + # Treat as OK if it passes xarray comparison + assert xr_ds.identical(xr_ncdata_ds) def test_save_direct_vs_viancdata(standard_testcase, tmp_path): From cf477f3bd95ccb84c65041fd0568b7d4d24b2bf7 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Sun, 17 Mar 2024 16:47:37 +0000 Subject: [PATCH 3/4] Drop control chars from exclude. --- tests/data_testcase_schemas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data_testcase_schemas.py b/tests/data_testcase_schemas.py index dfbc830..26f8c7e 100644 --- a/tests/data_testcase_schemas.py +++ b/tests/data_testcase_schemas.py @@ -511,6 +511,6 @@ def standard_testcase(request, session_testdir): # We think Xarray can load ~anything (maybe returning nothing) "load": [], # Xarray can save ~anything - "save": [r"testdata__\testing\test_monotonic_coordinate"], + "save": [r"test_monotonic_coordinate"], }, } From 370d2b97cc1809a42b475623ae00d27fe4d9c2c9 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 18 Mar 2024 09:14:44 +0000 Subject: [PATCH 4/4] Tidy, remove unused options. --- .../test_xarray_load_and_save_equivalence.py | 67 +++---------------- 1 file changed, 8 insertions(+), 59 deletions(-) diff --git a/tests/integration/test_xarray_load_and_save_equivalence.py b/tests/integration/test_xarray_load_and_save_equivalence.py index f3a877a..9786761 100644 --- a/tests/integration/test_xarray_load_and_save_equivalence.py +++ b/tests/integration/test_xarray_load_and_save_equivalence.py @@ -5,9 +5,6 @@ (1) check equivalence of cubes : xarray.load(file) VS xarray.load(ncdata(file)) (2) check equivalence of files : xarray -> file VS xarray->ncdata->file """ -import numpy as np -from subprocess import check_output - import pytest import xarray @@ -18,17 +15,15 @@ session_testdir, standard_testcase, ) -from tests.integration.equivalence_testing_utils import ( - adjust_chunks, - set_tiny_chunks, -) - -# Avoid complaints that imported fixtures are "unused" -standard_testcase, session_testdir, adjust_chunks from ncdata.threadlock_sharing import lockshare_context from ncdata.xarray import from_xarray, to_xarray +# Avoid complaints that imported fixtures are "unused" +# TODO: declare fixtures in usual way in pytest config? +standard_testcase, session_testdir + + # _FIX_LOCKS = True _FIX_LOCKS = False @@ -42,34 +37,16 @@ def use_xarraylock(): yield -# _USE_TINY_CHUNKS = True -_USE_TINY_CHUNKS = False -set_tiny_chunks(_USE_TINY_CHUNKS) - - def test_load_direct_vs_viancdata( - standard_testcase, use_xarraylock, adjust_chunks, tmp_path + standard_testcase, use_xarraylock, tmp_path ): source_filepath = standard_testcase.filepath ncdata = from_nc4(source_filepath) - excluded_cases = BAD_LOADSAVE_TESTCASES["xarray"]["load"] - excluded_cases.extend(BAD_LOADSAVE_TESTCASES["xarray"]["save"]) - if standard_testcase.name in excluded_cases: + excluded_testcases = BAD_LOADSAVE_TESTCASES["xarray"]["load"] + if any(key in standard_testcase.name for key in excluded_testcases): pytest.skip("excluded testcase (xarray cannot load)") - # _Debug = True - _Debug = False - if _Debug: - print(f"\ntestcase: {standard_testcase.name}") - print("spec =") - print(standard_testcase.spec) - print("\nncdata =") - print(ncdata) - print("\nncdump =") - txt = check_output([f"ncdump {source_filepath}"], shell=True).decode() - print(txt) - # Load the testcase with Xarray. xr_ds = xarray.open_dataset(source_filepath, chunks=-1) @@ -82,12 +59,9 @@ def test_load_direct_vs_viancdata( def test_save_direct_vs_viancdata(standard_testcase, tmp_path): source_filepath = standard_testcase.filepath - ncdata = from_nc4(source_filepath) excluded_testcases = BAD_LOADSAVE_TESTCASES["xarray"]["load"] excluded_testcases.extend(BAD_LOADSAVE_TESTCASES["xarray"]["save"]) - for excl in excluded_testcases: - print(' ', excl) if any(key in standard_testcase.name for key in excluded_testcases): pytest.skip("excluded testcase") @@ -102,31 +76,6 @@ def test_save_direct_vs_viancdata(standard_testcase, tmp_path): ncds_fromxr = from_xarray(xrds) to_nc4(ncds_fromxr, temp_ncdata_savepath) - # _Debug = True - _Debug = False - if _Debug: - ncdump_opts = "-h" - # ncdump_opts = "" - txt = f""" - testcase: {standard_testcase.name} - spec = {standard_testcase.spec} - ncdata = ... - {ncdata} - ncdump ORIGINAL TESTCASE SOURCEFILE = - """ - txt += check_output( - [f"ncdump {ncdump_opts} {source_filepath}"], shell=True - ).decode() - txt += "\nncdump DIRECT FROM XARRAY =" - txt += check_output( - [f"ncdump {ncdump_opts} {temp_direct_savepath}"], shell=True - ).decode() - txt += "\nncdump VIA NCDATA =" - txt += check_output( - [f"ncdump {ncdump_opts} {temp_ncdata_savepath}"], shell=True - ).decode() - print(txt) - # Check equivalence results = compare_nc_datasets( temp_direct_savepath,