diff --git a/autotest/test_prms_atmosphere.py b/autotest/test_prms_atmosphere.py index 02add0a3..33c3d1a5 100644 --- a/autotest/test_prms_atmosphere.py +++ b/autotest/test_prms_atmosphere.py @@ -11,7 +11,7 @@ from utils_compare import compare_in_memory, compare_netcdfs # compare in memory (faster) or full output files? or both! -do_compare_output_files = True +do_compare_output_files = False do_compare_in_memory = True rtol = 1.0e-5 atol = 1.0e-5 # why is this relatively low accuracy? diff --git a/autotest/test_prms_canopy.py b/autotest/test_prms_canopy.py index e660dba4..f022f03e 100644 --- a/autotest/test_prms_canopy.py +++ b/autotest/test_prms_canopy.py @@ -9,7 +9,7 @@ from utils_compare import compare_in_memory, compare_netcdfs # compare in memory (faster) or full output files? or both! -do_compare_output_files = True +do_compare_output_files = False do_compare_in_memory = True rtol = atol = 1.0e-5 diff --git a/autotest/test_prms_canopy_runoff.py b/autotest/test_prms_canopy_runoff.py deleted file mode 100644 index 493e3b2c..00000000 --- a/autotest/test_prms_canopy_runoff.py +++ /dev/null @@ -1,145 +0,0 @@ -import pathlib as pl - -import numpy as np -import pytest - -from pywatershed.base.adapter import adapter_factory -from pywatershed.base.control import Control -from pywatershed.hydrology.prms_canopy import PRMSCanopy -from pywatershed.hydrology.prms_runoff import PRMSRunoff -from pywatershed.parameters import PrmsParameters - - -@pytest.fixture(scope="function") -def params(domain): - return PrmsParameters.load(domain["param_file"]) - - -@pytest.fixture(scope="function") -def control(domain): - return Control.load(domain["control_file"]) - - -def test_canopy_runoff(domain, control, params, tmp_path): - tmp_path = pl.Path(tmp_path) - - # get the answer data - - comparison_var_names = [ - "infil", - "dprst_stor_hru", - "hru_impervstor", - "sroff", - ] - output_dir = domain["prms_output_dir"] - - # Read PRMS output into ans for comparison with pywatershed results - ans = {} - for key in comparison_var_names: - nc_pth = output_dir / f"{key}.nc" - ans[key] = adapter_factory(nc_pth, variable_name=key, control=control) - - # instantiate canopy - input_variables = {} - for key in PRMSCanopy.get_inputs(): - nc_pth = output_dir / f"{key}.nc" - input_variables[key] = nc_pth - - canopy = PRMSCanopy( - control=control, - discretization=None, - parameters=params, - **input_variables, - ) - - # instantiate runoff - input_variables = {} - for key in PRMSRunoff.get_inputs(): - nc_pth = output_dir / f"{key}.nc" - if "soil_moist" in str(nc_pth): - nc_pth = output_dir / "soil_moist_prev.nc" - input_variables[key] = nc_pth - input_variables["net_ppt"] = None - input_variables["net_rain"] = None - input_variables["net_snow"] = None - runoff = PRMSRunoff( - control=control, - discretization=None, - parameters=params, - **input_variables, - ) - - # wire up output from canopy as input to runoff - runoff.set_input_to_adapter( - "net_ppt", - adapter_factory(canopy.net_ppt, "net_ppt", control=control), - ) - runoff.set_input_to_adapter( - "net_rain", - adapter_factory(canopy.net_rain, "net_rain", control=control), - ) - runoff.set_input_to_adapter( - "net_snow", - adapter_factory(canopy.net_snow, "net_snow", control=control), - ) - - all_success = True - for istep in range(control.n_times): - control.advance() - canopy.advance() - runoff.advance() - - canopy.calculate(1.0) - runoff.calculate(1.0) - - # advance the answer, which is being read from a netcdf file - for key, val in ans.items(): - val.advance() - - # make a comparison check with answer - check = True - failfast = True - detailed = True - if check: - atol = 1.0e-5 - success = check_timestep_results( - runoff, istep, ans, atol, detailed - ) - if not success: - all_success = False - if failfast: - assert success, "stopping..." - - runoff.finalize() - - # check at the end and error if one or more steps didn't pass - if not all_success: - raise Exception("pywatershed results do not match prms results") - - return - - -def check_timestep_results(storageunit, istep, ans, atol, detailed=False): - all_success = True - for key in ans.keys(): - a1 = ans[key].current - a2 = storageunit[key] - success = np.isclose(a2, a1, atol=atol).all() - if not success: - all_success = False - diff = a1 - a2 - diffmin = diff.min() - diffmax = diff.max() - if True: - print(f"time step {istep}") - print(f"output variable {key}") - print(f"prms {a1.min()} {a1.max()}") - print(f"pywatershed {a2.min()} {a2.max()}") - print(f"diff {diffmin} {diffmax}") - if detailed: - idx = np.where(np.abs(diff) > atol)[0] - for i in idx: - print( - f"hru {i} prms {a1[i]} pywatershed {a2[i]} diff {diff[i]}" - ) - return all_success diff --git a/autotest/test_prms_channel.py b/autotest/test_prms_channel.py index 4cae7c2c..21af0266 100644 --- a/autotest/test_prms_channel.py +++ b/autotest/test_prms_channel.py @@ -55,9 +55,8 @@ def test_compare_prms( ) tmp_path = pl.Path(tmp_path) - - # load csv files into dataframes output_dir = domain["prms_output_dir"] + input_variables = {} for key in PRMSChannel.get_inputs(): nc_path = output_dir / f"{key}.nc" diff --git a/autotest/test_prms_runoff.py b/autotest/test_prms_runoff.py index 831acdbf..82107958 100644 --- a/autotest/test_prms_runoff.py +++ b/autotest/test_prms_runoff.py @@ -9,7 +9,7 @@ from utils_compare import compare_in_memory, compare_netcdfs # compare in memory (faster) or full output files? or both! -do_compare_output_files = False +do_compare_output_files = True do_compare_in_memory = True rtol = atol = 1.0e-10 @@ -88,6 +88,7 @@ def test_compare_prms( control.advance() runoff.advance() runoff.calculate(1.0) + runoff.output() if do_compare_in_memory: compare_in_memory( runoff, answers, atol=atol, rtol=rtol, skip_missing_ans=True diff --git a/autotest/test_prms_snow.py b/autotest/test_prms_snow.py index 73c9a3e7..6a40e386 100644 --- a/autotest/test_prms_snow.py +++ b/autotest/test_prms_snow.py @@ -8,6 +8,12 @@ from pywatershed.constants import epsilon32, zero from pywatershed.hydrology.prms_snow import PRMSSnow from pywatershed.parameters import Parameters, PrmsParameters +from utils_compare import compare_in_memory, compare_netcdfs + +# compare in memory (faster) or full output files? or both! +do_compare_output_files = False +do_compare_in_memory = True +rtol = atol = 1.0e-3 calc_methods = ("numpy", "numba") params = ("params_sep", "params_one") @@ -41,49 +47,51 @@ def test_compare_prms( domain, control, discretization, parameters, tmp_path, calc_method ): tmp_path = pl.Path(tmp_path) - output_dir = domain["prms_output_dir"] # get the answer data comparison_var_names = [ - # # "ai", - # "albedo", - # # "frac_swe", - # # "freeh2o", - # # "iasw", - # # "int_alb", + # 'ai', + # 'albedo', + # 'frac_swe', + # 'freeh2o', + # 'freeh2o_change', + # 'freeh2o_prev', + # 'iasw', + # 'int_alb', "iso", - # # "lso", - # # "lst", - # # "mso", - # # "pk_def", - # # "pk_den", - # # "pk_depth", - # # "pk_ice", - # # "pk_precip", - # # "pk_temp", - # # "pksv", - # "pkwater_ante", -- dosent respect the iso memory eval + # 'lso', + # 'lst', + # 'mso', + # 'newsnow', + # 'pk_def', + # 'pk_den', + # 'pk_depth', + # 'pk_ice', + # 'pk_ice_change', + # 'pk_ice_prev', + # 'pk_precip', + # 'pk_temp', + # 'pksv', + # 'pkwater_ante', "pkwater_equiv", - # "pptmix_nopack", - # # "pss", - # "pst", - # # "salb", - # # "scrv", - # # "slst", + # 'pkwater_equiv_change', + # 'pptmix_nopack', + # 'pss', + # 'pst', + # 'salb', + # 'scrv', + # 'slst', "snow_evap", - # "snowcov_area", # issues on 12-23-1979 in drb - # "snowcov_areasv", - # "snowmelt", # issues on 12-22-1979 in drb - # #"snsv", + # 'snowcov_area', + # 'snowcov_areasv', + # 'snowmelt', + # 'snsv', "tcal", + "through_rain", ] - ans = {} - for key in comparison_var_names: - nc_pth = output_dir / f"{key}.nc" - ans[key] = adapter_factory(nc_pth, variable_name=key, control=control) + output_dir = domain["prms_output_dir"] - # setup the snow input_variables = {} for key in PRMSSnow.get_inputs(): nc_path = output_dir / f"{key}.nc" @@ -94,137 +102,41 @@ def test_compare_prms( discretization, parameters, **input_variables, - budget_type="warn", + budget_type="error", calc_method=calc_method, ) - all_success = True - iso_censor_mask = None - for istep in range(control.n_times): - # for istep in range(10): + if do_compare_output_files: + nc_parent = tmp_path / domain["domain_name"] + snow.initialize_netcdf(nc_parent) - print("\n") - print(f"solving time: {control.current_time}") + if do_compare_in_memory: + answers = {} + for var in comparison_var_names: + var_pth = output_dir / f"{var}.nc" + answers[var] = adapter_factory( + var_pth, variable_name=var, control=control + ) + for istep in range(control.n_times): control.advance() snow.advance() - - snow.calculate(float(istep)) - - # compare along the way - for key, val in ans.items(): - val.advance() - - # pkwater_equiv --- - # first check pkwater_equiv and assert they are with - # their own tolerance (approx float error) - key = "pkwater_equiv" - pkwe_ans = ans[key].current - pkwe = snow[key] - pkwater_absdiff = abs(pkwe - pkwe_ans) - atol = 5e-2 # 5 hundreds of an inch - - close = np.isclose(pkwe, pkwe_ans, atol=atol, rtol=zero) - if iso_censor_mask is None: - assert close.all() - else: - assert (close[~iso_censor_mask]).all() - - # if one of them is "zero" and the other is not, - # then there can be differences in other variables. - # so we'll exclude this case from the comparison of - # other variables as long as the SWEs pass the above test - pkwater_equiv_one_zero = (pkwe <= atol) | (pkwe_ans <= atol) - if iso_censor_mask is not None: - pkwater_equiv_one_zero = np.where( - iso_censor_mask, True, pkwater_equiv_one_zero + snow.calculate(1.0) + snow.output() + if do_compare_in_memory: + compare_in_memory( + snow, answers, atol=atol, rtol=rtol, skip_missing_ans=True ) - print( - f"pkwater_equiv_one_zero.sum(): " f"{pkwater_equiv_one_zero.sum()}" - ) - - # iso --- - # iso has a memory of packwater equiv which can cause about - # a 20% change in albedo based on pkwater differences in the - # noise... AND those differences in albedo can translate into - # longer term differences in other variables. The solution is - # to flag (memory) when iso gets out of snyc over reasonable/small - # pkwater equiv differences no not evaluate those points and - # after pk water has gone to zero for both pywatershed and prms. - # we'll want to report the number of points being censored this - # way at each time. this censor mask has to be used for pkwater - # above - key = "iso" - iso_ans = ans[key].current - iso = snow[key] - iso_absdiff = abs(iso - iso_ans) - - # anytime at least one of them is zero AND there - # are iso differences, then we add these points to the - # iso_censor list - iso_censor_mask_new = pkwater_equiv_one_zero & (iso_absdiff > 0) - - if iso_censor_mask is None: - iso_censor_mask = iso_censor_mask_new - else: - iso_censor_mask = iso_censor_mask | iso_censor_mask_new - - # reset points where near zero and have matching iso - pkwater_both_zero_and_iso = ( - (pkwe <= epsilon32) & (pkwe_ans <= epsilon32) & (iso == iso_ans) - ) - iso_censor_mask = np.where( - pkwater_both_zero_and_iso, False, iso_censor_mask - ) - print(f"iso_censor_mask.sum(): {iso_censor_mask.sum()}") - assert np.where(~iso_censor_mask, iso - iso_ans == 0, True).all() - - censor_comp = iso_censor_mask | pkwater_equiv_one_zero - - # The rest of the variables to be checked - atol = 1e-5 - for key in ans.keys(): - if key in ["pkwater_equiv", "iso"]: - continue - - print(key) - a1 = ans[key].current - a2 = snow[key] - - success = np.allclose(a1, a2, atol=atol, rtol=zero) - atol = 1e-3 - success_prelim = np.isclose(a1, a2, atol=atol) - success = np.where(~censor_comp, success_prelim, True).all() - - if not success: - all_success = False - diff = a1 - a2 - diffmin = diff.min() - diffmax = diff.max() - print(f"time step {istep}") - print(f"output variable: {key}") - print(f"prms {a1.min()} {a1.max()}") - print(f"pywatershed {a2.min()} {a2.max()}") - print(f"diff {diffmin} {diffmax}") - - zz = abs(a2 - a1) - max_diff = np.where(~censor_comp, zz, zero).max() - wh_max_diff = np.where(zz == max_diff) - print(f"wh_max_diff: {wh_max_diff}") - print(f"max diff: {zz[wh_max_diff]}") - print(f"prms: {a1[wh_max_diff]}") - print(f"pywatershed: {a2[wh_max_diff]}") - print( - f"pkwater_absdiff[wh_max_diff]: " - f"{pkwater_absdiff[wh_max_diff]}" - ) - print(f"pkwe[wh_max_diff]: {pkwe[wh_max_diff]}") - print(f"pkwe_ans[wh_max_diff]: {pkwe_ans[wh_max_diff]}") - assert success snow.finalize() - if not all_success: - raise Exception("pywatershed results do not match prms results") + if do_compare_output_files: + compare_netcdfs( + comparison_var_names, + tmp_path / domain["domain_name"], + output_dir, + atol=atol, + rtol=rtol, + ) return diff --git a/autotest/test_prms_soilzone.py b/autotest/test_prms_soilzone.py index d56010c7..a3c7acef 100644 --- a/autotest/test_prms_soilzone.py +++ b/autotest/test_prms_soilzone.py @@ -94,6 +94,7 @@ def test_compare_prms( control.advance() soil.advance() soil.calculate(1.0) + soil.output() if do_compare_in_memory: compare_in_memory( soil, answers, atol=atol, rtol=rtol, skip_missing_ans=True diff --git a/autotest/test_prms_solar_geom.py b/autotest/test_prms_solar_geom.py index f3dcbe2a..10404964 100644 --- a/autotest/test_prms_solar_geom.py +++ b/autotest/test_prms_solar_geom.py @@ -9,10 +9,10 @@ from utils_compare import compare_in_memory, compare_netcdfs # compare in memory (faster) or full output files? or both! -do_compare_output_files = True +do_compare_output_files = False do_compare_in_memory = True -rtol = atol = 1.0e-10 +# rtol = atol = 1.0e-10 atol = rtol = np.finfo(np.float32).resolution params = ("params_sep", "params_one") diff --git a/examples/compare_pws_prms.ipynb b/examples/03_compare_pws_prms.ipynb similarity index 86% rename from examples/compare_pws_prms.ipynb rename to examples/03_compare_pws_prms.ipynb index dfe58089..65609416 100644 --- a/examples/compare_pws_prms.ipynb +++ b/examples/03_compare_pws_prms.ipynb @@ -5,20 +5,27 @@ "id": "2d33f0d3-b07c-4fcb-8fd8-1d23014ec3cb", "metadata": {}, "source": [ - "# Compare PRMS and PWS\n", + "# Compare pywatershed and PRMS\n", "\n", - "This notebook compares PWS (pywatershed) and PRMS runs with input based on how PRMS/NHM domains are setup for testing in the `test_data/` directory of pywtaershed. The domains supplied with the pywatershed repo are `test_data/hru_1`, `test_data/drb_2yr`, and `test_data/ucb_2yr`. For this notebook the following file listing of any of these domain directories gives the critical to the functioning of this notebook.:\n", + "This notebook compares pywatershed (PWS) and PRMS outputs. As part of release of v1.0 this notebook is meant to give users insight on how wimilar results from pywatershed and PRMS are. They are identical or very close in the vast majority of cases. This notebook makes it easy to find when they are not by providing statistics at individual HRUs and timeseries for all HRUs.\n", + "\n", + "Note that this notebook requires an editable install of pywatershed (`pip install -e` in the pywatershed repository root) for the requisite data. PRMS/NHM domains which may be used are in the `test_data/` directory of pywatershed (`hru_1`, `drb_2yr`, and `ucb_2yr`) but any other domain may be used. \n", + "\n", + "## Notes on setting up other domains\n", + "You may want to supply your own domain and see how pywatershed works on it. Here are notes on doing so. Domains must supply the correct, required files in `test_data/your_domain` which are given in this listing:\n", "\n", "```\n", "control.test prcp.cbh sf_data tmax.nc tmin.nc\n", "myparam.param prcp.nc tmax.cbh tmin.cbh\n", "```\n", "\n", - "If you want to supply your own domain (e.g. CONUS), you need all these files to be present in `test_data/some_domain`. Note that the `*.cbh` files must be pre-converted to netcdf for `prcp`, `tmin`, and `tmax`. The `control.test` and `myparam.param` files are used by both PRMS and PWS. The `control.test` files in the repo are specific for being able to run sub-models and includes a nearly maximal amount of model output (time-inefficient for both PRMS and PWS). The stock control files can be found in `test_data/common` there is a file for single-hru domains and multi-hru domains and these are identical (as appropriate) for the domains included in the repository. These, of course, can be modified. For running a CONUS domain, for example, it is desirable to reduce the total amount of output (but this may not allow for PWS sub-models to be run as PRMS dosent necessarily supply all the required fields).\n", + "The `*.cbh` files must be pre-converted to netcdf for `prcp`, `tmin`, and `tmax` and how to do this can be found near the top of notebook 02. The `control.test` and `myparam.param` files are used by both PRMS and PWS. The `control.test` files in the repo are specific for being able to run sub-models and include a nearly maximal amount of model output (time-inefficient for both PRMS and PWS). The stock control files can be found in `test_data/common` there is a file for single-hru domains and multi-hru domains and these are identical (as appropriate) for the domains included in the repository. For running a large domain, for example, it is desirable to reduce the total amount of output (but this may not allow for PWS sub-models to be run as PRMS dosent necessarily supply all the required fields). So you may modify the `control.test` file but take careful note of what options are available in pywatershed as currently only NHM configuration is available.\n", "\n", "The runs of PRMS use double precision binaries produced by the `prms_src/prms5.2.1` source code in the pywatershed repository. The procedure used below is exactly as done in CI for running regression tests against PRMS.\n", "\n", - "All of the code required for plotting below is included so that it can be further tailored to your tastes." + "All of the code required for plotting below is included so that it can be further tailored to your tastes.\n", + "\n", + "## Imports, etc" ] }, { @@ -64,8 +71,6 @@ "import pywatershed as pws\n", "import xarray as xr\n", "\n", - "# pd.options.plotting.backend = \"holoviews\"\n", - "\n", "repo_root = pws.constants.__pywatershed_root__.parent\n", "nb_output_dir = pl.Path(\"./compare_pws_prms\")" ] @@ -87,14 +92,14 @@ "metadata": {}, "outputs": [], "source": [ - "domain_name = \"drb_2yr\" # must be present in test_data/domain_name\n", - "calc_method = \"numba\"\n", - "budget_type = None\n", + "domain_name: str = \"drb_2yr\" # must be present in test_data/domain_name\n", + "calc_method: str = \"numba\"\n", + "budget_type: str = None\n", "\n", - "run_pws = True # run if the output does not exist on disk\n", - "force_pws_run = True ## if it exists on disk, re-run it and overwrite?\n", + "run_prms: bool = True ## always forced/overwrite\n", "\n", - "run_prms = True ## always forced/overwrite" + "run_pws: bool = True # run if the output does not exist on disk\n", + "force_pws_run: bool = True # if it exists on disk, re-run it and overwrite?" ] }, { @@ -172,10 +177,10 @@ "source": [ "if run_pws:\n", " nhm_processes = [\n", - " # pws.PRMSSolarGeometry, # submodles are possible\n", - " # pws.PRMSAtmosphere,\n", - " # pws.PRMSCanopy,\n", - " # pws.PRMSSnow,\n", + " pws.PRMSSolarGeometry, # submodles are possible\n", + " pws.PRMSAtmosphere,\n", + " pws.PRMSCanopy,\n", + " pws.PRMSSnow,\n", " pws.PRMSRunoff,\n", " pws.PRMSSoilzone,\n", " pws.PRMSGroundwater,\n",