Not sure if this is something I should have known, but I was recently surprised to find that different `save_dt` parameter values produce different results even when the seed is fixed. I think this is a bug (only the RNG seed should control model values). Below is a test that I think could be added to the test class `TestModelIsReproducible` within `test_consistent_outputs.py` to demonstrate the issue I have run into. The final assertion, which checks whether the final topography grids are the same, fails. The test can be seen actively failing here.
```python
def test_same_models_diff_save_dt(self, tmp_path):
    """Test models that have same parameters but different save_dt."""
    file_name = 'user_parameters.yaml'
    p, f = utilities.create_temporary_file(tmp_path, file_name)
    utilities.write_parameter_to_file(f, 'out_dir', tmp_path / 'out_dir')
    utilities.write_parameter_to_file(f, 'Length', 1000.0)
    utilities.write_parameter_to_file(f, 'Width', 2000.0)
    utilities.write_parameter_to_file(f, 'seed', 1)
    utilities.write_parameter_to_file(f, 'verbose', 1)
    utilities.write_parameter_to_file(f, 'dx', 50.0)
    utilities.write_parameter_to_file(f, 'L0_meters', 150.0)
    utilities.write_parameter_to_file(f, 'N0_meters', 250.0)
    utilities.write_parameter_to_file(f, 'f_bedload', 0.2)
    utilities.write_parameter_to_file(f, 'save_eta_grids', True)
    utilities.write_matrix_to_file(f, ['save_dt'], [[0, 50000]])
    f.close()
    # use preprocessor to run models
    pp = preprocessor.Preprocessor(input_file=p, timesteps=2)
    pp.run_jobs()
    # look at outputs
    ModelA = Dataset(os.path.join(str(pp._file_list[0])[:-12],
                                  'pyDeltaRCM_output.nc'), 'r', format='NETCDF4')
    ModelB = Dataset(os.path.join(str(pp._file_list[1])[:-12],
                                  'pyDeltaRCM_output.nc'), 'r', format='NETCDF4')
    # check attributes of the netCDFs
    # final eta grid has same LxW shape
    assert ModelA['eta'][-1, :, :].shape == ModelB['eta'][-1, :, :].shape
    assert ModelA.variables.keys() == ModelB.variables.keys()
    # check a few pieces of metadata
    assert ModelA['meta']['L0'][:] == ModelB['meta']['L0'][:]
    assert ModelA['meta'].variables.keys() == ModelB['meta'].variables.keys()
    # final time should be the same
    assert ModelA['time'][-1].data == ModelB['time'][-1].data
    # final eta grids should be the same
    assert np.all(ModelA['eta'][-1, :, :].data == ModelB['eta'][-1, :, :].data)
    # close netCDF output files
    ModelA.close()
    ModelB.close()
```
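For anyone digging into this, a quick debugging aid (hypothetical, reusing the `ModelA`/`ModelB` handles from the test above before they are closed) is to quantify the mismatch rather than just asserting on it:

```python
import numpy as np

# Locate the largest discrepancy between the two final eta grids
diff = ModelA['eta'][-1, :, :].data - ModelB['eta'][-1, :, :].data
idx = np.unravel_index(np.abs(diff).argmax(), diff.shape)
print('cells differing:', np.count_nonzero(diff),
      '| max |delta eta|:', np.abs(diff).max(), 'at cell', idx)
```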
If a similar test is run with a `seed` matrix that has the same value twice (so `save_dt` is the same between the two runs), it passes and the runs are identical. This suggests that `save_dt` is the culprit and is somehow causing runs with identical random seeds to diverge. The passing test is shown below.
```python
def test_same_models_matrix(self, tmp_path):
    """Test models that have same parameters."""
    file_name = 'user_parameters.yaml'
    p, f = utilities.create_temporary_file(tmp_path, file_name)
    utilities.write_parameter_to_file(f, 'out_dir', tmp_path / 'out_dir')
    utilities.write_parameter_to_file(f, 'Length', 1000.0)
    utilities.write_parameter_to_file(f, 'Width', 2000.0)
    utilities.write_parameter_to_file(f, 'verbose', 1)
    utilities.write_parameter_to_file(f, 'dx', 50.0)
    utilities.write_parameter_to_file(f, 'L0_meters', 150.0)
    utilities.write_parameter_to_file(f, 'N0_meters', 250.0)
    utilities.write_parameter_to_file(f, 'f_bedload', 0.2)
    utilities.write_parameter_to_file(f, 'save_eta_grids', True)
    utilities.write_matrix_to_file(f, ['seed'], [[1, 1]])
    f.close()
    # use preprocessor to run models
    pp = preprocessor.Preprocessor(input_file=p, timesteps=2)
    pp.run_jobs()
    # look at outputs
    ModelA = Dataset(os.path.join(str(pp._file_list[0])[:-12],
                                  'pyDeltaRCM_output.nc'), 'r', format='NETCDF4')
    ModelB = Dataset(os.path.join(str(pp._file_list[1])[:-12],
                                  'pyDeltaRCM_output.nc'), 'r', format='NETCDF4')
    # check attributes of the netCDFs
    # final eta grid has same LxW shape
    assert ModelA['eta'][-1, :, :].shape == ModelB['eta'][-1, :, :].shape
    assert ModelA.variables.keys() == ModelB.variables.keys()
    # check a few pieces of metadata
    assert ModelA['meta']['L0'][:] == ModelB['meta']['L0'][:]
    assert ModelA['meta'].variables.keys() == ModelB['meta'].variables.keys()
    # final time should be the same
    assert ModelA['time'][-1].data == ModelB['time'][-1].data
    # final eta grids should be the same
    assert np.all(ModelA['eta'][-1, :, :].data == ModelB['eta'][-1, :, :].data)
    # close netCDF output files
    ModelA.close()
    ModelB.close()
```
I think so; they have the same final `time` value, since the assertion

```python
# final time should be the same
assert ModelA['time'][-1].data == ModelB['time'][-1].data
```

is not throwing an error.
In this test case with 2 timesteps, one model saves after every timestep, so it does timestep 1 → save → timestep 2 → save, while the other model only saves after 2 timesteps, so it does timestep 1 → timestep 2 → save. The interleaved save is the only difference between the two runs, and it should not affect model state, yet the final `eta` grids differ; a toy illustration of how that can happen is sketched below.
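If anything in the save/output path consumes a draw from the model's RNG (or otherwise mutates model state), the two schedules will diverge even with identical seeds. The following is just a minimal sketch of that hypothesis, not the actual pyDeltaRCM code path; `run` and the draw inside the save branch are made up for illustration:

```python
import numpy as np

def run(timesteps, save_every):
    # Toy model: fixed seed, but the (hypothetical) save step consumes
    # one RNG draw, so the trajectory depends on the save schedule.
    rng = np.random.default_rng(1)
    eta = np.zeros(5)
    for t in range(1, timesteps + 1):
        eta += rng.standard_normal(5)   # the "model" update
        if t % save_every == 0:
            _ = rng.random()            # hypothetical draw hidden in the save path
    return eta

# Saving every timestep vs. only at the end gives different final grids:
print(np.array_equal(run(2, save_every=1), run(2, save_every=2)))  # -> False
```

If something like this is happening here, the fix would be to make the save routine strictly read-only with respect to model state.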