diff --git a/components/eamxx/src/physics/nudging/tests/nudging_tests.cpp b/components/eamxx/src/physics/nudging/tests/nudging_tests.cpp index 51773cb249d..a5a158f44a1 100644 --- a/components/eamxx/src/physics/nudging/tests/nudging_tests.cpp +++ b/components/eamxx/src/physics/nudging/tests/nudging_tests.cpp @@ -51,7 +51,7 @@ TEST_CASE("nudging_tests") { const int nlevs_fine = 2*nlevs_data -1; // Files names - auto postfix = ".INSTANT.nsteps_x1." + get_t0().to_string() + ".nc"; + auto postfix = ".INSTANT.nsteps_x1.np*." + get_t0().to_string() + ".nc"; auto nudging_data = "nudging_data" + postfix; auto nudging_data_filled = "nudging_data_filled" + postfix; auto map_file = "map_ncol" + std::to_string(ngcols_data) diff --git a/components/eamxx/src/share/io/scream_io_utils.cpp b/components/eamxx/src/share/io/scream_io_utils.cpp index 4a0beb813b7..9318728d657 100644 --- a/components/eamxx/src/share/io/scream_io_utils.cpp +++ b/components/eamxx/src/share/io/scream_io_utils.cpp @@ -2,8 +2,10 @@ #include "share/io/scream_scorpio_interface.hpp" #include "share/util/scream_utils.hpp" +#include "share/scream_config.hpp" #include +#include namespace scream { @@ -11,37 +13,49 @@ std::string find_filename_in_rpointer ( const std::string& filename_prefix, const bool model_restart, const ekat::Comm& comm, - const util::TimeStamp& run_t0) + const util::TimeStamp& run_t0, + const OutputAvgType avg_type, + const IOControl& control) { std::string filename; bool found = false; std::string content; std::string suffix = model_restart ? ".r." : ".rhist."; + std::string pattern_str = filename_prefix + suffix; + + // The AD will pass a default constructed control, since it doesn't know the values + // of REST_N/REST_OPTION used in the previous run. Also, model restart is *always* INSTANT. + if (model_restart) { + EKAT_REQUIRE_MSG (avg_type==OutputAvgType::Instant, + "Error! Model restart output should have INSTANT avg type.\n" + " - input avg_type: " + e2str(avg_type) + "\n"); + pattern_str += e2str(OutputAvgType::Instant) + R"(.n(step|sec|min|hour|day|month|year)s_x\d+)"; + } else { + EKAT_REQUIRE_MSG (control.output_enabled(), + "Error! When restarting an output stream, we need a valid IOControl structure.\n" + " - filename prefix: " + filename_prefix + "\n"); + pattern_str += e2str(avg_type) + "." + control.frequency_units + "_x" + std::to_string(control.frequency); + } + if (is_scream_standalone()) { + pattern_str += ".np" + std::to_string(comm.size()); + } + pattern_str += "." + run_t0.to_string() + ".nc"; + std::regex pattern (pattern_str); + if (comm.am_i_root()) { std::ifstream rpointer_file; + std::string line; rpointer_file.open("rpointer.atm"); - // If the timestamp is in the filename, then the filename ends with "S.nc", - // with S being the string representation of the timestamp - auto ts_len = run_t0.to_string().size(); - auto extract_ts = [&] (const std::string& line) -> util::TimeStamp { - auto min_size = ts_len+3; - if (line.size()>=min_size) { - auto ts_str = line.substr(line.size()-min_size,ts_len); - auto ts = util::str_to_time_stamp(ts_str); - return ts; - } else { - return util::TimeStamp(); - } - }; - - while ((rpointer_file >> line) and not found) { + while (std::getline(rpointer_file,line)) { content += line + "\n"; - found = line.find(filename_prefix+suffix) != std::string::npos && - extract_ts(line)==run_t0; - filename = line; + if (std::regex_search(line,pattern)) { + filename = line; + found = true; + break; + } } } @@ -52,18 +66,29 @@ std::string find_filename_in_rpointer ( if (not found) { broadcast_string(content,comm,comm.root_rank()); - // If the history restart file is not found, it must be because the last - // model restart step coincided with a model output step, in which case - // a restart history file is not written. - // If that's the case, *disable* output restart, by setting - // 'Restart'->'Perform Restart' = false - // in the input parameter list - EKAT_ERROR_MSG ( - "Error! Restart requested, but no restart file found in 'rpointer.atm'.\n" - " restart filename prefix: " + filename_prefix + "\n" - " restart file type: " + std::string(model_restart ? "model restart" : "history restart") + "\n" - " run t0 : " + run_t0.to_string() + "\n" - " rpointer content:\n" + content); + if (model_restart) { + EKAT_ERROR_MSG ( + "Error! Restart requested, but no model restart file found in 'rpointer.atm'.\n" + " model restart filename prefix: " + filename_prefix + "\n" + " model restart filename pattern: " + pattern_str + "\n" + " run t0 : " + run_t0.to_string() + "\n" + " rpointer content:\n" + content + "\n\n"); + } else { + EKAT_ERROR_MSG ( + "Error! Restart requested, but no history restart file found in 'rpointer.atm'.\n" + " hist restart filename prefix: " + filename_prefix + "\n" + " hist restart filename pattern: " + pattern_str + "\n" + " run t0 : " + run_t0.to_string() + "\n" + " avg_type : " + e2str(avg_type) + "\n" + " output freq : " + std::to_string(control.frequency) + "\n" + " output freq units: " + control.frequency_units + "\n" + " rpointer content:\n" + content + "\n\n" + " Did you change output specs (avg type, freq, or freq units) across restart? If so, please, remember that it is not allowed.\n" + " It is also possible you are using a rhist file create before commit 6b7d441330d. That commit changed how rhist file names\n" + " are formed. In particular, we no longer use INSTANT.${REST_OPTION}_x${REST_N}, but we use the avg type, and freq/freq_option\n" + " of the output stream (to avoid name clashes if 2 streams only differ for one of those). If you want to use your rhist file,\n" + " please rename it, so that the avg-type, freq, and freq_option reflect those of the output stream.\n"); + } } // Have the root rank communicate the nc filename diff --git a/components/eamxx/src/share/io/scream_io_utils.hpp b/components/eamxx/src/share/io/scream_io_utils.hpp index efb2a4fd65b..01bc46e1e60 100644 --- a/components/eamxx/src/share/io/scream_io_utils.hpp +++ b/components/eamxx/src/share/io/scream_io_utils.hpp @@ -1,6 +1,7 @@ #ifndef SCREAM_IO_UTILS_HPP #define SCREAM_IO_UTILS_HPP +#include "scream_io_control.hpp" #include "share/util/scream_time_stamp.hpp" #include @@ -59,11 +60,17 @@ inline OutputAvgType str2avg (const std::string& s) { return OAT::Invalid; } +// The AD will pass a default constructed control, since it doesn't know the values +// of REST_N/REST_OPTION used in the previous run +// Output streams MUST pass a valid control structure, cause we need to differentiate +// between, e.g., streams with same filename prefix, but different output freq specs std::string find_filename_in_rpointer ( - const std::string& casename, + const std::string& filename_prefix, const bool model_restart, const ekat::Comm& comm, - const util::TimeStamp& run_t0); + const util::TimeStamp& run_t0, + const OutputAvgType avg_type = OutputAvgType::Instant, + const IOControl& control = {}); struct LongNames { diff --git a/components/eamxx/src/share/io/scream_output_manager.cpp b/components/eamxx/src/share/io/scream_output_manager.cpp index 0b738efe648..4f35827ba3f 100644 --- a/components/eamxx/src/share/io/scream_output_manager.cpp +++ b/components/eamxx/src/share/io/scream_output_manager.cpp @@ -171,7 +171,9 @@ setup (const ekat::Comm& io_comm, const ekat::ParameterList& params, if (perform_history_restart) { using namespace scorpio; - auto rhist_file = find_filename_in_rpointer(hist_restart_filename_prefix,false,m_io_comm,m_run_t0); + IOFileSpecs hist_restart_specs; + hist_restart_specs.ftype = FileType::HistoryRestart; + auto rhist_file = find_filename_in_rpointer(hist_restart_filename_prefix,false,m_io_comm,m_run_t0,m_avg_type,m_output_control); scorpio::register_file(rhist_file,scorpio::Read); // From restart file, get the time of last write, as well as the current size of the avg sample @@ -196,22 +198,8 @@ setup (const ekat::Comm& io_comm, const ekat::ParameterList& params, // We do NOT allow changing output specs across restart. If you do want to change // any of these, you MUST start a new output stream (e.g., setting 'Perform Restart: false') - auto old_freq = scorpio::get_attribute(rhist_file,"GLOBAL","averaging_frequency"); - EKAT_REQUIRE_MSG (old_freq == m_output_control.frequency, - "Error! Cannot change frequency when performing history restart.\n" - " - old freq: " << old_freq << "\n" - " - new freq: " << m_output_control.frequency << "\n"); - auto old_freq_units = scorpio::get_attribute(rhist_file,"GLOBAL","averaging_frequency_units"); - EKAT_REQUIRE_MSG (old_freq_units == m_output_control.frequency_units, - "Error! Cannot change frequency units when performing history restart.\n" - " - old freq units: " << old_freq_units << "\n" - " - new freq units: " << m_output_control.frequency_units << "\n"); - auto old_avg_type = scorpio::get_attribute(rhist_file,"GLOBAL","averaging_type"); - EKAT_REQUIRE_MSG (old_avg_type == e2str(m_avg_type), - "Error! Cannot change avg type when performing history restart.\n" - " - old avg type: " << old_avg_type + "\n" - " - new avg type: " << e2str(m_avg_type) << "\n"); - + // NOTE: we do not check that freq/freq_units/avg_type are not changed: since we used + // that info to find the correct rhist file, we already know that they match! auto old_storage_type = scorpio::get_attribute(rhist_file,"GLOBAL","file_max_storage_type"); EKAT_REQUIRE_MSG (old_storage_type == e2str(m_output_file_specs.storage.type), "Error! Cannot change file storage type when performing history restart.\n" @@ -400,7 +388,7 @@ void OutputManager::run(const util::TimeStamp& timestamp) // Check if we need to open a new file if (not filespecs.is_open) { - filespecs.filename = compute_filename (control,filespecs,timestamp); + filespecs.filename = compute_filename (filespecs,timestamp); // Register all dims/vars, write geometry data (e.g. lat/lon/hyam/hybm) setup_file(filespecs,control); } @@ -613,18 +601,20 @@ long long OutputManager::res_dep_memory_footprint () const { } std::string OutputManager:: -compute_filename (const IOControl& control, - const IOFileSpecs& file_specs, +compute_filename (const IOFileSpecs& file_specs, const util::TimeStamp& timestamp) const { auto filename = m_filename_prefix + file_specs.suffix(); + const auto& control = m_output_control; // Always add avg type and frequency info filename += "." + e2str(m_avg_type); filename += "." + control.frequency_units+ "_x" + std::to_string(control.frequency); - // Optionally, add number of mpi ranks (useful mostly in unit tests, to run multiple MPI configs in parallel) - if (m_params.get("MPI Ranks in Filename")) { + // For standalone EAMxx, we may have 2+ versions of the same test running with two + // different choices of ranks. To avoid name clashing for the output files, + // add the comm size to the output file name. + if (is_scream_standalone()) { filename += ".np" + std::to_string(m_io_comm.size()); } @@ -679,7 +669,6 @@ set_params (const ekat::ParameterList& params, m_filename_prefix = m_params.get("filename_prefix"); // Hard code some parameters in case we access them later - m_params.set("MPI Ranks in Filename",false); m_params.set("Floating Point Precision","real"); } else { auto avg_type = m_params.get("Averaging Type"); @@ -715,12 +704,6 @@ set_params (const ekat::ParameterList& params, "Error! Invalid/unsupported value for 'Floating Point Precision'.\n" " - input value: " + prec + "\n" " - supported values: float, single, double, real\n"); - - // If not set, hard code to false for CIME cases, and true for standalone, - // since standalone may be running multiple versions of the same test at once - if (not m_params.isParameter("MPI Ranks in Filename")) { - m_params.set("MPI Ranks in Filename",is_scream_standalone()); - } } // Output control diff --git a/components/eamxx/src/share/io/scream_output_manager.hpp b/components/eamxx/src/share/io/scream_output_manager.hpp index d25dd037c67..4d7486380cf 100644 --- a/components/eamxx/src/share/io/scream_output_manager.hpp +++ b/components/eamxx/src/share/io/scream_output_manager.hpp @@ -120,8 +120,7 @@ class OutputManager long long res_dep_memory_footprint () const; protected: - std::string compute_filename (const IOControl& control, - const IOFileSpecs& file_specs, + std::string compute_filename (const IOFileSpecs& file_specs, const util::TimeStamp& timestamp) const; void set_file_header(const IOFileSpecs& file_specs); diff --git a/components/eamxx/src/share/io/tests/io_utils.cpp b/components/eamxx/src/share/io/tests/io_utils.cpp index 77779307a72..cf7b66ad859 100644 --- a/components/eamxx/src/share/io/tests/io_utils.cpp +++ b/components/eamxx/src/share/io/tests/io_utils.cpp @@ -9,6 +9,9 @@ TEST_CASE ("find_filename_in_rpointer") { using namespace scream; + constexpr auto AVG = OutputAvgType::Average; + constexpr auto INST = OutputAvgType::Instant; + ekat::Comm comm(MPI_COMM_WORLD); util::TimeStamp t0({2023,9,7},{12,0,0}); @@ -17,21 +20,35 @@ TEST_CASE ("find_filename_in_rpointer") { // Create a dummy rpointer std::ofstream rpointer ("rpointer.atm"); - rpointer << "foo.r." + t0.to_string() + ".nc\n"; - rpointer << "bar2.rhist." + t0.to_string() + ".nc\n"; - rpointer << "bar.rhist." + t0.to_string() + ".nc\n"; - rpointer.close(); + IOControl foo_c, bar_c, bar2_c; + foo_c.frequency = 3; foo_c.frequency_units = "nsteps"; + bar_c.frequency = 1; bar_c.frequency_units = "ndays"; + bar2_c.frequency = 6; bar2_c.frequency_units = "nhours"; - // Now test find_filename_in_rpointer with different inputs + std::string suffix = ".np" + std::to_string(comm.size()) + "." + t0.to_string() + ".nc"; + std::string foo_fname = "foo.r.INSTANT.nsteps_x3" + suffix; + std::string bar_fname = "bar.rhist.AVERAGE.ndays_x1" + suffix; + std::string bar2_fname = "bar.rhist.AVERAGE.nhours_x6" + suffix; - REQUIRE_THROWS (find_filename_in_rpointer("baz",false,comm,t0)); // wrong prefix - REQUIRE_THROWS (find_filename_in_rpointer("bar",false,comm,t1)); // wrong timestamp - REQUIRE_THROWS (find_filename_in_rpointer("bar",true, comm,t0)); // bar is not model restart - REQUIRE_THROWS (find_filename_in_rpointer("foo",false,comm,t0)); // foo is model restart + rpointer << foo_fname<< "\n"; + rpointer << bar_fname<< "\n"; + rpointer << bar2_fname << "\n"; + rpointer.close(); - REQUIRE (find_filename_in_rpointer("bar", false,comm,t0)==("bar.rhist."+t0.to_string()+".nc")); - REQUIRE (find_filename_in_rpointer("bar2",false,comm,t0)==("bar2.rhist."+t0.to_string()+".nc")); - REQUIRE (find_filename_in_rpointer("foo", true, comm,t0)==("foo.r."+t0.to_string()+".nc")); + // Now test find_filename_in_rpointer with different inputs + REQUIRE_THROWS (find_filename_in_rpointer("baz",false,comm,t0,AVG)); // missing control (needed for rhist files) + REQUIRE_THROWS (find_filename_in_rpointer("baz",false,comm,t0,AVG,bar_c)); // wrong prefix + REQUIRE_THROWS (find_filename_in_rpointer("bar",false,comm,t1,AVG,bar_c)); // wrong timestamp + REQUIRE_THROWS (find_filename_in_rpointer("bar",true, comm,t0,AVG,bar_c)); // bar is not model restart + REQUIRE_THROWS (find_filename_in_rpointer("bar",false,comm,t0,INST,bar_c)); // wrong avg type + REQUIRE_THROWS (find_filename_in_rpointer("bar",false,comm,t0,INST,bar2_c)); // wrong freq specs + REQUIRE_THROWS (find_filename_in_rpointer("foo",false,comm,t0,INST,foo_c)); // foo is model restart + REQUIRE_THROWS (find_filename_in_rpointer("foo",true,comm,t0,AVG)); // model restart MUST be INSTANT + + auto test = find_filename_in_rpointer("bar",false,comm,t0,AVG,bar_c); + REQUIRE (find_filename_in_rpointer("bar",false,comm,t0,AVG,bar_c)==bar_fname); + REQUIRE (find_filename_in_rpointer("bar",false,comm,t0,AVG,bar2_c)==bar2_fname); + REQUIRE (find_filename_in_rpointer("foo",true, comm,t0)==foo_fname); } TEST_CASE ("io_control") { diff --git a/components/eamxx/tests/multi-process/dynamics_physics/mam/homme_shoc_cld_mam_aci_p3_mam_optics_rrtmgp_mam_drydep/output.yaml b/components/eamxx/tests/multi-process/dynamics_physics/mam/homme_shoc_cld_mam_aci_p3_mam_optics_rrtmgp_mam_drydep/output.yaml index f2408ab189a..5922c718e6e 100644 --- a/components/eamxx/tests/multi-process/dynamics_physics/mam/homme_shoc_cld_mam_aci_p3_mam_optics_rrtmgp_mam_drydep/output.yaml +++ b/components/eamxx/tests/multi-process/dynamics_physics/mam/homme_shoc_cld_mam_aci_p3_mam_optics_rrtmgp_mam_drydep/output.yaml @@ -120,5 +120,4 @@ Fields: output_control: Frequency: ${NUM_STEPS} frequency_units: nsteps - MPI Ranks in Filename: true ... diff --git a/components/eamxx/tests/multi-process/dynamics_physics/mam/homme_shoc_cld_p3_mam_optics_rrtmgp/output.yaml b/components/eamxx/tests/multi-process/dynamics_physics/mam/homme_shoc_cld_p3_mam_optics_rrtmgp/output.yaml index 559f5158bb6..be4bb16b95d 100644 --- a/components/eamxx/tests/multi-process/dynamics_physics/mam/homme_shoc_cld_p3_mam_optics_rrtmgp/output.yaml +++ b/components/eamxx/tests/multi-process/dynamics_physics/mam/homme_shoc_cld_p3_mam_optics_rrtmgp/output.yaml @@ -117,5 +117,4 @@ Fields: output_control: Frequency: ${NUM_STEPS} frequency_units: nsteps - MPI Ranks in Filename: true ... diff --git a/components/eamxx/tests/multi-process/physics_only/mam/optics_rrtmgp/output.yaml b/components/eamxx/tests/multi-process/physics_only/mam/optics_rrtmgp/output.yaml index 24e1dfbb3e7..bdda48ebe45 100644 --- a/components/eamxx/tests/multi-process/physics_only/mam/optics_rrtmgp/output.yaml +++ b/components/eamxx/tests/multi-process/physics_only/mam/optics_rrtmgp/output.yaml @@ -26,5 +26,4 @@ Field Names: output_control: Frequency: ${NUM_STEPS} frequency_units: nsteps - MPI Ranks in Filename: true ... diff --git a/components/eamxx/tests/multi-process/physics_only/shoc_p3_nudging/CMakeLists.txt b/components/eamxx/tests/multi-process/physics_only/shoc_p3_nudging/CMakeLists.txt index 973e58b7da5..62c4b6d2266 100644 --- a/components/eamxx/tests/multi-process/physics_only/shoc_p3_nudging/CMakeLists.txt +++ b/components/eamxx/tests/multi-process/physics_only/shoc_p3_nudging/CMakeLists.txt @@ -19,7 +19,6 @@ CreateUnitTest(create_vert_remap_and_weights "create_vert_remap_and_weights.cpp" # Run a test to setup nudging source data: set (NUM_STEPS 5) set (POSTFIX source_data) -set (ADD_RANKS false) set (ATM_TIME_STEP 300) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/input_source_data.yaml ${CMAKE_CURRENT_BINARY_DIR}/input_source_data.yaml) @@ -36,7 +35,6 @@ CreateUnitTestFromExec (shoc_p3_source shoc_p3_nudging set (NUM_STEPS 5) set (ATM_TIME_STEP 300) set (POSTFIX nudged) -set (ADD_RANKS true) set (VERT_TYPE TIME_DEPENDENT_3D_PROFILE) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/input_nudging.yaml ${CMAKE_CURRENT_BINARY_DIR}/input_nudging.yaml) diff --git a/components/eamxx/tests/multi-process/physics_only/shoc_p3_nudging/input_nudging.yaml b/components/eamxx/tests/multi-process/physics_only/shoc_p3_nudging/input_nudging.yaml index 2b33d15f9f7..6a4c0003376 100644 --- a/components/eamxx/tests/multi-process/physics_only/shoc_p3_nudging/input_nudging.yaml +++ b/components/eamxx/tests/multi-process/physics_only/shoc_p3_nudging/input_nudging.yaml @@ -14,7 +14,7 @@ atmosphere_processes: p3: max_total_ni: 720.0e3 nudging: - nudging_filenames_patterns: [shoc_p3_source_data_${POSTFIX}.INSTANT.nsteps_x${NUM_STEPS}.${RUN_T0}.nc] + nudging_filenames_patterns: [shoc_p3_source_data_${POSTFIX}.INSTANT.nsteps_x${NUM_STEPS}.np1.${RUN_T0}.nc] nudging_fields: ["T_mid", "qv"] nudging_timescale: 1000 use_nudging_weights: true diff --git a/components/eamxx/tests/multi-process/physics_only/shoc_p3_nudging/output.yaml b/components/eamxx/tests/multi-process/physics_only/shoc_p3_nudging/output.yaml index e740c5cdf1f..d9b573068ae 100644 --- a/components/eamxx/tests/multi-process/physics_only/shoc_p3_nudging/output.yaml +++ b/components/eamxx/tests/multi-process/physics_only/shoc_p3_nudging/output.yaml @@ -3,7 +3,6 @@ filename_prefix: shoc_p3_${POSTFIX}_nudged Averaging Type: Instant Max Snapshots Per File: 2 -MPI Ranks in Filename: ${ADD_RANKS} Field Names: - p_mid - T_mid diff --git a/components/eamxx/tests/multi-process/physics_only/shoc_p3_nudging/output_remapped.yaml b/components/eamxx/tests/multi-process/physics_only/shoc_p3_nudging/output_remapped.yaml index c75a6a862f7..26f53fbe57f 100644 --- a/components/eamxx/tests/multi-process/physics_only/shoc_p3_nudging/output_remapped.yaml +++ b/components/eamxx/tests/multi-process/physics_only/shoc_p3_nudging/output_remapped.yaml @@ -4,7 +4,6 @@ filename_prefix: shoc_p3_${POSTFIX}_nudged_remapped Averaging Type: Instant Max Snapshots Per File: 2 vertical_remap_file: vertical_remap.nc -MPI Ranks in Filename: ${ADD_RANKS} Field Names: - T_mid - qv diff --git a/components/eamxx/tests/single-process/mam/optics/output.yaml b/components/eamxx/tests/single-process/mam/optics/output.yaml index 8ea3d237933..d0c27e04284 100644 --- a/components/eamxx/tests/single-process/mam/optics/output.yaml +++ b/components/eamxx/tests/single-process/mam/optics/output.yaml @@ -13,5 +13,4 @@ Fields: output_control: Frequency: 2 frequency_units: nsteps - MPI Ranks in Filename: true ...