diff --git a/components/eamxx/src/share/io/scream_io_utils.cpp b/components/eamxx/src/share/io/scream_io_utils.cpp
index 4a0beb813b7..9ff7a641958 100644
--- a/components/eamxx/src/share/io/scream_io_utils.cpp
+++ b/components/eamxx/src/share/io/scream_io_utils.cpp
@@ -4,6 +4,7 @@
 #include "share/util/scream_utils.hpp"
 
 #include <fstream>
+#include <regex>
 
 namespace scream {
 
@@ -11,37 +12,52 @@ std::string find_filename_in_rpointer (
     const std::string& filename_prefix,
     const bool model_restart,
     const ekat::Comm& comm,
-    const util::TimeStamp& run_t0)
+    const util::TimeStamp& run_t0,
+    const OutputAvgType avg_type,
+    const IOControl& control)
 {
   std::string filename;
   bool found = false;
   std::string content;
   std::string suffix = model_restart ? ".r." : ".rhist.";
+  // Escape regex metacharacters, so that caller-provided pieces of the filename
+  // (e.g., a '.' in the prefix) are matched literally rather than as wildcards
+  auto escape = [] (const std::string& s) {
+    static const std::regex special (R"([.^$|()\[\]{}*+?\\])");
+    return std::regex_replace(s,special,R"(\$&)");
+  };
+  std::string pattern_str = escape(filename_prefix + suffix);
+
+  // The AD will pass a default constructed control, since it doesn't know the values
+  // of REST_N/REST_OPTION used in the previous run. Also, model restart is *always* INSTANT.
+  if (model_restart) {
+    EKAT_REQUIRE_MSG (avg_type==OutputAvgType::Instant,
+        "Error! Model restart output should have INSTANT avg type.\n"
+        " - input avg_type: " + e2str(avg_type) + "\n");
+    pattern_str += e2str(OutputAvgType::Instant) + R"(\.n(step|sec|min|hour|day|month|year)s_x\d+)";
+  } else {
+    EKAT_REQUIRE_MSG (control.output_enabled(),
+        "Error! When restarting an output stream, we need a valid IOControl structure.\n"
+        " - filename prefix: " + filename_prefix + "\n");
+    pattern_str += e2str(avg_type) + R"(\.)" + escape(control.frequency_units) + "_x" + std::to_string(control.frequency);
+  }
+  pattern_str += R"(\.)" + escape(run_t0.to_string()) + R"(\.nc)";
+  std::regex pattern (pattern_str);
+
   if (comm.am_i_root()) {
     std::ifstream rpointer_file;
 
+    std::string line;
     rpointer_file.open("rpointer.atm");
 
-    // If the timestamp is in the filename, then the filename ends with "S.nc",
-    // with S being the string representation of the timestamp
-    auto ts_len = run_t0.to_string().size();
-    auto extract_ts = [&] (const std::string& line) -> util::TimeStamp {
-      auto min_size = ts_len+3;
-      if (line.size()>=min_size) {
-        auto ts_str = line.substr(line.size()-min_size,ts_len);
-        auto ts = util::str_to_time_stamp(ts_str);
-        return ts;
-      } else {
-        return util::TimeStamp();
-      }
-    };
-
-    while ((rpointer_file >> line) and not found) {
+    while (std::getline(rpointer_file,line)) {
       content += line + "\n";
 
-      found = line.find(filename_prefix+suffix) != std::string::npos &&
-              extract_ts(line)==run_t0;
-      filename = line;
+      if (std::regex_match(line,pattern)) {
+        filename = line;
+        found = true;
+        break;
+      }
     }
   }
 
@@ -52,18 +68,23 @@ std::string find_filename_in_rpointer (
   if (not found) {
     broadcast_string(content,comm,comm.root_rank());
 
-    // If the history restart file is not found, it must be because the last
-    // model restart step coincided with a model output step, in which case
-    // a restart history file is not written.
-    // If that's the case, *disable* output restart, by setting
-    // 'Restart'->'Perform Restart' = false
-    // in the input parameter list
-    EKAT_ERROR_MSG (
-        "Error! Restart requested, but no restart file found in 'rpointer.atm'.\n"
-        " restart filename prefix: " + filename_prefix + "\n"
-        " restart file type: " + std::string(model_restart ? "model restart" : "history restart") + "\n"
-        " run t0 : " + run_t0.to_string() + "\n"
-        " rpointer content:\n" + content);
+    if (model_restart) {
+      EKAT_ERROR_MSG (
+          "Error! Restart requested, but no model restart file found in 'rpointer.atm'.\n"
+          " model restart filename prefix: " + filename_prefix + "\n"
+          " run t0 : " + run_t0.to_string() + "\n"
+          " rpointer content:\n" + content + "\n\n");
+    } else {
+      EKAT_ERROR_MSG (
+          "Error! Restart requested, but no history restart file found in 'rpointer.atm'.\n"
+          " hist restart filename prefix: " + filename_prefix + "\n"
+          " run t0 : " + run_t0.to_string() + "\n"
+          " avg_type : " + e2str(avg_type) + "\n"
+          " output freq : " + std::to_string(control.frequency) + "\n"
+          " output freq units: " + control.frequency_units + "\n"
+          " rpointer content:\n" + content + "\n\n"
+          " Did you change output specs (avg type, freq, or freq units) across restart? If so, please, remember that it is not allowed.\n");
+    }
   }
 
   // Have the root rank communicate the nc filename
diff --git a/components/eamxx/src/share/io/scream_io_utils.hpp b/components/eamxx/src/share/io/scream_io_utils.hpp
index efb2a4fd65b..01bc46e1e60 100644
--- a/components/eamxx/src/share/io/scream_io_utils.hpp
+++ b/components/eamxx/src/share/io/scream_io_utils.hpp
@@ -1,6 +1,7 @@
 #ifndef SCREAM_IO_UTILS_HPP
 #define SCREAM_IO_UTILS_HPP
 
+#include "scream_io_control.hpp"
 #include "share/util/scream_time_stamp.hpp"
 
 #include
@@ -59,11 +60,17 @@ inline OutputAvgType str2avg (const std::string& s) {
   return OAT::Invalid;
 }
 
+// The AD will pass a default constructed control, since it doesn't know the values
+// of REST_N/REST_OPTION used in the previous run
+// Output streams MUST pass a valid control structure, because we need to differentiate
+// between, e.g., streams with same filename prefix, but different output freq specs
 std::string find_filename_in_rpointer (
-    const std::string& casename,
+    const std::string& filename_prefix,
     const bool model_restart,
     const ekat::Comm& comm,
-    const util::TimeStamp& run_t0);
+    const util::TimeStamp& run_t0,
+    const OutputAvgType avg_type = OutputAvgType::Instant,
+    const IOControl& control = {});
 
 struct LongNames {
 
diff --git a/components/eamxx/src/share/io/scream_output_manager.cpp b/components/eamxx/src/share/io/scream_output_manager.cpp
index b24b473d0b1..7797a0f76bd 100644
--- a/components/eamxx/src/share/io/scream_output_manager.cpp
+++ b/components/eamxx/src/share/io/scream_output_manager.cpp
@@ -171,7 +171,9 @@ setup (const ekat::Comm& io_comm, const ekat::ParameterList& params,
 
   if (perform_history_restart) {
     using namespace scorpio;
-    auto rhist_file = find_filename_in_rpointer(hist_restart_filename_prefix,false,m_io_comm,m_run_t0);
+    IOFileSpecs hist_restart_specs;
+    hist_restart_specs.ftype = FileType::HistoryRestart;
+    auto rhist_file = find_filename_in_rpointer(hist_restart_filename_prefix,false,m_io_comm,m_run_t0,m_avg_type,m_output_control);
 
     scorpio::register_file(rhist_file,scorpio::Read);
     // From restart file, get the time of last write, as well as the current size of the avg sample
@@ -196,22 +198,8 @@ setup (const ekat::Comm& io_comm, const ekat::ParameterList& params,
 
     // We do NOT allow changing output specs across restart. If you do want to change
     // any of these, you MUST start a new output stream (e.g., setting 'Perform Restart: false')
-    auto old_freq = scorpio::get_attribute(rhist_file,"GLOBAL","averaging_frequency");
-    EKAT_REQUIRE_MSG (old_freq == m_output_control.frequency,
-        "Error! Cannot change frequency when performing history restart.\n"
-        " - old freq: " << old_freq << "\n"
-        " - new freq: " << m_output_control.frequency << "\n");
-    auto old_freq_units = scorpio::get_attribute(rhist_file,"GLOBAL","averaging_frequency_units");
-    EKAT_REQUIRE_MSG (old_freq_units == m_output_control.frequency_units,
-        "Error! Cannot change frequency units when performing history restart.\n"
-        " - old freq units: " << old_freq_units << "\n"
-        " - new freq units: " << m_output_control.frequency_units << "\n");
-    auto old_avg_type = scorpio::get_attribute(rhist_file,"GLOBAL","averaging_type");
-    EKAT_REQUIRE_MSG (old_avg_type == e2str(m_avg_type),
-        "Error! Cannot change avg type when performing history restart.\n"
-        " - old avg type: " << old_avg_type + "\n"
-        " - new avg type: " << e2str(m_avg_type) << "\n");
-
+    // NOTE: we do not check that freq/freq_units/avg_type are not changed: since we used
+    // that info to find the correct rhist file, we already know that they match!
     auto old_storage_type = scorpio::get_attribute(rhist_file,"GLOBAL","file_max_storage_type");
     EKAT_REQUIRE_MSG (old_storage_type == e2str(m_output_file_specs.storage.type),
         "Error! Cannot change file storage type when performing history restart.\n"
diff --git a/components/eamxx/src/share/io/tests/io_utils.cpp b/components/eamxx/src/share/io/tests/io_utils.cpp
index 77779307a72..5ee23d75b37 100644
--- a/components/eamxx/src/share/io/tests/io_utils.cpp
+++ b/components/eamxx/src/share/io/tests/io_utils.cpp
@@ -9,6 +9,9 @@
 TEST_CASE ("find_filename_in_rpointer") {
   using namespace scream;
 
+  constexpr auto AVG = OutputAvgType::Average;
+  constexpr auto INST = OutputAvgType::Instant;
+
   ekat::Comm comm(MPI_COMM_WORLD);
 
   util::TimeStamp t0({2023,9,7},{12,0,0});
@@ -17,21 +20,34 @@ TEST_CASE ("find_filename_in_rpointer") {
   // Create a dummy rpointer
   std::ofstream rpointer ("rpointer.atm");
 
-  rpointer << "foo.r." + t0.to_string() + ".nc\n";
-  rpointer << "bar2.rhist." + t0.to_string() + ".nc\n";
-  rpointer << "bar.rhist." + t0.to_string() + ".nc\n";
-  rpointer.close();
+  IOControl foo_c, bar_c, bar2_c;
+  foo_c.frequency  = 3; foo_c.frequency_units  = "nsteps";
+  bar_c.frequency  = 1; bar_c.frequency_units  = "ndays";
+  bar2_c.frequency = 6; bar2_c.frequency_units = "nhours";
 
-  // Now test find_filename_in_rpointer with different inputs
+  std::string foo_fname = "foo.r.INSTANT.nsteps_x3." + t0.to_string() + ".nc";
+  std::string bar_fname = "bar.rhist.AVERAGE.ndays_x1." + t0.to_string() + ".nc";
+  std::string bar2_fname = "bar.rhist.AVERAGE.nhours_x6." + t0.to_string() + ".nc";
 
-  REQUIRE_THROWS (find_filename_in_rpointer("baz",false,comm,t0)); // wrong prefix
-  REQUIRE_THROWS (find_filename_in_rpointer("bar",false,comm,t1)); // wrong timestamp
-  REQUIRE_THROWS (find_filename_in_rpointer("bar",true, comm,t0)); // bar is not model restart
-  REQUIRE_THROWS (find_filename_in_rpointer("foo",false,comm,t0)); // foo is model restart
+  rpointer << foo_fname<< "\n";
+  rpointer << bar_fname<< "\n";
+  rpointer << bar2_fname << "\n";
+  rpointer.close();
 
-  REQUIRE (find_filename_in_rpointer("bar", false,comm,t0)==("bar.rhist."+t0.to_string()+".nc"));
-  REQUIRE (find_filename_in_rpointer("bar2",false,comm,t0)==("bar2.rhist."+t0.to_string()+".nc"));
-  REQUIRE (find_filename_in_rpointer("foo", true, comm,t0)==("foo.r."+t0.to_string()+".nc"));
+  // Now test find_filename_in_rpointer with different inputs
+  REQUIRE_THROWS (find_filename_in_rpointer("baz",false,comm,t0,AVG)); // missing control (needed for rhist files)
+  REQUIRE_THROWS (find_filename_in_rpointer("baz",false,comm,t0,AVG,bar_c)); // wrong prefix
+  REQUIRE_THROWS (find_filename_in_rpointer("b.r",false,comm,t0,AVG,bar_c)); // '.' in prefix must match literally
+  REQUIRE_THROWS (find_filename_in_rpointer("bar",false,comm,t1,AVG,bar_c)); // wrong timestamp
+  REQUIRE_THROWS (find_filename_in_rpointer("bar",true, comm,t0,AVG,bar_c)); // bar is not model restart
+  REQUIRE_THROWS (find_filename_in_rpointer("bar",false,comm,t0,INST,bar_c)); // wrong avg type
+  REQUIRE_THROWS (find_filename_in_rpointer("bar",false,comm,t0,AVG,foo_c)); // wrong freq specs
+  REQUIRE_THROWS (find_filename_in_rpointer("foo",false,comm,t0,INST,foo_c)); // foo is model restart
+  REQUIRE_THROWS (find_filename_in_rpointer("foo",true,comm,t0,AVG)); // model restart MUST be INSTANT
+
+  REQUIRE (find_filename_in_rpointer("bar",false,comm,t0,AVG,bar_c)==bar_fname);
+  REQUIRE (find_filename_in_rpointer("bar",false,comm,t0,AVG,bar2_c)==bar2_fname);
+  REQUIRE (find_filename_in_rpointer("foo",true, comm,t0)==foo_fname);
 }
 
 TEST_CASE ("io_control") {