diff --git a/src/io/ParameterMap.cpp b/src/io/ParameterMap.cpp index 6f19c129b..7de922f47 100644 --- a/src/io/ParameterMap.cpp +++ b/src/io/ParameterMap.cpp @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include "../global/global.h" // MAXLEN #include "../io/io.h" // chprintf @@ -92,77 +94,307 @@ param_details::TypeErr param_details::try_string_(const std::string& str, std::s return param_details::TypeErr::none; } -/*! \brief Gets rid of trailing and leading whitespace. */ -static char* my_trim(char* s) +namespace +{ // stuff inside an anonymous namespace is local to this file + +/*! Helper class that specifes the parts of a string correspond to the key and the value */ +struct KeyValueViews { + std::string_view key; + std::string_view value; +}; + +/*! \brief Try to extract the parts of nul-terminated c-string that refers to a parameter name + * and a parameter value. If there are any issues, views will be empty optional is returned. */ +KeyValueViews Try_Extract_Key_Value_View(const char* buffer) { - /* Initialize start, end pointers */ - char *s1 = s, *s2 = &s[strlen(s) - 1]; + // create a view that wraps the full buffer (there aren't any allocations) + std::string_view full_view(buffer); + + // we explicitly mimic the old behavior - /* Trim and delimit right side */ - while ((std::isspace(*s2)) && (s2 >= s1)) { - s2--; + // find the position of the equal sign + std::size_t pos = full_view.find('='); + + // handle the edge-cases (where we can't parse a key-value pair) + if ((pos == 0) or // '=' sign is the first character + ((pos + 1) == full_view.size()) or // '=' sign is the last character + (pos == std::string_view::npos)) { // there is no '=' sign + return {std::string_view(), std::string_view()}; } - *(s2 + 1) = '\0'; + return {full_view.substr(0, pos), full_view.substr(pos + 1)}; +} +void rstrip(std::string_view& s) +{ + std::size_t cur_len = s.size(); + while ((cur_len > 0) and std::isspace(s[cur_len - 1])) { + cur_len--; + } + if (cur_len < s.size()) s = s.substr(0, cur_len); +} + +/*! \brief Modifies the string_view to remove trailing and leading whitespace. + * + * \note + * Since this is a string_view, we don't actually mutate any characters + */ +void my_trim(std::string_view& s) +{ /* Trim left side */ - while ((std::isspace(*s1)) && (s1 < s2)) { - s1++; + std::size_t start = 0; + const std::size_t max_start = s.size(); + while ((start < max_start) and std::isspace(s[start])) { + start++; } + if (start > 0) s = s.substr(start); - /* Copy finished string */ - std::strcpy(s, s1); - return s; + rstrip(s); } -ParameterMap::ParameterMap(std::FILE* fp, int argc, char** argv) -{ - int buf; +/*! \brief Object used to read in lines from a `FILE*` + * + * This primarily exists to help with error/warning formatting + */ +struct FileLineStream { + long long line_num; + FILE* fp; char *s, buff[256]; + FileLineStream(FILE* fp) : line_num{-1}, fp{fp}, s{nullptr} {} + + bool next() + { + this->line_num++; + return (this->s = fgets(this->buff, sizeof this->buff, this->fp)) != NULL; + } + + // formats a slightly more generic error + [[noreturn]] void error(std::string reason) const + { + std::string msg = "parsing problem\n"; + // specify the location + msg += this->location_description_(); + // include the reason + msg += " err: "; + msg += reason; + throw std::runtime_error(msg); + } + + // more-detailed error formatting + [[noreturn]] void error(std::string reason, std::string full_name, bool is_table_header) const + { + std::string msg = (is_table_header) ? "table-header parsing problem\n" : "parameter-name parsing problem\n"; + // give table/parameter name + msg += (is_table_header) ? " table-name: " : " full-parameter-name: "; + msg += full_name; + msg += '\n'; + // specify the location + msg += this->location_description_(); + // include the reason + msg += " err: "; + msg += reason; + throw std::runtime_error(msg); + } + + void warn(std::string reason) const + { + std::string msg = "parameter-file parsing warning\n"; + std::string location_description = this->location_description_(); + chprintf("parameter-file parsing warning\n%s message:%s\n", location_description.c_str(), reason.c_str()); + } + + private: + std::string location_description_() const + { + std::string out = " on line "; + out += std::to_string(this->line_num); + out += "of the parameter file: "; + out += this->s; + out += '\n'; + return out; + } +}; + +struct CliLineStream { + char** next_arg; + char** stop; + char* s; + + CliLineStream(int argc, char** argv) : next_arg{argv}, stop{argv + argc}, s{nullptr} {}; + + bool next() + { + if (this->next_arg == this->stop) return false; + this->s = *this->next_arg; + this->next_arg++; + return true; + } + + [[noreturn]] void error(std::string reason, std::string full_name, bool is_table_header) const + { + CHOLLA_ASSERT(not is_table_header, "something went very wrong - can't parse table header from cli"); + + std::string msg = "parameter-name parsing problem\n"; + // give table/parameter name + msg += " full-parameter-name: "; + msg += full_name; + msg += '\n'; + // specify the location + msg += " from cli arg: "; + msg += s; + // include the reason + msg += " err: "; + msg += reason; + throw std::runtime_error(msg); + } +}; + +/*! Helper function used to handle some parsing-related tasks that come up when considering the + * full name of a parameter-table or a parameter-key. + * + * This does the following: + * 1. Validates the full_name only contains allowed characters + * 2. for a name "a.b.c.d", we step through the "a.b.c", "a.b", and "a" to + * (i) confirm that no-segment is empty + * (ii) ensure that the part is registered as a table + * (iii) ensure that the part does not collide with the name of a parameter + * + * \returns An empty string if there aren't any problems. Otherwise the returned string provides an error message + */ +std::string Process_Full_Name(std::string full_name, std::map>& full_table_map, + const std::map& param_entries) +{ + // first, confirm the name only holds valid characters + std::size_t bad_value_count = 0; + for (char ch : full_name) { + bad_value_count += ((ch != '.') and (ch != '_') and (ch != '-') and not std::isalnum(ch)); + } + if (bad_value_count > 0) { + return "contains an unallowed character"; + } + + // now lets step through the parts of a name (delimited by the '.') + // -> for a name "a.b.c.d", we check "a.b.c", "a.b", and "a" + // -> specifically, we (i) confirm that no-segment is empty + // (ii) ensure that the table is registered + // (iii) ensure there aren't any collisions with parameter-names + const std::size_t size_minus_1 = full_name.size() - 1; + std::size_t rfind_start = size_minus_1; + while (true) { + const std::size_t pos = full_name.rfind('.', rfind_start); + if (pos == std::string_view::npos) return {}; + if (pos == size_minus_1) return "ends with a '.' character"; + if (pos == 0) return "start with a '.' character"; + if (pos == rfind_start) return "contains contiguous '.' characters"; + + std::string_view table_name_prefix(full_name.data(), pos); + + // if table_name_prefix has been seen before, then we're done (its parents have been seen too) + if (full_table_map.find(table_name_prefix) != full_table_map.end()) return {}; + + // register table_name_prefix for the future + std::string table_name_prefix_str(table_name_prefix); + full_table_map[table_name_prefix_str] = false; // we assign false because it was implicitly declared + + if (param_entries.find(table_name_prefix_str) != param_entries.end()) { + return "the (sub)table name collides with the existing \"" + table_name_prefix_str + "\" parameter"; + } + + rfind_start = pos - 1; + } +} + +} // anonymous namespace + +ParameterMap::ParameterMap(std::FILE* fp, int argc, char** argv) +{ CHOLLA_ASSERT(fp != nullptr, "ParameterMap was passed a nullptr rather than an actual file object"); + FileLineStream file_line_stream(fp); + CliLineStream cli_line_stream(argc, argv); + + // to provide consistent table-related behavior to TOML, we need to track the names of tables to ensure that: + // 1. no table is explicitly defined more than once + // 2. no table name collides with a parameter name. + // + // To accomplish this, we: + // -> we add all explicitly defined table-names (e.g. we add "my-table" if we encounter the [my-table] header). + // The associated value is true if it was explicitly defined + // -> we also tracks implicitly defined tables. An implicitly defined table is associated with a false value. + // A table can be implicitly defined: + // 1. In an explicit definition. For example, the [my.first.table] header implicitly defines the table + // "my.first.table". It also implicitly defines the "my.first" and "my" tables when they don't exist + // 2. In a dotted parameter name. For example `my.table.val=3` defines the `my.table.val` parameter. It also + // implicitly defines the `my.table + // -> std::less<> is here so we can compare perform "heterogenous-lookups." In other words, it lets us check if + // the set contains a string specified in a `std::string_view` instance even though the set internally stores + // `std::string` instances (without heterogenous-lookups we couldn't use std::string_view) + std::map> all_tables; + + std::string cur_table_header{}; /* Read next line */ - while ((s = fgets(buff, sizeof buff, fp)) != NULL) { + while (file_line_stream.next()) { + char* buff = file_line_stream.s; + /* Skip blank lines and comments */ if (buff[0] == '\n' || buff[0] == '#' || buff[0] == ';') { continue; } - /* Parse name/value pair from line */ - char name[MAXLEN], value[MAXLEN]; - s = std::strtok(buff, "="); - if (s == NULL) { - continue; - } else { - std::strncpy(name, s, MAXLEN); - } - s = std::strtok(NULL, "="); - if (s == NULL) { - continue; - } else { - std::strncpy(value, s, MAXLEN); + if (buff[0] == '[') { // here we are parsing a header like "[my_table]\n" + std::string_view view(buff); + rstrip(view); // strip off trailing whitespace from the view + if (view.back() != ']') file_line_stream.error("problem parsing a parameter-table header"); + cur_table_header = view.substr(1, view.size() - 2); + if (cur_table_header.size() == 0) file_line_stream.error("empty table-names aren't allowed"); + + // confirm that we haven't seen this header before (and that there isn't a parameter with the same name) + auto search = all_tables.find(cur_table_header); + if ((search != all_tables.end()) and (search->second)) { + // when search->second is false, there's no issue (since the table was never explicitly defined) + file_line_stream.error("the same table header can't appear more than once", cur_table_header, true); + } else if (this->entries_.find(cur_table_header) != this->entries_.end()) { + file_line_stream.error("table-name collides with a parameter of the same name"); + } + + std::string msg = Process_Full_Name(cur_table_header, all_tables, this->entries_); + if (not msg.empty()) file_line_stream.error(msg, cur_table_header, true); + + // record that we've seen this header (for future checks) + all_tables[cur_table_header] = true; + + } else { // Parse name/value pair from line + KeyValueViews kv_pair = Try_Extract_Key_Value_View(buff); + if (kv_pair.key.empty()) { + file_line_stream.warn("skipping line due to invalid format (this may become an error in the future)"); + continue; + } + my_trim(kv_pair.value); + + if (kv_pair.key.find('.') != std::string_view::npos) { + file_line_stream.error("parameter-names in the parameter-file aren't currently allowed to contain a '.'"); + } + std::string full_param_name = (not cur_table_header.empty()) ? (cur_table_header + '.') : std::string{}; + full_param_name += std::string(kv_pair.key); + + std::string msg = Process_Full_Name(full_param_name, all_tables, this->entries_); + if (not msg.empty()) file_line_stream.error(msg, full_param_name, false); + entries_[full_param_name] = {std::string(kv_pair.value), false}; } - my_trim(value); - entries_[std::string(name)] = {std::string(value), false}; } // Parse overriding args from command line - for (int i = 0; i < argc; ++i) { - char name[MAXLEN], value[MAXLEN]; - s = std::strtok(argv[i], "="); - if (s == NULL) { - continue; - } else { - std::strncpy(name, s, MAXLEN); - } - s = std::strtok(NULL, "="); - if (s == NULL) { - continue; - } else { - std::strncpy(value, s, MAXLEN); - } - entries_[std::string(name)] = {std::string(value), false}; - chprintf("Override with %s=%s\n", name, value); + while (cli_line_stream.next()) { + // try to parse the argument + KeyValueViews kv_pair = Try_Extract_Key_Value_View(cli_line_stream.s); + if (kv_pair.key.empty()) continue; + my_trim(kv_pair.value); + std::string key_str(kv_pair.key); + std::string msg = Process_Full_Name(key_str, all_tables, this->entries_); + if (not msg.empty()) cli_line_stream.error(msg, key_str, false); + std::string value_str(kv_pair.value); + chprintf("Override with %s=%s\n", key_str.c_str(), value_str.c_str()); + entries_[key_str] = {value_str, false}; } } @@ -185,4 +417,54 @@ int ParameterMap::warn_unused_parameters(const std::set& ignore_par } } return unused_params; +} + +void ParameterMap::Enforce_Table_Content_Uniform_Access_Status(std::string table_name, bool expect_unused) const +{ + // error check: + std::string_view table_name_view(table_name); + if (table_name_view.back() == '.') table_name_view = table_name_view.substr(0, table_name_view.size() - 1); + CHOLLA_ASSERT(table_name_view.size() > 0, "the table_name_view must contain at least one character"); + + std::string prefix = std::string(table_name_view) + '.'; + std::size_t prefix_size = prefix.size(); + + std::string problematic_parameter{}; + for (auto it = entries_.lower_bound(prefix); it != entries_.end(); ++it) { + const std::string& name = it->first; + const ParameterMap::ParamEntry& param_entry = it->second; + if (name.compare(0, prefix_size, prefix) != 0) break; + + if (param_entry.accessed == expect_unused) { + problematic_parameter = name; + break; + } + } + + if (problematic_parameter.size() == 0) return; // no issues! + + // Report the errors: + if (expect_unused) { + CHOLLA_ERROR("Internal Error: the %s shouldn't have been accessed yet", problematic_parameter.c_str()); + } else { + // gather the parameters that have been accessed (for an informative message) + std::string par_list{}; + for (auto it = entries_.lower_bound(prefix); it != entries_.end(); ++it) { + const std::string& name = it->first; + const ParameterMap::ParamEntry& param_entry = it->second; + if (name.compare(0, prefix_size, prefix) != 0) break; + + if (param_entry.accessed) { + par_list += "\n "; + par_list += name; + } + } + + if (par_list.size() > 0) { + CHOLLA_ERROR("Based on the parameter(s):%s\nthe %s parameter should not be present in the parameter file", + par_list.c_str(), problematic_parameter.c_str()); + } + CHOLLA_ERROR("Something is wrong, the %s parameter should not be present in the parameter file", + problematic_parameter.c_str()); + } } \ No newline at end of file diff --git a/src/io/ParameterMap.h b/src/io/ParameterMap.h index a134b039e..4d848ab13 100644 --- a/src/io/ParameterMap.h +++ b/src/io/ParameterMap.h @@ -78,6 +78,7 @@ inline param_details::TypeErr try_int_(const std::string& str, int& val) */ class ParameterMap { + public: struct ParamEntry { std::string param_str; bool accessed; @@ -198,6 +199,13 @@ class ParameterMap } } + /*! Aborts with an error message if one or more of the parameters in the specified table has been used or has not + * been used. The precise details depend on the `expect_unused` argument. + * + * \note + * It may be better if this were a function that operated on ParameterMap rather than a method */ + void Enforce_Table_Content_Uniform_Access_Status(std::string table_name, bool expect_unused) const; + private: // private helper methods /* helper function template that tries to retrieve values associated with a given parameter. * diff --git a/src/io/ParameterMap_tests.cpp b/src/io/ParameterMap_tests.cpp index 14acd381f..717d57964 100644 --- a/src/io/ParameterMap_tests.cpp +++ b/src/io/ParameterMap_tests.cpp @@ -50,7 +50,7 @@ TEST(tALLParameterMap, Methodsize) const char* CONTENTS = R"LITERAL( # My sample parameters tout=50000 -")LITERAL"; +)LITERAL"; DummyFile dummy = DummyFile(CONTENTS); ParameterMap pmap = ParameterMap(dummy.fp, 0, nullptr); @@ -69,7 +69,7 @@ n_hydro=10 xmin=-5 mypar=true mypar2=false -")LITERAL"; +)LITERAL"; TEST(tALLParameterMap, Methodhasparam) { @@ -94,7 +94,7 @@ gamma=1.4 init=Disk_3D xmin=-5 mypar=true -")LITERAL"; +)LITERAL"; DummyFile dummy = DummyFile(CONTENTS); ParameterMap pmap = ParameterMap(dummy.fp, 0, nullptr); @@ -190,7 +190,7 @@ TEST(tALLParameterMap, Methodwarnunusedparameters) tout=50000 gamma=1.4 mypar=true -")LITERAL"; +)LITERAL"; DummyFile dummy = DummyFile(CONTENTS); ParameterMap pmap = ParameterMap(dummy.fp, 0, nullptr); @@ -230,4 +230,189 @@ mypar=true ASSERT_EQ(pmap.warn_unused_parameters({"gamma", "tout"}, false, true), 1); ASSERT_EQ(pmap.warn_unused_parameters({"tout", "gamma", "mypar"}, false, true), 0); ASSERT_EQ(pmap.warn_unused_parameters({"mypar"}, false, true), 0); +} + +// the test code for all of the examples is based on the TOML 1.0.0 specification +// https://toml.io/en/v1.0.0#table + +TEST(tALLParameterMapTables, NoKeys) +{ + const char* CONTENTS = R"LITERAL( +[table] +)LITERAL"; + DummyFile dummy = DummyFile(CONTENTS); + ParameterMap pmap = ParameterMap(dummy.fp, 0, nullptr); + ASSERT_EQ(pmap.size(), 0); +} + +TEST(tALLParameterMapTables, simple) +{ + const char* CONTENTS = R"LITERAL( +[table-1] +key1=-1.0 +key2=123 + +[table-2] +key1=-2.0 +key2=456 +)LITERAL"; + DummyFile dummy = DummyFile(CONTENTS); + ParameterMap pmap = ParameterMap(dummy.fp, 0, nullptr); + ASSERT_EQ(pmap.size(), 4); + pmap.Enforce_Table_Content_Uniform_Access_Status("table-1", true); + ASSERT_EQ(pmap.value("table-1.key1"), -1.0); + ASSERT_EQ(pmap.value("table-1.key2"), 123); + pmap.Enforce_Table_Content_Uniform_Access_Status("table-1", false); + + pmap.Enforce_Table_Content_Uniform_Access_Status("table-2", true); + ASSERT_EQ(pmap.value("table-2.key1"), -2.0); + ASSERT_EQ(pmap.value("table-2.key2"), 456); + pmap.Enforce_Table_Content_Uniform_Access_Status("table-2", false); +} + +TEST(tALLParameterMapTables, EmptyName) +{ + const char* CONTENTS = R"LITERAL( +[] +key1=-1.0 +key2=123 +)LITERAL"; + DummyFile dummy = DummyFile(CONTENTS); + ASSERT_ANY_THROW(ParameterMap pmap = ParameterMap(dummy.fp, 0, nullptr);); +} + +TEST(tALLParameterMapTables, BadNames) +{ + std::vector bad_names = {"[.]", "[ ]", "[ fdssf]", "[sadfasd ]", "[sda.]", + "[.asd]", "[ssad.sd ]", "[ sdas.sd]", "[sds sdsd]"}; + for (const std::string& table_name : bad_names) { + std::string contents = table_name + "\nkey1=-1.0\n"; + DummyFile dummy = DummyFile(contents.c_str()); + ASSERT_ANY_THROW(ParameterMap pmap = ParameterMap(dummy.fp, 0, nullptr);); + } +} + +TEST(tALLParameterMapTables, ForbidTableRedefine) +{ + const char* CONTENTS = R"LITERAL( +[fruit] +apple=1 + +[fruit] +orange=2 +)LITERAL"; + DummyFile dummy = DummyFile(CONTENTS); + ASSERT_ANY_THROW(ParameterMap pmap = ParameterMap(dummy.fp, 0, nullptr);); +} + +TEST(tALLParameterMapTables, DontRedefineTable) +{ + const char* CONTENTS = R"LITERAL( +[fruit] +apple=1 + +[fruit] +orange=2 +)LITERAL"; + DummyFile dummy = DummyFile(CONTENTS); + ASSERT_ANY_THROW(ParameterMap pmap = ParameterMap(dummy.fp, 0, nullptr);); +} + +TEST(tALLParameterMapTables, ForbidTableKeyNameCollision) +{ + const char* CONTENTS = R"LITERAL( +[fruit] +apple=1 + +[fruit.apple] +texture=2 +)LITERAL"; + DummyFile dummy = DummyFile(CONTENTS); + ASSERT_ANY_THROW(ParameterMap pmap = ParameterMap(dummy.fp, 0, nullptr);); +} + +TEST(tALLParameterMapTables, TopLevelTableAndTable) +{ + const char* CONTENTS = R"LITERAL( +# these parameters are in the top-level table +key1=-1.0 +key2=100.0 + +[table] +key1=-2.0 +key2=456 +)LITERAL"; + DummyFile dummy = DummyFile(CONTENTS); + ParameterMap pmap = ParameterMap(dummy.fp, 0, nullptr); + ASSERT_EQ(pmap.size(), 4); + ASSERT_EQ(pmap.value("key1"), -1.0); + ASSERT_EQ(pmap.value("key2"), 100.0); + ASSERT_EQ(pmap.value("table.key1"), -2.0); + ASSERT_EQ(pmap.value("table.key2"), 456); +} + +// for now we forbid dotted keys from being specified inside a parameter file +// (the following cases are technically supported by TOML) +TEST(tALLParameterMapTables, TemporaryForbiddenDottedKeys) +{ + const char* CONTENTS1 = R"LITERAL( +fruit.apple.color=3 +# Defines a table named fruit +# Defines a table named fruit.apple + +fruit.apple.taste.sweet=true +# Defines a table named fruit.apple.taste +# fruit and fruit.apple were already created +)LITERAL"; + DummyFile dummy1 = DummyFile(CONTENTS1); + ASSERT_ANY_THROW(ParameterMap pmap = ParameterMap(dummy1.fp, 0, nullptr);); + + const char* CONTENTS2 = R"LITERAL( +[fruit] +apple.color=1 +apple.taste.sweet=true + +# you should be able to add sub-tables +[fruit.apple.texture] +smooth=true +)LITERAL"; + DummyFile dummy2 = DummyFile(CONTENTS2); + ASSERT_ANY_THROW(ParameterMap pmap = ParameterMap(dummy2.fp, 0, nullptr);); +} + +// the following tests check cases that toml forbids related to dotted-keys +// (because these scenarios redefine tables) +TEST(tALLParameterMapTables, ForbiddenRedefineTableDottedKeys) +{ + const char* CONTENTS1 = R"LITERAL( +[fruit] +apple.color=1 +apple.taste.sweet=true + +[fruit.apple] +)LITERAL"; + DummyFile dummy1 = DummyFile(CONTENTS1); + ASSERT_ANY_THROW(ParameterMap pmap = ParameterMap(dummy1.fp, 0, nullptr);); + + const char* CONTENTS2 = R"LITERAL( +[fruit] +apple.color=1 +apple.taste.sweet=true + +[fruit.apple.taste] +)LITERAL"; + DummyFile dummy2 = DummyFile(CONTENTS2); + ASSERT_ANY_THROW(ParameterMap pmap = ParameterMap(dummy2.fp, 0, nullptr);); + + // it's a little less clear that this last case should correspond to a failure + // based on the wording on the toml site, it sounds like it should be disallowed + const char* CONTENTS3 = R"LITERAL( +[fruit.apple.taste] +sweet=true + +[fruit] +apple.taste.soure=false +)LITERAL"; + DummyFile dummy3 = DummyFile(CONTENTS3); + ASSERT_ANY_THROW(ParameterMap pmap = ParameterMap(dummy3.fp, 0, nullptr);); } \ No newline at end of file