diff --git a/src/Cello/CMakeLists.txt b/src/Cello/CMakeLists.txt index e1ad732a93..2ba9050ff1 100644 --- a/src/Cello/CMakeLists.txt +++ b/src/Cello/CMakeLists.txt @@ -87,8 +87,11 @@ endfunction(addUnitTestBinary) #addUnitTestBinary(test_itindex "test_ItIndex.cpp" "") #addUnitTestBinary(test_scalar "test_Scalar.cpp" "") #addUnitTestBinary(test_enzo_units "test_EnzoUnits.cpp" "") -addUnitTestBinary(test_carrcollec test_CArrCollec.cpp "array") +addUnitTestBinary(test_carr_collec "test_CArrCollec.cpp" "array") addUnitTestBinary(test_cello_array "test_CelloArray.cpp" "array") +addUnitTestBinary( + test_string_ind_rd_only_map "test_StringIndRdOnlyMap.cpp" "array" +) #addUnitTestBinary(test_colormap "test_Colormap.cpp" "io") addUnitTestBinary(test_error "test_Error.cpp" "error") #addUnitTestBinary(test_field "test_Field.cpp" "") diff --git a/src/Cello/_array.hpp b/src/Cello/_array.hpp index 502dcb5524..91028f590b 100644 --- a/src/Cello/_array.hpp +++ b/src/Cello/_array.hpp @@ -24,5 +24,6 @@ #include "array_CelloArray.hpp" #include "array_CArrCollec.hpp" +#include "array_StringIndRdOnlyMap.hpp" #endif /* _ARRAY_HPP */ diff --git a/src/Cello/array_CArrCollec.hpp b/src/Cello/array_CArrCollec.hpp index e6c9137a01..9a9830d34b 100644 --- a/src/Cello/array_CArrCollec.hpp +++ b/src/Cello/array_CArrCollec.hpp @@ -9,6 +9,8 @@ #include #include +#include // std::shared_ptr, std::default_delete +#include // std::swap #include #ifndef ARRAY_C_ARR_COLLEC_HPP @@ -50,16 +52,48 @@ namespace detail { //---------------------------------------------------------------------- template - class VecBackedCArrCollec_{ - /// equivalent to an array of pointers + struct SharedBuffer_{ + + SharedBuffer_() = default; + + SharedBuffer_(const std::vector &v) noexcept + : arr_(std::shared_ptr(new T[v.size()](), std::default_delete())), + length_(v.size()) + { for (std::size_t i = 0; i < length_; i++) { arr_.get()[i] = v[i]; } } + + const T& operator[](std::size_t i) const noexcept { return arr_.get()[i]; } + T& operator[](std::size_t i) noexcept { return arr_.get()[i]; } + std::size_t size() const noexcept {return length_;} + + void swap(SharedBuffer_& other) noexcept + { + this->arr_.swap(other.arr_); + std::swap(this->length_, other.length_); + } + + private: + std::shared_ptr arr_; + std::size_t length_; + }; + +//---------------------------------------------------------------------- + + template + class ArrOfPtrsCArrCollec_{ + /// equivalent to an array of pointers (where each pointer is a CelloArray) + /// - we track the arrays lifetime with a std::shared_ptr to makes copies + /// cheaper. The disadvantage to this should be minimal since we don't + /// allow the "array" to be directly mutated after construction (note: + /// the "pointers" can't be mutated but the elements at the "pointer" + /// addresses can be mutated) public: - VecBackedCArrCollec_() = default; + ArrOfPtrsCArrCollec_() = default; - VecBackedCArrCollec_(const std::vector> &arrays) noexcept + ArrOfPtrsCArrCollec_(const std::vector> &arrays) noexcept : arrays_(arrays) - { confirm_shared_shape_("VecBackedCArrCollec_",arrays); } + { confirm_shared_shape_("ArrOfPtrsCArrCollec_", arrays); } const CelloArray operator[](std::size_t index) const noexcept { return arrays_[index]; } @@ -69,16 +103,16 @@ namespace detail { constexpr bool contiguous_items() const {return false;} const CelloArray get_backing_array() const noexcept{ - ERROR("VecBackedCArrCollec__::get_backing_array", + ERROR("ArrOfPtrsCArrCollec_::get_backing_array", "This is an invalid method call"); } - friend void swap(VecBackedCArrCollec_& a, VecBackedCArrCollec_& b) + friend void swap(ArrOfPtrsCArrCollec_& a, ArrOfPtrsCArrCollec_& b) noexcept { a.arrays_.swap(b.arrays_); } // There might be some benefit to not directly constructing subarrays and // lazily evaluating them later - VecBackedCArrCollec_ subarray_collec(const CSlice &slc_z, + ArrOfPtrsCArrCollec_ subarray_collec(const CSlice &slc_z, const CSlice &slc_y, const CSlice &slc_x) const noexcept { @@ -86,12 +120,12 @@ namespace detail { for (std::size_t i = 0; i < arrays_.size(); i++){ temp.push_back(arrays_[i].subarray(slc_z, slc_y, slc_x)); } - return VecBackedCArrCollec_(temp); + return ArrOfPtrsCArrCollec_(temp); } private: // ordered list of arrays - std::vector> arrays_; + SharedBuffer_> arrays_; }; //---------------------------------------------------------------------- @@ -153,24 +187,24 @@ class CArrCollec{ /// /// This is a hybrid data-type that abstracts 2 separate implementations for /// collections of CelloArrays. Under the hood, the arrays are either stored - /// in a single 4D Contiguous Array (see SingleAddressCArrCollec_) or a - /// vector of 3D arrays (see VecBackedCArrCollec_) + /// in a single 4D Contiguous Array (see SingleAddressCArrCollec_) or an + /// array of 3D CelloArrays (see ArrOfPtrsCArrCollec_) /// /// The implementation of this hybrid data-structure is fairly crude. If we /// decide that we want to support this hybrid data-type in the long-term, /// we probably want to consider using boost::variant/a backport of /// std::variant or take advantage of the fact that we can represent the - /// contents of a SingleAddressCArrCollec_ with a VecBackedCArrCollec_ + /// contents of a SingleAddressCArrCollec_ with a ArrOfPtrsCArrCollec_ private: // tags how the arrays are stored (whether they're stored in a single - // contiguous CelloArray or a vector of CelloArrays) - enum class Tag {CONTIG, VEC}; + // contiguous CelloArray or stored like an array of pointers) + enum class Tag {CONTIG, ARR_OF_PTR}; Tag tag_; union{ detail::SingleAddressCArrCollec_ single_arr_; - detail::VecBackedCArrCollec_ vec_of_arr_; + detail::ArrOfPtrsCArrCollec_ arr_of_ptr_; }; private: // helper methods: @@ -180,10 +214,15 @@ class CArrCollec{ if (currently_initialized) { dealloc_active_member_(); } tag_ = tag; - using namespace detail; switch (tag_){ - case Tag::CONTIG: {new(&single_arr_) SingleAddressCArrCollec_; break; } - case Tag::VEC: {new(&vec_of_arr_) VecBackedCArrCollec_; break; } + case Tag::CONTIG: { + new(&single_arr_) detail::SingleAddressCArrCollec_; + break; + } + case Tag::ARR_OF_PTR: { + new(&arr_of_ptr_) detail::ArrOfPtrsCArrCollec_; + break; + } } } @@ -191,8 +230,8 @@ class CArrCollec{ // need to explicitly call destructor because we use placement new void dealloc_active_member_() noexcept{ switch (tag_){ - case Tag::CONTIG: { single_arr_.~SingleAddressCArrCollec_(); break; } - case Tag::VEC: { vec_of_arr_.~VecBackedCArrCollec_(); break; } + case Tag::CONTIG: { single_arr_.~SingleAddressCArrCollec_(); break; } + case Tag::ARR_OF_PTR: { arr_of_ptr_.~ArrOfPtrsCArrCollec_(); break; } } } @@ -208,16 +247,16 @@ class CArrCollec{ /// construct from vector of arrays CArrCollec(const std::vector>& v) noexcept{ - activate_member_(Tag::VEC, false); - vec_of_arr_ = detail::VecBackedCArrCollec_(v); + activate_member_(Tag::ARR_OF_PTR, false); + arr_of_ptr_ = detail::ArrOfPtrsCArrCollec_(v); } /// copy constructor CArrCollec(const CArrCollec& other) noexcept{ activate_member_(other.tag_, false); switch (other.tag_){ - case Tag::CONTIG: { single_arr_ = other.single_arr_; break; } - case Tag::VEC: { vec_of_arr_ = other.vec_of_arr_; break; } + case Tag::CONTIG: { single_arr_ = other.single_arr_; break; } + case Tag::ARR_OF_PTR: { arr_of_ptr_ = other.arr_of_ptr_; break; } } } @@ -245,14 +284,14 @@ class CArrCollec{ if (&a == &b) { return; } if (a.tag_ == b.tag_){ switch (a.tag_){ - case Tag::CONTIG: { swap(a.single_arr_, b.single_arr_); break; } - case Tag::VEC: { swap(a.vec_of_arr_, b.vec_of_arr_); break; } + case Tag::CONTIG: { swap(a.single_arr_, b.single_arr_); break; } + case Tag::ARR_OF_PTR: { swap(a.arr_of_ptr_, b.arr_of_ptr_); break; } } } else if (a.tag_ == Tag::CONTIG){ detail::SingleAddressCArrCollec_ temp_single_arr; swap(temp_single_arr, a.single_arr_); - a.activate_member_(Tag::VEC, true); - swap(a.vec_of_arr_, b.vec_of_arr_); + a.activate_member_(Tag::ARR_OF_PTR, true); + swap(a.arr_of_ptr_, b.arr_of_ptr_); b.activate_member_(Tag::CONTIG, true); swap(temp_single_arr, b.single_arr_); } else { @@ -263,8 +302,8 @@ class CArrCollec{ /// Returns the number of arrays held in the collection std::size_t size() const noexcept { switch (tag_){ - case Tag::CONTIG: { return single_arr_.size(); } - case Tag::VEC: { return vec_of_arr_.size(); } + case Tag::CONTIG: { return single_arr_.size(); } + case Tag::ARR_OF_PTR: { return arr_of_ptr_.size(); } } ERROR("CArrCollec::size", "problem with exhaustive switch-statement"); } @@ -276,8 +315,8 @@ class CArrCollec{ "Can't retrieve value at %zu when size() is %zu", i, n_elements, i < n_elements); switch (tag_){ - case Tag::CONTIG: { return single_arr_[i]; } - case Tag::VEC: { return vec_of_arr_[i]; } + case Tag::CONTIG: { return single_arr_[i]; } + case Tag::ARR_OF_PTR: { return arr_of_ptr_[i]; } } ERROR("CArrCollec::operator[]", "problem with exhaustive switch-statement"); } @@ -286,8 +325,8 @@ class CArrCollec{ /// (Alternatively they can be stored as an array of pointers) bool contiguous_items() const noexcept { switch (tag_){ - case Tag::CONTIG: { return single_arr_.contiguous_items(); } - case Tag::VEC: { return vec_of_arr_.contiguous_items(); } + case Tag::CONTIG: { return single_arr_.contiguous_items(); } + case Tag::ARR_OF_PTR: { return arr_of_ptr_.contiguous_items(); } } ERROR("CArrCollec::contiguous_items", "problem with exhaustive switch-statement"); @@ -303,8 +342,8 @@ class CArrCollec{ /// the `contiguous_items()` method returns `false`. const CelloArray get_backing_array() const noexcept { switch (tag_){ - case Tag::CONTIG: { return single_arr_.get_backing_array(); } - case Tag::VEC: { return vec_of_arr_.get_backing_array(); } + case Tag::CONTIG: { return single_arr_.get_backing_array(); } + case Tag::ARR_OF_PTR: { return arr_of_ptr_.get_backing_array(); } } ERROR("CArrCollec::get_backing_array", "problem with exhaustive switch-statement"); @@ -346,8 +385,8 @@ class CArrCollec{ out.single_arr_ = single_arr_.subarray_collec(slc_z,slc_y,slc_x); return out; } - case Tag::VEC: { - out.vec_of_arr_ = vec_of_arr_.subarray_collec(slc_z,slc_y,slc_x); + case Tag::ARR_OF_PTR: { + out.arr_of_ptr_ = arr_of_ptr_.subarray_collec(slc_z,slc_y,slc_x); return out; } } diff --git a/src/Cello/array_StringIndRdOnlyMap.hpp b/src/Cello/array_StringIndRdOnlyMap.hpp new file mode 100644 index 0000000000..31c9409d92 --- /dev/null +++ b/src/Cello/array_StringIndRdOnlyMap.hpp @@ -0,0 +1,219 @@ +// See LICENSE_CELLO file for license and copyright information + +/// @file array_StringIndRdOnlyMap.hpp +/// @author Matthew Abruzzo (matthewabruzzo@gmail.com) +/// @date Thurs May 30 2019 +/// @brief Declaration and implementation of the StringIndRdOnlyMap class + +#include +#include // hash +#include +#include +#include +#include + +#ifndef ARRAY_STRING_IND_RD_ONLY_MAP_HPP +#define ARRAY_STRING_IND_RD_ONLY_MAP_HPP + +//---------------------------------------------------------------------- + +/// @def INVERSE_LOAD_FACTOR +/// @brief the load factor specifies the fraction of the capcity of the Hash +/// table that is filled. This should be an integer. +/// +/// This should not be 1. Generally, the larger this is, the fewer collisions +/// there are, but the more memory is required. If this is too large, it may +/// actually slow down lookups. +#define INVERSE_LOAD_FACTOR 2 + +//---------------------------------------------------------------------- + +class StringIndRdOnlyMap{ + + /// @class StringIndRdOnlyMap + /// @ingroup Array + /// @brief [\ref Array] The primary purpose of this is to associate string + /// keys with indices. If you have `n` keys, then they will be + /// associated with an ordered set values from `0` to `n-1`. + /// + /// This is primarily intended to be used in the implementation of the array + /// map (but may be broadly useful for other applications). + /// + /// To make instances cheap to copy, we store the actual hash table in a + /// shared_ptr. There is no downside to doing this since the hash table + /// can never be mutated after it is constructed. + /// + /// We implement our own simple hash-table instead of using one of the standard + /// library datastructures (i.e. ``std::map``) to avoid an extra level of + /// indirection. This uses a simple open-addressed hash-table (with linear + /// probing to address collisions). Since we don't allow the contents to be + /// mutated, most of the negatives of an open-addressed hash-table do not + /// apply. + /// + /// We prioritized making the implementation fast for looking up the index + /// associated with a given key. This also supports looking up the value of + /// the `i`th key, with the `key` method, (basically a reverse lookup). But, + /// the `key` method is mostly for convenience and is slow. If we ever + /// the `key` method to be faster, this class could additionally track a + /// shared read-only array that stores the order of keys. + /// + /// There is definitely room for optimizing this implementation: + /// - We could be smarter about the order that we insert keys into the table + /// (in the constructor) to minimize the search time. + /// - We might be able to come up with a better hash function + /// - If we limit the max key size we could have `KVpair` store keys in-place. + /// For example if the max key size never exceeds ~22 characters and + /// there are never more than ~128 entries, then we could define `KVpair` + /// as `KVpair{ char key[23]; uint8_t val; };`. (This reduces indirection). + /// If `sizeof(KVpair)` is `24` or `32`, we also could fit 3 or 2 entries + /// per cache line. + +public: + typedef uint16_t val_t; + struct KVpair{ std::string key; val_t val; }; + +public: + /// Constructor + StringIndRdOnlyMap() = default; + + /// Constructor + inline StringIndRdOnlyMap(const std::vector &keys) noexcept; + + /// returns the value associated with the key + /// + /// The program aborts if the key isn't in the table. + inline val_t operator[](const std::string& key) const noexcept + { + const KVpair& tmp = find_match_or_empty_(key); + if (tmp.key == ""){ + ERROR1("StringIndRdOnlyMap::operator[]", + "there is no key called %s", key.c_str()); + } + return tmp.val; + } + + /// returns the value associated with the key + /// + /// The program aborts if the key isn't in the table. + inline val_t at(const std::string& key) const noexcept + { return (*this)[key]; } + + /// returns the number of entries in the map + inline std::size_t size() const noexcept {return num_keys_;} + + /// checks if the map contains a key + inline bool contains(const std::string& key) const noexcept + { return (find_match_or_empty_(key).key == key); } + + /// Return the ith key (this is effectively a reverse lookup) + inline const std::string& key(val_t i) const noexcept{ + if (i >= (num_keys_)){ + ERROR2("StringIndRdOnlyMap::key", + "Can't find key number %d. There only %d keys.", + (int) i, (int)num_keys_); + } + + /// iterate over over the entire capacity of the hash table + for (uint32_t j = 0; j < capacity_; j++){ + const KVpair& pair = data_.get()[j]; + if ((pair.key != "") & (pair.val == i)){ return pair.key; } + } + + // the following should be unreachable + ERROR2("StringIndRdOnlyMap::find_match_or_empty_", + "something went horribly wrong while searching for key number %d. " + "the max number of keys is %d", + (int) i, (int)num_keys_); + } + +private: + + const KVpair& find_match_or_empty_(const std::string& key) const { + std::size_t hash_code = hash_(key) % capacity_; + + for (std::size_t i = hash_code; i < capacity_; i++){ + const KVpair& pair = data_.get()[i]; + if ((pair.key == key) | (pair.key == "")) {return pair;} + } + + for (std::size_t i = 0; i < hash_code; i++){ + const KVpair& pair = data_.get()[i]; + if ((pair.key == key) | (pair.key == "")) {return pair;} + } + + ERROR("StringIndRdOnlyMap::find_match_or_empty_", + "something went horribly wrong - this should be unreachable"); + } + +private: + std::shared_ptr data_; + std::uint32_t num_keys_; + std::uint32_t capacity_; + std::hash hash_; +}; + +//---------------------------------------------------------------------- + +inline StringIndRdOnlyMap::StringIndRdOnlyMap +(const std::vector &keys) noexcept +{ + + if ((std::size_t)(std::numeric_limits::max()) < keys.size()){ + ERROR("StringIndRdOnlyMap::StringIndRdOnlyMap", + "There are WAY too many keys"); + } + + // determine capacity of hash table + capacity_ = 0; + uint32_t target_capacity = (uint32_t) (keys.size() * INVERSE_LOAD_FACTOR); + + // we want the capacity to be a prime number + std::array allowed_caps = {7, 19, 31, 41, + 53, 61, 71, 83, + 89, 101, 113, 127}; + + if (target_capacity == 0){ + ERROR("StringIndRdOnlyMap::StringIndRdOnlyMap", + "can't construct a map without any keys"); + } + + for (const uint32_t &val : allowed_caps){ + if (target_capacity < val){ + capacity_ = val; + break; + } + } + if (capacity_ == 0){ + ERROR("StringIndRdOnlyMap::StringIndRdOnlyMap", + "There are too many keys for the current implementation"); + } + num_keys_ = keys.size(); + + // allocate hash table (all KVpairs should be default constructed) + KVpair* ptr = new KVpair[capacity_](); + if (ptr == nullptr){ + ERROR("StringIndRdOnlyMap::StringIndRdOnlyMap", + "Something went wrong while allocating data"); + } + + data_ = std::shared_ptr(ptr, std::default_delete()); + + // fill in the hash table: + for (std::size_t i = 0; i < keys.size(); i++){ + const std::string& key = keys[i]; + if (key == ""){ + ERROR("StringIndRdOnlyMap::StringIndRdOnlyMap", + "Empty strings are not allowed to be keys"); + } + KVpair& pair = const_cast(find_match_or_empty_(key)); + if (pair.key == key){ + ERROR1("StringIndRdOnlyMap::StringIndRdOnlyMap", + "There can't be more than 1 key called %s.", + key.c_str()); + } + pair = {key, (val_t)i}; + } +} + + +#endif /* ARRAY_STRING_IND_RD_ONLY_MAP_HPP */ diff --git a/src/Cello/test_StringIndRdOnlyMap.cpp b/src/Cello/test_StringIndRdOnlyMap.cpp new file mode 100644 index 0000000000..314960043c --- /dev/null +++ b/src/Cello/test_StringIndRdOnlyMap.cpp @@ -0,0 +1,95 @@ +// See LICENSE_CELLO file for license and copyright information + +/// @file test_StringIndRdOnlyMap.cpp +/// @author Matthew Abruzzo (matthewabruzzo@gmail.com) +/// @date 2022-04-01 +/// @brief Test program for StringIndRdOnlyMap + +#include "main.hpp" +#include "test.hpp" +#include "array.hpp" + +//---------------------------------------------------------------------- + +bool consistent_key_order_(const std::vector& ref_order, + const StringIndRdOnlyMap& string_ind_map) +{ + if (ref_order.size() != string_ind_map.size()){ + return false; + } + + for (std::size_t i = 0; i < string_ind_map.size(); i++){ + const std::string& key = ref_order[i]; + + // first, check that the key is even contained by string_ind_map + if (!string_ind_map.contains(key)) { return false; } + + // next, check the value associated with key using the operator[] method + if (string_ind_map[key] != i) { return false; } + + // then, check the value associated with key using the at method + if (string_ind_map.at(key) != i) { return false; } + + // finally, check result with key method + if (key != string_ind_map.key(i)) { return false; } + } + return true; +} + +//---------------------------------------------------------------------- + +PARALLEL_MAIN_BEGIN +{ + + PARALLEL_INIT; + + unit_init(0,1); + + unit_class("StringIndRdOnlyMap"); + + std::vector key_order_1 = {"density", "velocity_x", "velocity_y", + "velocity_z", "total_energy"}; + std::vector key_order_2 = {"total_energy", "density", + "velocity_x", "velocity_y", + "velocity_z"}; + std::vector key_order_3 = {"bfield_x", "internal_energy"}; + + // it would probably be better to isolate which methods are being tested at + // a given time. This first test checks a bunch of things. + StringIndRdOnlyMap str_ind_map_primary(key_order_1); + + unit_assert (consistent_key_order_(key_order_1, str_ind_map_primary)); + // this is effectively a sanity check + unit_assert (!consistent_key_order_(key_order_2, str_ind_map_primary)); + unit_assert (!consistent_key_order_(key_order_3, str_ind_map_primary)); + + + // now check copy constructor: + { + StringIndRdOnlyMap str_ind_map_secondary = str_ind_map_primary; + + unit_assert (consistent_key_order_(key_order_1, str_ind_map_secondary)); + // check that the original is unaffected by the copy: + unit_assert (consistent_key_order_(key_order_1, str_ind_map_primary)); + } + + // now check copy assignment: + { + StringIndRdOnlyMap str_ind_map_other(key_order_3); + unit_assert (consistent_key_order_(key_order_3, str_ind_map_other)); + + str_ind_map_other = str_ind_map_primary; + unit_assert (!consistent_key_order_(key_order_3, str_ind_map_other)); + unit_assert (consistent_key_order_(key_order_1, str_ind_map_other)); + + // check that the original is unaffected by the copy: + unit_assert (consistent_key_order_(key_order_1, str_ind_map_primary)); + } + + //---------------------------------------------------------------------- + unit_finalize(); + //---------------------------------------------------------------------- + + exit_(); +} +PARALLEL_MAIN_END diff --git a/src/Enzo/enzo_EnzoEFltArrayMap.cpp b/src/Enzo/enzo_EnzoEFltArrayMap.cpp index 09efecee97..2157daaed3 100644 --- a/src/Enzo/enzo_EnzoEFltArrayMap.cpp +++ b/src/Enzo/enzo_EnzoEFltArrayMap.cpp @@ -10,32 +10,11 @@ //---------------------------------------------------------------------- -namespace{ // define some local helper functions - - std::map init_str_index_map_ - (const std::vector &keys) - { - std::map out; - for (std::size_t i = 0; i < keys.size(); i++){ - const std::string& key = keys[i]; - ASSERT1("init_str_index_map_", - "There can't be more than 1 key called %s", - key.c_str(), out.find(key) == out.end()); - out[key] = (unsigned int)i; - } - return out; - } - -} - -//---------------------------------------------------------------------- - EnzoEFltArrayMap::EnzoEFltArrayMap(std::string name, const std::vector &keys, const std::array& shape) : name_(name), - str_index_map_(init_str_index_map_(keys)), - keys_(keys), + str_index_map_(keys), arrays_(keys.size(), shape) { } @@ -45,8 +24,7 @@ EnzoEFltArrayMap::EnzoEFltArrayMap(std::string name, const std::vector &keys, const std::vector &arrays) : name_(name), - str_index_map_(init_str_index_map_(keys)), - keys_(keys), + str_index_map_(keys), arrays_(arrays) { ASSERT2("EnzoEFltArrayMap::EnzoEFltArrayMap", @@ -65,26 +43,27 @@ bool EnzoEFltArrayMap::validate_key_order(const std::vector &ref, std::string name_string = (name_ == "") ? "" : (std::string(", \"") + name_ + std::string("\",")); - for (std::size_t i =0; i < std::min(ref.size(),keys_.size()); i++){ - bool equal = ref[i] == keys_[i]; + for (std::size_t i =0; i < std::min(ref.size(),str_index_map_.size()); i++){ + const std::string k = str_index_map_.key(i); + bool equal = ref[i] == k; if (!equal && raise_err){ print_summary(); ERROR4("EnzoEFltArrayMap::validate_key_order", ("keys of ArrayMap%s don't match expectations. At index %d, the " "key is \"%s\" when it's expected to be \"%s\"\n"), - name_string.c_str(), (int)i, keys_[i].c_str(), ref[i].c_str()); + name_string.c_str(), (int)i, k.c_str(), ref[i].c_str()); } else if (!equal){ return false; } } - if ((!allow_smaller_ref) && (ref.size() != keys_.size())){ + if ((!allow_smaller_ref) && (ref.size() != str_index_map_.size())){ if (raise_err){ print_summary(); ERROR3("EnzoEFltArrayMap::validate_key_order", "ArrayMap%s doesn't have the expected number of keys. It has %d " "keys. It's expected to have %d", - name_string.c_str(), (int)ref.size(), (int)keys_.size()); + name_string.c_str(), (int)ref.size(), (int)str_index_map_.size()); } else { return false; } @@ -96,14 +75,7 @@ bool EnzoEFltArrayMap::validate_key_order(const std::vector &ref, CelloArray EnzoEFltArrayMap::at_(const std::string& key) const noexcept -{ - auto result = str_index_map_.find(key); - if (result == str_index_map_.cend()){ - ERROR1("EnzoEFltArrayMap::at_", "map doesn't contain the key: \"%s\"", - key.c_str()); - } - return arrays_[result->second]; -} +{ return arrays_[str_index_map_.at(key)]; } //---------------------------------------------------------------------- @@ -168,8 +140,8 @@ void EnzoEFltArrayMap::print_summary() const noexcept } EFlt3DArray array = arrays_[i]; - CkPrintf("\"%s\" : EFlt3DArray(%p)", - keys_[i].c_str(), (void*)array.data()); + std::string key = str_index_map_.key(i); + CkPrintf("\"%s\" : EFlt3DArray(%p)", key.c_str(), (void*)array.data()); } CkPrintf("}\n"); fflush(stdout); @@ -182,7 +154,7 @@ EnzoEFltArrayMap EnzoEFltArrayMap::subarray_map(const CSlice &slc_z, const CSlice &slc_x, const std::string& name) { - return EnzoEFltArrayMap(name, str_index_map_, keys_, + return EnzoEFltArrayMap(name, str_index_map_, arrays_.subarray_collec(slc_z, slc_y, slc_x)); } @@ -190,7 +162,7 @@ const EnzoEFltArrayMap EnzoEFltArrayMap::subarray_map (const CSlice &slc_z, const CSlice &slc_y, const CSlice &slc_x, const std::string& name) const { - return EnzoEFltArrayMap(name, str_index_map_, keys_, + return EnzoEFltArrayMap(name, str_index_map_, arrays_.subarray_collec(slc_z, slc_y, slc_x)); } @@ -198,20 +170,12 @@ const EnzoEFltArrayMap EnzoEFltArrayMap::subarray_map void EnzoEFltArrayMap::validate_invariants_() const noexcept { + // several invariants are alread enforced: + // - StringIndRdOnlyMap implicitly enforces that there aren't any duplicate + // keys, and that a unique key is associated with each integer index from 0 + // through (str_index_map_.size() - 1) + // - CArrCollec enforces that all arrays have the same shape ASSERT("EnzoEFltArrayMap::validate_invariants_", - "str_index_map_, keys_, and arrays_ don't have the same length", - keys_.size() == arrays_.size() && - keys_.size() == str_index_map_.size()); - - for (std::size_t i = 0; i < keys_.size(); i++){ - auto result = str_index_map_.find(keys_[i]); - ASSERT1("EnzoEFltArrayMap::validate_invariants_", - "str_index_map_ is missing the key, \"%s\"", keys_[i].c_str(), - result != str_index_map_.cend()); - - ASSERT1("EnzoEFltArrayMap::validate_invariants_", - ("str_index_map_ and keys_ disagree on the location of \"%s\" in " - "the ordering"), keys_[i].c_str(), - result->second == i); - } + "str_index_map_ and arrays_ don't have the same length", + arrays_.size() == str_index_map_.size()); } diff --git a/src/Enzo/enzo_EnzoEFltArrayMap.hpp b/src/Enzo/enzo_EnzoEFltArrayMap.hpp index f2a262987f..bfc90b51fb 100644 --- a/src/Enzo/enzo_EnzoEFltArrayMap.hpp +++ b/src/Enzo/enzo_EnzoEFltArrayMap.hpp @@ -12,20 +12,18 @@ /// - makes it easier to order the entries in an arbitrary order. This /// facilitates optimizations in the Riemann Solver when the values are /// initialized in the order expected by the Riemann Solver +/// - facillitates some optimizations that make instances relatively cheap to +/// copy. /// /// If necessary, a number of optimizations could be made to the implementation /// that might make key lookups faster. These optimizations could take /// advantage of the following factors: -/// - Entries are never deleted and the contents won't be resized (if a hash -/// table can be resized, then the hash codes must stay the same or be -/// recomputed). These factors probably make a custom hash table using -/// open-addressing superior to std::map or std::unordered_map. /// - Assumptions about the max key size and the max capacity of the map. /// For example, if the max key size never exceeds ~22 characters and /// there are never more than ~128 entries, it would probably be optimal /// to store the strings in-place (improving cache locallity). -/// It would also be worth considering whether linear search is faster (since -/// the arrays are small. +/// - It would also be worth considering whether linear search is faster +/// (since the arrays are small) #ifndef ENZO_ENZO_EFLT_ARRAY_MAP_HPP #define ENZO_ENZO_EFLT_ARRAY_MAP_HPP @@ -87,9 +85,8 @@ class EnzoEFltArrayMap { { return at_(key); } /// Checks whether the container holds the specified key - bool contains(const std::string& key) const noexcept{ - return (str_index_map_.find(key) != str_index_map_.cend()); - } + bool contains(const std::string& key) const noexcept + { return str_index_map_.contains(key); } /// Similar to `at`, but a slice of the array ommitting staled values is /// returned by value @@ -167,12 +164,10 @@ class EnzoEFltArrayMap { /// This private constructor is used by subarray_map. It can skip some /// unnecessary work relating to initialization EnzoEFltArrayMap(std::string name, - const std::map &str_index_map, - const std::vector &ordered_keys, + const StringIndRdOnlyMap& str_index_map, CArrCollec&& arrays) : name_(name), str_index_map_(str_index_map), - keys_(ordered_keys), arrays_(arrays) { validate_invariants_(); } @@ -187,11 +182,9 @@ class EnzoEFltArrayMap { private: // attributes // name_ is to help with debugging! std::string name_; - + // str_index_map_ maps the keys to the index - std::map str_index_map_; - // keys_ is the ordered list of keys - std::vector keys_; + StringIndRdOnlyMap str_index_map_; // arrays_ is the ordered collection of arrays_ CArrCollec arrays_; }; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4169e4e996..db8a0a9e7b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -75,7 +75,9 @@ endfunction() ############################### UNIT TESTS #################################### # The following tests are self-contained binary of individual components +setup_test_unit(CArrCollec ArrayComponent/CArrCollec test_carr_collec) setup_test_unit(Array ArrayComponent/Array test_cello_array) +setup_test_unit(StringIndRdOnlyMap ArrayComponent/StringIndRdOnlyMap test_string_ind_rd_only_map) # TODO(need help) Need to reintroduce following tests once dependency hell is resolved, # see commented unit tests in src/Cello/CMakeLists.txt #setup_test_unit(CelloType Cello/Type test_type)