diff --git a/src/realm/array_string.cpp b/src/realm/array_string.cpp index 1eb2fdaa96..d7277faedf 100644 --- a/src/realm/array_string.cpp +++ b/src/realm/array_string.cpp @@ -192,6 +192,14 @@ StringData ArrayString::get(size_t ndx) const return {}; } +std::optional realm::ArrayString::get_string_id(size_t ndx) const +{ + if (m_type == Type::interned_strings) { + return StringID(static_cast(m_arr)->get(ndx)); + } + return m_string_interner->lookup(get(ndx)); +} + Mixed ArrayString::get_any(size_t ndx) const { return Mixed(get(ndx)); @@ -274,6 +282,16 @@ void ArrayString::clear() } size_t ArrayString::find_first(StringData value, size_t begin, size_t end) const noexcept +{ + // This should only be called if we don't have a string id for this particular array (aka no string interner) + std::optional id; + if (m_type == Type::interned_strings) + id = m_string_interner->lookup(value); + + return find_first(value, begin, end, id); +} + +size_t ArrayString::find_first(StringData value, size_t begin, size_t end, std::optional id) const noexcept { switch (m_type) { case Type::small_strings: @@ -289,14 +307,13 @@ size_t ArrayString::find_first(StringData value, size_t begin, size_t end) const break; } case Type::interned_strings: { - // we need a way to avoid this lookup for each leaf array. The lookup must appear - // higher up the call stack and passed down. 
- auto id = m_string_interner->lookup(value); if (id) { return static_cast(m_arr)->find_first(*id, begin, end); } break; } + default: + break; } return not_found; } diff --git a/src/realm/array_string.hpp b/src/realm/array_string.hpp index cdf8cceded..0e5a5cc389 100644 --- a/src/realm/array_string.hpp +++ b/src/realm/array_string.hpp @@ -22,6 +22,7 @@ #include #include #include +#include namespace realm { @@ -74,6 +75,10 @@ class ArrayString : public ArrayPayload { { m_string_interner = string_interner; } + bool is_compressed() const + { + return m_type == Type::interned_strings; + } void update_parent() { @@ -99,6 +104,7 @@ class ArrayString : public ArrayPayload { } void insert(size_t ndx, StringData value); StringData get(size_t ndx) const; + std::optional get_string_id(size_t ndx) const; Mixed get_any(size_t ndx) const override; bool is_null(size_t ndx) const; void erase(size_t ndx); @@ -107,6 +113,9 @@ class ArrayString : public ArrayPayload { size_t find_first(StringData value, size_t begin, size_t end) const noexcept; + /// Special version for searching in an array of compressed strings. 
+ size_t find_first(StringData value, size_t begin, size_t end, std::optional) const noexcept; + size_t lower_bound(StringData value); /// Get the specified element without the cost of constructing an diff --git a/src/realm/query_engine.cpp b/src/realm/query_engine.cpp index 3a9c375f6d..03cec8674a 100644 --- a/src/realm/query_engine.cpp +++ b/src/realm/query_engine.cpp @@ -453,7 +453,7 @@ bool StringNode::do_consume_condition(ParentNode& node) size_t StringNode::_find_first_local(size_t start, size_t end) { if (m_needles.empty()) { - return m_leaf->find_first(m_string_value, start, end); + return m_leaf->find_first(m_string_value, start, end, m_interned_string_id); } else { if (end == npos) @@ -505,7 +505,8 @@ size_t StringNode::_find_first_local(size_t start, size_t end) } StringNodeFulltext::StringNodeFulltext(StringData v, ColKey column, std::unique_ptr lm) - : StringNodeEqualBase(v, column) + : m_value(v) + , m_col(column) , m_link_map(std::move(lm)) { if (!m_link_map) @@ -518,17 +519,21 @@ void StringNodeFulltext::table_changed() } StringNodeFulltext::StringNodeFulltext(const StringNodeFulltext& other) - : StringNodeEqualBase(other) + : ParentNode(other) + , m_value(other.m_value) + , m_col(other.m_col) + , m_link_map(std::make_unique(*other.m_link_map)) { - m_link_map = std::make_unique(*other.m_link_map); } -void StringNodeFulltext::_search_index_init() +void StringNodeFulltext::init(bool will_query_ranges) { - StringIndex* index = m_link_map->get_target_table()->get_string_index(ParentNode::m_condition_column_key); + ParentNode::init(will_query_ranges); + + StringIndex* index = m_link_map->get_target_table()->get_string_index(m_col); REALM_ASSERT(index && index->is_fulltext_index()); m_index_matches.clear(); - index->find_all_fulltext(m_index_matches, StringNodeBase::m_string_value); + index->find_all_fulltext(m_index_matches, m_value); // If links exists, use backlinks to find the original objects if (m_link_map->links_exist()) { @@ -541,7 +546,7 @@ void 
StringNodeFulltext::_search_index_init() } m_index_evaluator = IndexEvaluator{}; - m_index_evaluator->init(&m_index_matches); + m_index_evaluator.init(&m_index_matches); } std::unique_ptr TwoColumnsNodeBase::update_cached_leaf_pointers_for_column(Allocator& alloc, diff --git a/src/realm/query_engine.hpp b/src/realm/query_engine.hpp index 3a428c04d4..a72a067876 100644 --- a/src/realm/query_engine.hpp +++ b/src/realm/query_engine.hpp @@ -151,6 +151,8 @@ class ParentNode { { m_dD = 100.0; + if (m_condition_column_key) + m_table->check_column(m_condition_column_key); if (m_child) m_child->init(will_query_ranges); } @@ -1647,6 +1649,11 @@ class StringNodeBase : public ParentNode { m_dT = 10.0; } + void table_changed() override + { + m_string_interner = m_table.unchecked_ptr()->get_string_interner(m_condition_column_key); + } + void cluster_changed() override { m_leaf.emplace(m_table.unchecked_ptr()->get_alloc()); @@ -1662,6 +1669,7 @@ class StringNodeBase : public ParentNode { m_end_s = 0; m_leaf_start = 0; m_leaf_end = 0; + m_interned_string_id = m_string_interner->lookup(m_value); } virtual void clear_leaf_state() @@ -1673,6 +1681,8 @@ class StringNodeBase : public ParentNode { : ParentNode(from) , m_value(from.m_value) , m_string_value(m_value) + , m_string_interner(from.m_string_interner) + , m_interned_string_id(from.m_interned_string_id) { } @@ -1687,6 +1697,8 @@ class StringNodeBase : public ParentNode { std::optional m_value; std::optional m_leaf; StringData m_string_value; + StringInterner* m_string_interner = nullptr; + std::optional m_interned_string_id; size_t m_end_s = 0; size_t m_leaf_start = 0; @@ -1703,7 +1715,7 @@ template class StringNode : public StringNodeBase { public: constexpr static bool case_sensitive_comparison = - is_any_v; + is_any_v; StringNode(StringData v, ColKey column) : StringNodeBase(v, column) { @@ -1732,8 +1744,21 @@ class StringNode : public StringNodeBase { TConditionFunction cond; for (size_t s = start; s < end; ++s) { + if 
constexpr (std::is_same_v) { + if (m_leaf->is_compressed()) { + if (m_interned_string_id) { + // The search string has been interned, so there might be a match + // We can compare the string IDs directly + const auto id = m_leaf->get_string_id(s); + if (m_string_interner->compare(*m_interned_string_id, *id) == 0) { + // The value matched, so we continue to the next value + continue; + } + } + return s; + } + } StringData t = get_string(s); - if constexpr (case_sensitive_comparison) { // case insensitive not implemented for: >, >=, <, <= if (cond(t, m_string_value)) @@ -2061,20 +2086,24 @@ class StringNode : public StringNodeEqualBase { size_t _find_first_local(size_t start, size_t end) override; }; - -class StringNodeFulltext : public StringNodeEqualBase { +class StringNodeFulltext : public ParentNode { public: StringNodeFulltext(StringData v, ColKey column, std::unique_ptr lm = {}); void table_changed() override; - void _search_index_init() override; + void init(bool will_query_ranges) override; bool has_search_index() const override { return true; // it's a required precondition for fulltext queries } + const IndexEvaluator* index_based_keys() override + { + return &m_index_evaluator; + } + std::unique_ptr clone() const override { return std::unique_ptr(new StringNodeFulltext(*this)); @@ -2086,13 +2115,16 @@ class StringNodeFulltext : public StringNodeEqualBase { } private: - std::vector m_index_matches; + std::string m_value; + ColKey m_col; std::unique_ptr m_link_map; + IndexEvaluator m_index_evaluator; + std::vector m_index_matches; StringNodeFulltext(const StringNodeFulltext&); - size_t _find_first_local(size_t, size_t) override + size_t find_first_local(size_t start, size_t end) override { - REALM_UNREACHABLE(); + return m_index_evaluator.do_search_index(m_cluster, start, end); } }; diff --git a/src/realm/table.cpp b/src/realm/table.cpp index 56c34c999f..b279d2a520 100644 --- a/src/realm/table.cpp +++ b/src/realm/table.cpp @@ -1735,9 +1735,25 @@ ObjKey 
Table::find_first(ColKey col_key, T value) const using LeafType = typename ColumnTypeTraits::cluster_leaf_type; LeafType leaf(get_alloc()); - auto f = [&key, &col_key, &value, &leaf](const Cluster* cluster) { + // In case of a string column we can try to look up the StringID of the search string, + // and search for that in case the leaf is compressed. + std::optional string_id; + if constexpr (std::is_same_v) { + auto string_interner = get_string_interner(col_key); + REALM_ASSERT(string_interner != nullptr); + string_id = string_interner->lookup(value); + } + + auto f = [&](const Cluster* cluster) { cluster->init_leaf(col_key, &leaf); - size_t row = leaf.find_first(value, 0, cluster->node_size()); + size_t row; + if constexpr (std::is_same_v) { + row = leaf.find_first(value, 0, cluster->node_size(), string_id); + } + else { + row = leaf.find_first(value, 0, cluster->node_size()); + } + if (row != realm::npos) { key = cluster->get_real_key(row); return IteratorControl::Stop; diff --git a/test/test_query.cpp b/test/test_query.cpp index c2d7215d35..e8093cbb73 100644 --- a/test/test_query.cpp +++ b/test/test_query.cpp @@ -330,10 +330,13 @@ columns or queries involved */ -TEST(Query_NextGen_StringConditions) +TEST_TYPES(Query_NextGen_StringConditions, std::true_type, std::false_type) { - Group group; - TableRef table1 = group.add_table("table1"); + SHARED_GROUP_TEST_PATH(path); + + auto db = DB::create(make_in_realm_history(), path); + auto wt = db->start_write(); + TableRef table1 = wt->add_table("table1"); auto col_str1 = table1->add_column(type_String, "str1"); auto col_str2 = table1->add_column(type_String, "str2"); @@ -342,6 +345,11 @@ TEST(Query_NextGen_StringConditions) table1->create_object().set_all("!", "x").get_key(); ObjKey key_1_2 = table1->create_object().set_all("bar", "r").get_key(); + if (TEST_TYPE::value) { + wt->commit_and_continue_as_read(); + wt->promote_to_write(); + } + ObjKey m; // Equal m = table1->column(col_str1).equal("bar", false).find(); 
@@ -433,7 +441,7 @@ TEST(Query_NextGen_StringConditions) CHECK_EQUAL(m, null_key); // Test various compare operations with null - TableRef table2 = group.add_table("table2"); + TableRef table2 = wt->add_table("table2"); auto col_str3 = table2->add_column(type_String, "str3", true); ObjKey key_2_0 = table2->create_object().set(col_str3, "foo").get_key(); @@ -442,6 +450,11 @@ TEST(Query_NextGen_StringConditions) ObjKey key_2_3 = table2->create_object().set(col_str3, "bar").get_key(); ObjKey key_2_4 = table2->create_object().set(col_str3, "").get_key(); + if (TEST_TYPE::value) { + wt->commit_and_continue_as_read(); + wt->promote_to_write(); + } + size_t cnt; cnt = table2->column(col_str3).contains(StringData("")).count(); CHECK_EQUAL(cnt, 4); @@ -522,6 +535,12 @@ TEST(Query_NextGen_StringConditions) } }; + // not equal + check_results((table2->column(col_str3) != StringData("")), {StringData(), "foo", "bar", "!"}); + check_results((table2->column(col_str3) != StringData()), {"", "foo", "bar", "!"}); + check_results((table2->column(col_str3) != StringData("foo")), {StringData(), "", "bar", "!"}); + check_results((table2->column(col_str3) != StringData("barr")), {StringData(), "", "foo", "bar", "!"}); + // greater check_results((table2->column(col_str3) > StringData("")), {"foo", "bar", "!"}); check_results((table2->column(col_str3) > StringData("b")), {"foo", "bar"}); @@ -553,7 +572,7 @@ TEST(Query_NextGen_StringConditions) check_results((table2->column(col_str3) <= StringData("barrrr")), {"bar", "", "!", StringData()}); check_results((table2->column(col_str3) <= StringData("z")), {"foo", "bar", "", "!", StringData()}); - TableRef table3 = group.add_table(StringData("table3")); + TableRef table3 = wt->add_table(StringData("table3")); auto col_link1 = table3->add_column(*table2, "link1"); table3->create_object().set(col_link1, key_2_0); @@ -562,6 +581,11 @@ TEST(Query_NextGen_StringConditions) table3->create_object().set(col_link1, key_2_3); 
table3->create_object().set(col_link1, key_2_4); + if (TEST_TYPE::value) { + wt->commit_and_continue_as_read(); + wt->promote_to_write(); + } + cnt = table3->link(col_link1).column(col_str3).contains(StringData("")).count(); CHECK_EQUAL(cnt, 4); @@ -638,8 +662,14 @@ TEST(Query_NextGen_StringConditions) "This is a long search string that does not contain the word being searched for!, " "This is a long search string that does not contain the word being searched for!, " "needle"; + table2->create_object().set(col_str3, long_string).get_key(); + if (TEST_TYPE::value) { + wt->commit_and_continue_as_read(); + wt->promote_to_write(); + } + cnt = table2->column(col_str3).contains(search_1, false).count(); CHECK_EQUAL(cnt, 1);