From 022af982bbc1c6b0587bf241ca5b882fce6113ee Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 18 Feb 2025 09:34:23 -0500 Subject: [PATCH] Fix query condition regression introduced by #5417 (#5461) #5417 made several changes to `SparseGlobalOrderReader::merge_result_cell_slabs`. One of these changes erroneously caused coordinates which do not pass the query condition to be emitted. This pull request restores the correct behavior. The crux of the change is ``` length = 1; ``` to ``` length = to_process.max_slab_length(1); ``` The affected line of code occurs during coordinate de-duplication. `merge_result_cell_slabs` puts coordinates from the leading tile of each fragment into a priority queue and pops them off. If the coordinate at the head of the queue matches what we just pulled off, then we continue popping until there is no longer a match. This de-duplication can cause a fragment to run out of loaded tiles, in which case it is necessary to end the loop to get more data so that we do not emit coordinates out of order. That's how we get to this `length = 1` code, which signals that we should emit a result slab of length 1. In contrast, `max_slab_length(1)` checks the query condition bitmap to see if the duplicated coordinate actually belongs in the query result. This was found by a customer data set which has many fragments which are essentially copies of the whole coordinate domain. This scenario de-duplicates many coordinates and is likely to hit the condition described. The new test `Sparse global order reader: fragment full copy 1d` simulates this set-up. A large fraction of the changes in this PR are the test scaffolding changes to support evaluating query conditions upon our `FxRun1D` and `FxRun2D` structures, so we can continue to check whether the expected and actual results match in the presence of a query condition. --- TYPE: BUG DESC: Fix query condition regression introduced by #5417 --- CMakeLists.txt | 5 +- test/src/unit-sparse-global-order-reader.cc | 469 +++++++++++++----- test/support/rapidcheck/array_templates.h | 6 +- test/support/rapidcheck/query_condition.h | 206 ++++++++ test/support/src/array_templates.h | 102 ++++ test/support/stdx/fold.h | 73 +++ test/support/stdx/tuple.h | 14 + test/support/tdb_rapidcheck.h | 4 +- tiledb/sm/query/readers/result_coords.h | 62 ++- .../readers/sparse_global_order_reader.cc | 9 +- 10 files changed, 784 insertions(+), 166 deletions(-) create mode 100644 test/support/rapidcheck/query_condition.h create mode 100644 test/support/stdx/fold.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 6b8359a1c09..9f69418e1f4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -169,12 +169,13 @@ if (MSVC) add_compile_definitions("$,DEBUG,NDEBUG>") add_compile_options( # /Od: Disable optimizations - # /bigobj: Increase number of sections in .obj file - "$<$:/Od;/bigobj>" + "$<$:/Od>" # /Ox: Enable most speed optimizations "$<$:/Ox>" # /Zi: Generate debug info in a separate .pdb file "$<$:/Zi>") + # /bigobj: increase number of sections in .obj file + add_compile_options("/bigobj") else() add_compile_options(-Wall -Wextra) if (TILEDB_WERROR) diff --git a/test/src/unit-sparse-global-order-reader.cc b/test/src/unit-sparse-global-order-reader.cc index 2a2307d780c..9cbacc31310 100644 --- a/test/src/unit-sparse-global-order-reader.cc +++ b/test/src/unit-sparse-global-order-reader.cc @@ -31,6 +31,7 @@ */ #include "test/support/rapidcheck/array_templates.h" +#include "test/support/rapidcheck/query_condition.h" #include "test/support/src/array_helpers.h" #include "test/support/src/array_templates.h" #include "test/support/src/error_helpers.h" @@ -43,6 +44,7 @@ #include "tiledb/sm/enums/datatype.h" #include "tiledb/sm/misc/comparators.h" #include "tiledb/sm/misc/types.h" +#include "tiledb/sm/query/ast/query_ast.h" #include "tiledb/sm/query/readers/result_tile.h" #include "tiledb/sm/query/readers/sparse_global_order_reader.h" @@ -78,11 +80,14 @@ using Subarray2DType = std::vector>>>; namespace rc { -Gen>> make_subarray_1d( - const templates::Domain& domain); + +template +Gen>> make_subarray_1d( + const templates::Domain& domain); Gen> make_subarray_2d( const templates::Domain& d1, const templates::Domain& d2); + } // namespace rc /* ********************************* */ @@ -92,11 +97,12 @@ Gen> make_subarray_2d( /** * Options for configuring the CSparseGlobalFx default 1D array */ +template struct DefaultArray1DConfig { uint64_t capacity_; bool allow_dups_; - templates::Dimension dimension_; + templates::Dimension dimension_; DefaultArray1DConfig() : capacity_(2) @@ -116,17 +122,31 @@ struct DefaultArray1DConfig { /** * An instance of one-dimension array input to `CSparseGlobalOrderFx::run` */ +template < + Datatype DIMENSION_TYPE = Datatype::INT32, + Datatype ATTR_TYPE = Datatype::INT32, + Datatype... ATTR_TYPES> struct FxRun1D { - using FragmentType = templates::Fragment1D; + using CoordType = tiledb::type::datatype_traits::value_type; + + using FragmentType = templates::Fragment1D< + CoordType, + typename tiledb::type::datatype_traits::value_type, + typename tiledb::type::datatype_traits::value_type...>; + + static constexpr Datatype DimensionType = DIMENSION_TYPE; uint64_t num_user_cells; std::vector fragments; // NB: for now this always has length 1, global order query does not // support multi-range subarray - std::vector> subarray; + std::vector> subarray; - DefaultArray1DConfig array; + // for evaluating + std::optional> condition; + + DefaultArray1DConfig array; SparseGlobalOrderReaderMemoryBudget memory; uint64_t tile_capacity() const { @@ -163,11 +183,11 @@ struct FxRun1D { /** * @return true if the cell at index `record` in `fragment` passes `subarray` */ - bool accept(const FragmentType& fragment, int record) const { + bool pass_subarray(const FragmentType& fragment, int record) const { if (subarray.empty()) { return true; } else { - const int coord = fragment.dim_[record]; + const CoordType coord = fragment.dim_[record]; for (const auto& range : subarray) { if (range.contains(coord)) { return true; @@ -183,8 +203,8 @@ struct FxRun1D { bool intersects(const sm::NDRange& mbr) const { auto accept = [&](const auto& range) { const auto& untyped_mbr = mbr[0]; - const templates::Domain typed_mbr( - untyped_mbr.start_as(), untyped_mbr.end_as()); + const templates::Domain typed_mbr( + untyped_mbr.start_as(), untyped_mbr.end_as()); return range.intersects(typed_mbr); }; return subarray.empty() || @@ -200,7 +220,7 @@ struct FxRun1D { return mbr; } assert(subarray.size() == 1); - if (subarray[0].upper_bound < mbr[0].end_as()) { + if (subarray[0].upper_bound < mbr[0].end_as()) { // in this case, the bitmap will filter out the other coords in the // tile and it will be discarded return std::vector{subarray[0].range()}; @@ -209,13 +229,15 @@ struct FxRun1D { } } - std::tuple&> dimensions() const { - return std::tuple&>( + std::tuple&> dimensions() const { + return std::tuple&>( array.dimension_); } - std::tuple attributes() const { - return std::make_tuple(Datatype::INT32); + std::vector attributes() const { + std::vector a = {ATTR_TYPE}; + (a.push_back(ATTR_TYPES), ...); + return a; } }; @@ -229,6 +251,7 @@ struct FxRun2D { std::optional>, std::optional>>> subarray; + std::optional> condition; size_t num_user_cells; @@ -316,8 +339,8 @@ struct FxRun2D { /** * @return true if the cell at index `record` in `fragment` passes `subarray` */ - bool accept(const FragmentType& fragment, int record) const { - if (subarray.empty()) { + bool pass_subarray(const FragmentType& fragment, int record) const { + if (subarray.empty() && !condition.has_value()) { return true; } else { const int r = fragment.d1_[record], c = fragment.d2_[record]; @@ -387,8 +410,8 @@ struct FxRun2D { return CoordsRefType(d1, d2); } - std::tuple attributes() const { - return std::make_tuple(Datatype::INT32); + std::vector attributes() const { + return {Datatype::INT32}; } }; @@ -406,10 +429,12 @@ concept InstanceType = requires(const T& instance) { instance.fragments; instance.memory; instance.subarray; + instance.condition; instance.dimensions(); instance.attributes(); - // also `accept(Self::FragmentType, int)`, unclear how to represent that + // also `pass_subarray(Self::FragmentType, int)`, unclear how to represent + // that }; struct CSparseGlobalOrderFx { @@ -427,7 +452,7 @@ struct CSparseGlobalOrderFx { template void create_default_array_1d( - const DefaultArray1DConfig& config = DefaultArray1DConfig()); + const DefaultArray1DConfig<>& config = DefaultArray1DConfig<>()); void create_default_array_1d_strings(bool allow_dups = false); @@ -536,7 +561,7 @@ void CSparseGlobalOrderFx::update_config() { template void CSparseGlobalOrderFx::create_default_array_1d( - const DefaultArray1DConfig& config) { + const DefaultArray1DConfig<>& config) { tiledb::test::create_array( context(), array_name_, @@ -1267,7 +1292,8 @@ TEST_CASE_METHOD( size_t num_user_cells, int extent, const std::vector>& subarray = - std::vector>()) { + std::vector>(), + tdb_unique_ptr condition = nullptr) { // Write a fragment F0 with unique coordinates templates::Fragment1D fragment0; fragment0.dim_.resize(fragment_size); @@ -1291,7 +1317,7 @@ TEST_CASE_METHOD( f1atts.resize(fragment1.dim_.size()); std::iota(f1atts.begin(), f1atts.end(), int(fragment0.dim_.size())); - struct FxRun1D instance; + FxRun1D instance; instance.fragments.push_back(fragment0); instance.fragments.push_back(fragment1); instance.num_user_cells = num_user_cells; @@ -1303,14 +1329,24 @@ TEST_CASE_METHOD( instance.memory.ratio_array_data_ = "0.5"; instance.subarray = subarray; + if (condition) { + instance.condition.emplace(std::move(condition)); + } - run(instance); + run(instance); }; SECTION("Example") { doit.operator()(200, 8, 2); } + SECTION("Condition") { + int value = 110; + tdb_unique_ptr qc(new tiledb::sm::ASTNodeVal( + "a1", &value, sizeof(int), tiledb::sm::QueryConditionOp::GE)); + doit.operator()(200, 8, 2, {}, std::move(qc)); + } + SECTION("Shrink", "Some examples found by rapidcheck") { doit.operator()( 2, 1, 1, {templates::Domain(1, 1)}); @@ -1327,8 +1363,15 @@ TEST_CASE_METHOD( const int extent = *rc::gen::inRange(1, 200); const auto subarray = *rc::make_subarray_1d(templates::Domain(1, 200)); + auto condition = + *rc::make_query_condition::FragmentType>(std::make_tuple( + templates::Domain(1, 200), templates::Domain(0, 400))); doit.operator()( - fragment_size, num_user_cells, extent, subarray); + fragment_size, + num_user_cells, + extent, + subarray, + std::move(condition)); }); } } @@ -1352,7 +1395,8 @@ TEST_CASE_METHOD( auto doit = [this]( size_t fragment_size, size_t num_user_cells, - const std::vector>& subarray = {}) { + const std::vector>& subarray = {}, + tdb_unique_ptr condition = nullptr) { templates::Fragment1D fragment0; templates::Fragment1D fragment1; @@ -1378,16 +1422,19 @@ TEST_CASE_METHOD( f1atts.resize(fragment1.dim_.size()); std::iota(f1atts.begin(), f1atts.end(), int(f0atts.size())); - struct FxRun1D instance; + FxRun1D instance; instance.fragments.push_back(fragment0); instance.fragments.push_back(fragment1); instance.num_user_cells = num_user_cells; instance.subarray = subarray; + if (condition) { + instance.condition.emplace(std::move(condition)); + } instance.memory.total_budget_ = "20000"; instance.memory.ratio_array_data_ = "0.5"; instance.array.allow_dups_ = true; - run(instance); + run(instance); }; SECTION("Example") { @@ -1400,8 +1447,11 @@ TEST_CASE_METHOD( const size_t num_user_cells = *rc::gen::inRange(1, 1024); const auto subarray = *rc::make_subarray_1d(templates::Domain(1, 200)); + auto condition = + *rc::make_query_condition::FragmentType>(std::make_tuple( + templates::Domain(1, 200), templates::Domain(1, 200))); doit.operator()( - fragment_size, num_user_cells, subarray); + fragment_size, num_user_cells, subarray, std::move(condition)); }); } } @@ -1444,7 +1494,8 @@ TEST_CASE_METHOD( size_t num_user_cells, bool allow_dups, const std::vector>& subarray = - std::vector>()) { + std::vector>(), + tdb_unique_ptr condition = nullptr) { const uint64_t total_budget = 100000; const double ratio_coords = 0.2; @@ -1453,6 +1504,9 @@ TEST_CASE_METHOD( instance.array.dimension_.extent = int(num_fragments) * 2; instance.array.allow_dups_ = allow_dups; instance.subarray = subarray; + if (condition) { + instance.condition.emplace(std::move(condition)); + } instance.memory.total_budget_ = std::to_string(total_budget); instance.memory.ratio_coords_ = std::to_string(ratio_coords); @@ -1478,7 +1532,7 @@ TEST_CASE_METHOD( instance.num_user_cells = num_user_cells; - run(instance); + run(instance); }; SECTION("Example") { @@ -1504,8 +1558,18 @@ TEST_CASE_METHOD( const bool allow_dups = *rc::gen::arbitrary(); const auto subarray = *rc::make_subarray_1d(templates::Domain(1, 200)); + auto condition = + *rc::make_query_condition::FragmentType>(std::make_tuple( + templates::Domain(1, 200), + templates::Domain( + 0, static_cast(num_fragments * fragment_size)))); doit.operator()( - num_fragments, fragment_size, num_user_cells, allow_dups, subarray); + num_fragments, + fragment_size, + num_user_cells, + allow_dups, + subarray, + std::move(condition)); }); } } @@ -1536,7 +1600,8 @@ TEST_CASE_METHOD( size_t num_user_cells, size_t tile_capacity, bool allow_dups, - const Subarray1DType& subarray = {}) { + const Subarray1DType& subarray = {}, + tdb_unique_ptr condition = nullptr) { FxRun1D instance; instance.num_user_cells = num_user_cells; instance.subarray = subarray; @@ -1547,6 +1612,9 @@ TEST_CASE_METHOD( instance.array.dimension_.domain.lower_bound = 0; instance.array.dimension_.domain.upper_bound = static_cast(num_fragments * fragment_size); + if (condition) { + instance.condition.emplace(std::move(condition)); + } for (size_t f = 0; f < num_fragments; f++) { templates::Fragment1D fragment; @@ -1565,7 +1633,7 @@ TEST_CASE_METHOD( instance.num_user_cells = num_user_cells; - run(instance); + run(instance); }; SECTION("Example") { @@ -1581,13 +1649,19 @@ TEST_CASE_METHOD( const bool allow_dups = *rc::gen::arbitrary(); const auto subarray = *rc::make_subarray_1d(templates::Domain( 0, static_cast(num_fragments * fragment_size))); + auto condition = + *rc::make_query_condition::FragmentType>(std::make_tuple( + templates::Domain(1, 200), + templates::Domain( + 0, static_cast(num_fragments * fragment_size)))); doit.operator()( num_fragments, fragment_size, num_user_cells, tile_capacity, allow_dups, - subarray); + subarray, + std::move(condition)); }); } } @@ -1646,7 +1720,8 @@ TEST_CASE_METHOD( size_t num_user_cells, size_t tile_capacity, bool allow_dups, - const Subarray2DType& subarray = {}) { + const Subarray2DType& subarray = {}, + tdb_unique_ptr condition = nullptr) { FxRun2D instance; instance.num_user_cells = num_user_cells; instance.capacity = tile_capacity; @@ -1654,6 +1729,9 @@ TEST_CASE_METHOD( instance.tile_order_ = tile_order; instance.cell_order_ = cell_order; instance.subarray = subarray; + if (condition) { + instance.condition.emplace(std::move(condition)); + } auto row = [&](size_t f, size_t i) -> int { return 1 + @@ -1743,6 +1821,12 @@ TEST_CASE_METHOD( const size_t tile_capacity = *rc::gen::inRange(4, 32); const auto subarray = *rc::make_subarray_2d( templates::Domain(1, 200), templates::Domain(1, 200)); + auto condition = + *rc::make_query_condition(std::make_tuple( + templates::Domain(1, 200), + templates::Domain(1, 200), + templates::Domain( + 0, static_cast((num_fragments + 1) * fragment_size)))); doit.operator()( tile_order, @@ -1752,7 +1836,8 @@ TEST_CASE_METHOD( num_user_cells, tile_capacity, allow_dups, - subarray); + subarray, + std::move(condition)); }); } } @@ -1789,6 +1874,8 @@ TEST_CASE_METHOD( CSparseGlobalOrderFx, "Sparse global order reader: fragment skew 2d merge bound", "[sparse-global-order][rest][rapidcheck]") { + constexpr size_t d1_extent = 4; + constexpr size_t d2_extent = 4; auto doit = [this]( tiledb_layout_t tile_order, tiledb_layout_t cell_order, @@ -1797,16 +1884,20 @@ TEST_CASE_METHOD( size_t num_fragments, size_t tile_capacity, bool allow_dups, - const Subarray2DType& subarray = {}) { + const Subarray2DType& subarray = {}, + tdb_unique_ptr condition = nullptr) { FxRun2D instance; instance.tile_order_ = tile_order; instance.cell_order_ = cell_order; - instance.d1.extent = 4; - instance.d2.extent = 4; + instance.d1.extent = d1_extent; + instance.d2.extent = d2_extent; instance.capacity = tile_capacity; instance.allow_dups = allow_dups; instance.num_user_cells = num_user_cells; instance.subarray = subarray; + if (condition) { + instance.condition.emplace(std::move(condition)); + } const size_t tile_size_estimate = (1600 + instance.capacity * sizeof(int)); const double coords_ratio = @@ -1862,7 +1953,7 @@ TEST_CASE_METHOD( instance.fragments.push_back(fpoint); } - run(instance); + run(instance); }; SECTION("Example") { @@ -1883,7 +1974,7 @@ TEST_CASE_METHOD( } SECTION("Rapidcheck") { - rc::prop("rapidcheck fragment skew", [doit]() { + rc::prop("rapidcheck fragment skew 2d merge bound", [doit]() { const auto tile_order = *rc::gen::element(TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR); const auto cell_order = @@ -1895,6 +1986,14 @@ TEST_CASE_METHOD( const bool allow_dups = *rc::gen::arbitrary(); const auto subarray = *rc::make_subarray_2d( templates::Domain(1, 200), templates::Domain(1, 200)); + auto condition = + *rc::make_query_condition(std::make_tuple( + templates::Domain(1, 200), + templates::Domain(1, 200), + templates::Domain( + 0, + static_cast( + 2 * num_fragments * d1_extent * d2_extent)))); doit.operator()( tile_order, cell_order, @@ -1903,7 +2002,117 @@ TEST_CASE_METHOD( num_fragments, tile_capacity, allow_dups, - subarray); + subarray, + std::move(condition)); + }); + } +} + +/** + * Test usage patterns where each fragment is kind of an entire data set, + * e.g. one fragment for observations from Monday, one for observations + * from Tuesday, one for observations from Wednesday, etc. + * + * This sort of usage is not really what fragments and time travel were + * originally built to support, but it is more flexible with respect + * to schema evolution than having time as another dimension would be, + * and as such at least one prominent customer is known to do this. + * + * What this looks like is each fragment covering essentially the entire + * domain of the array. Queries which open the array at a specific + * timestamp are nice and easy; queries which do not open with a specific + * timestamp are sad because they do tons of work to de-duplicate. + */ +TEST_CASE_METHOD( + CSparseGlobalOrderFx, + "Sparse global order reader: fragment full copy 1d", + "[sparse-global-order][rest][rapidcheck]") { + using FxRunType = FxRun1D; + auto doit = [this]( + size_t num_fragments, + templates::Dimension dimension, + const Subarray1DType& subarray = {}, + tdb_unique_ptr condition = nullptr) { + const size_t fragment_size = + std::min(1024 * 32, dimension.domain.num_cells()); + + FxRunType instance; + instance.num_user_cells = 1024 * 1024; + instance.array.dimension_ = dimension; + instance.memory.total_budget_ = std::to_string(1024 * 1024 * 4); + instance.array.capacity_ = std::min( + 1024, std::max(16, dimension.domain.num_cells() / 1024)); + instance.array.allow_dups_ = false; + instance.subarray = subarray; + if (condition) { + instance.condition.emplace(std::move(condition)); + } + + for (size_t f = 0; f < num_fragments; f++) { + FxRunType::FragmentType fragment; + + fragment.dim_.resize(fragment_size); + std::iota( + fragment.dim_.begin(), + fragment.dim_.end(), + dimension.domain.lower_bound); + + std::get<0>(fragment.atts_).resize(fragment.dim_.size()); + std::iota( + std::get<0>(fragment.atts_).begin(), + std::get<0>(fragment.atts_).end(), + 0); + + std::get<1>(fragment.atts_).resize(fragment.dim_.size()); + std::iota( + std::get<1>(fragment.atts_).begin(), + std::get<1>(fragment.atts_).end(), + static_cast(f * fragment.dim_.size())); + + instance.fragments.push_back(fragment); + } + + run(instance); + }; + + SECTION("Example") { + templates::Domain domain(0, 262143); + doit.operator()( + 8, templates::Dimension(domain, 1024)); + } + + SECTION("Condition") { + int64_t value = 524288; + tdb_unique_ptr qc(new tiledb::sm::ASTNodeVal( + "a1", &value, sizeof(value), tiledb::sm::QueryConditionOp::GE)); + templates::Domain domain(0, 262143); + doit.operator()( + 8, + templates::Dimension(domain, 1024), + {}, + std::move(qc)); + } + + SECTION("Rapidcheck") { + rc::prop("rapidcheck fragment full copy 1d", [doit]() { + const size_t num_fragments = *rc::gen::inRange(2, 16); + const auto dimension = + *rc::gen::arbitrary>(); + const auto subarray = *rc::make_subarray_1d(dimension.domain); + + const auto domains = std::make_tuple( + dimension.domain, + templates::Domain(0, dimension.domain.upper_bound), + templates::Domain( + 0, + static_cast(std::min( + std::numeric_limits::max(), + dimension.domain.upper_bound * num_fragments)))); + auto condition = + *rc::make_query_condition(domains); + + doit.operator()( + num_fragments, dimension, subarray, std::move(condition)); }); } } @@ -2797,9 +3006,9 @@ void CSparseGlobalOrderFx::create_array(const Instance& instance) { attribute_cell_val_nums.push_back(1); attribute_compressors.push_back(std::make_pair(TILEDB_FILTER_NONE, -1)); }; - std::apply( - [&](As... attribute) { (add_attribute(attribute), ...); }, - attributes); + for (const Datatype atype : attributes) { + add_attribute(atype); + } tiledb::test::create_array( context(), @@ -2866,15 +3075,26 @@ void CSparseGlobalOrderFx::run_execute(Instance& instance) { auto expect_dimensions = expect.dimensions(); auto expect_attributes = expect.attributes(); - if (instance.subarray.empty()) { + if (instance.subarray.empty() && !instance.condition.has_value()) { stdx::extend(expect_dimensions, fragment.dimensions()); stdx::extend(expect_attributes, fragment.attributes()); } else { std::vector accept; + std::optional< + templates::QueryConditionEvalSchema> + eval; + if (instance.condition.has_value()) { + eval.emplace(); + } for (uint64_t i = 0; i < fragment.size(); i++) { - if (instance.accept(fragment, i)) { - accept.push_back(i); + if (!instance.pass_subarray(fragment, i)) { + continue; + } + if (eval.has_value() && + !eval->test(fragment, i, *instance.condition.value().get())) { + continue; } + accept.push_back(i); } const auto fdimensions = stdx::select(fragment.dimensions(), std::span(accept)); @@ -2941,6 +3161,13 @@ void CSparseGlobalOrderFx::run_execute(Instance& instance) { tiledb_subarray_free(&subarray); } + if (instance.condition.has_value()) { + tiledb::sm::QueryCondition qc(instance.condition->get()->clone()); + const auto rc = + query->query_->set_condition(qc); // SAFETY: this performs a deep copy + ASSERTER(rc.to_string() == "Ok"); + } + // Prepare output buffer std::decay_t out; @@ -3126,31 +3353,31 @@ namespace rc { /** * @return a generator of valid subarrays within `domain` */ -Gen>> make_subarray_1d( - const templates::Domain& domain) { +template +Gen>> make_subarray_1d( + const templates::Domain& domain) { // NB: when (if) multi-range subarray is supported for global order // (or if this is used for non-global order) // change `num_ranges` to use the weighted element version - std::optional> num_ranges; + std::optional> num_ranges; if (true) { - num_ranges = gen::just(1); + num_ranges = gen::just(1); } else { - num_ranges = gen::weightedElement( + num_ranges = gen::weightedElement( {{50, 1}, {25, 2}, {13, 3}, {7, 4}, {4, 5}, {1, 6}}); } return gen::mapcat(*num_ranges, [domain](int num_ranges) { - return gen::container>>( - num_ranges, rc::make_range(domain)); + return gen::container>>( + num_ranges, rc::make_range(domain)); }); } -template <> -struct Arbitrary { - static Gen arbitrary() { - constexpr Datatype DIMENSION_TYPE = Datatype::INT32; - using CoordType = tiledb::type::datatype_traits::value_type; +template +struct Arbitrary> { + using value_type = FxRun1D; + static Gen arbitrary() { auto dimension = gen::arbitrary>(); auto allow_dups = gen::arbitrary(); @@ -3174,33 +3401,39 @@ struct Arbitrary { templates::Dimension dimension; std::tie(allow_dups, dimension) = arg; - auto fragment = rc::make_fragment_1d( - allow_dups, dimension.domain); + auto fragment = rc::make_fragment_1d< + typename value_type::CoordType, + typename tiledb::type::datatype_traits< + ATTR_TYPES>::value_type...>(allow_dups, dimension.domain); + + auto fragments = gen::nonEmpty( + gen::container>( + fragment)); + + auto fragments_and_qc = gen::mapcat(fragments, [](auto fragments) { + auto condition = + make_query_condition( + field_domains(fragments)); + return gen::pair(gen::just(fragments), condition); + }); return gen::tuple( gen::just(allow_dups), gen::just(dimension), make_subarray_1d(dimension.domain), - gen::nonEmpty( - gen::container>>( - fragment))); + fragments_and_qc); }); auto num_user_cells = gen::inRange(1, 8 * 1024 * 1024); return gen::apply( - [](std::tuple< - bool, - templates::Dimension, - std::vector>, - std::vector>> fragments, - int num_user_cells) { + [](auto fragments, int num_user_cells) { FxRun1D instance; - std::tie( - instance.array.allow_dups_, - instance.array.dimension_, - instance.subarray, - instance.fragments) = fragments; + instance.array.allow_dups_ = std::get<0>(fragments); + instance.array.dimension_ = std::get<1>(fragments); + instance.subarray = std::get<2>(fragments); + instance.fragments = std::move(std::get<3>(fragments).first); + instance.condition = std::move(std::get<3>(fragments).second); instance.num_user_cells = num_user_cells; @@ -3277,13 +3510,22 @@ struct Arbitrary { auto fragment = rc::make_fragment_2d( allow_dups, d0.domain, d1.domain); + + auto fragments = gen::nonEmpty( + gen::container>(fragment)); + + auto fragments_and_qc = gen::mapcat(fragments, [](auto fragments) { + auto condition = make_query_condition( + field_domains(fragments)); + return gen::pair(gen::just(fragments), condition); + }); + return gen::tuple( gen::just(allow_dups), gen::just(d0), gen::just(d1), make_subarray_2d(d0.domain, d1.domain), - gen::nonEmpty( - gen::container>(fragment))); + fragments_and_qc); }); auto num_user_cells = gen::inRange(1, 8 * 1024 * 1024); @@ -3296,12 +3538,12 @@ struct Arbitrary { tiledb_layout_t tile_order, tiledb_layout_t cell_order) { FxRun2D instance; - std::tie( - instance.allow_dups, - instance.d1, - instance.d2, - instance.subarray, - instance.fragments) = fragments; + instance.allow_dups = std::get<0>(fragments); + instance.d1 = std::get<1>(fragments); + instance.d2 = std::get<2>(fragments); + instance.subarray = std::get<3>(fragments); + instance.fragments = std::move(std::get<4>(fragments).first); + instance.condition = std::move(std::get<4>(fragments).second); // TODO: capacity instance.num_user_cells = num_user_cells; @@ -3317,55 +3559,6 @@ struct Arbitrary { } }; -/** - * Specializes `show` to print the final test case after shrinking - */ -template <> -void show(const FxRun1D& instance, std::ostream& os) { - size_t f = 0; - - os << "{" << std::endl; - os << "\t\"fragments\": [" << std::endl; - for (const auto& fragment : instance.fragments) { - os << "\t\t{" << std::endl; - os << "\t\t\t\"coords\": [" << std::endl; - os << "\t\t\t\t"; - show(fragment.dim_, os); - os << std::endl; - os << "\t\t\t], " << std::endl; - os << "\t\t\t\"atts\": [" << std::endl; - os << "\t\t\t\t"; - show(std::get<0>(fragment.atts_), os); - os << std::endl; - os << "\t\t\t] " << std::endl; - os << "\t\t}"; - if ((f++) + 1 < instance.fragments.size()) { - os << ", " << std::endl; - } else { - os << std::endl; - } - } - os << "\t]," << std::endl; - os << "\t\"num_user_cells\": " << instance.num_user_cells << std::endl; - os << "\t\"array\": {" << std::endl; - os << "\t\t\"allow_dups\": " << instance.array.allow_dups_ << std::endl; - os << "\t\t\"domain\": [" << instance.array.dimension_.domain.lower_bound - << ", " << instance.array.dimension_.domain.upper_bound << "]," - << std::endl; - os << "\t\t\"extent\": " << instance.array.dimension_.extent << std::endl; - os << "\t}," << std::endl; - os << "\t\"memory\": {" << std::endl; - os << "\t\t\"total_budget\": " << instance.memory.total_budget_ << ", " - << std::endl; - os << "\t\t\"ratio_tile_ranges\": " << instance.memory.ratio_tile_ranges_ - << ", " << std::endl; - os << "\t\t\"ratio_array_data\": " << instance.memory.ratio_array_data_ - << ", " << std::endl; - os << "\t\t\"ratio_coords\": " << instance.memory.ratio_coords_ << std::endl; - os << "\t}" << std::endl; - os << "}"; -} - /** * Specializes `show` to print the final test case after shrinking */ @@ -3446,8 +3639,10 @@ TEST_CASE_METHOD( "[sparse-global-order][rest][rapidcheck]") { SECTION("Rapidcheck") { rc::prop( - "rapidcheck arbitrary 1d", [this](rc::NonShrinking instance) { - run(instance); + "rapidcheck arbitrary 1d", + [this](rc::NonShrinking> instance) { + run( + instance); }); } } diff --git a/test/support/rapidcheck/array_templates.h b/test/support/rapidcheck/array_templates.h index 95f8b671030..80d71fcbe1c 100644 --- a/test/support/rapidcheck/array_templates.h +++ b/test/support/rapidcheck/array_templates.h @@ -49,10 +49,10 @@ struct Arbitrary> { // NB: `gen::inRange` is exclusive at the upper end but tiledb domain is // inclusive. So we have to use `int64_t` to avoid overflow. auto bounds = gen::mapcat(gen::arbitrary(), [](D lb) { - if (std::is_same::value) { + if constexpr (std::is_same::value) { return gen::pair( gen::just(lb), gen::inRange(lb, std::numeric_limits::max())); - } else if (std::is_same::value) { + } else if constexpr (std::is_same::value) { return gen::pair( gen::just(lb), gen::inRange(lb, std::numeric_limits::max())); } else { @@ -128,7 +128,7 @@ struct Arbitrary> { }); return gen::map(tup, [](std::pair, CoordType> tup) { - return templates::Dimension{.domain = tup.first, .extent = tup.second}; + return templates::Dimension(tup.first, tup.second); }); } }; diff --git a/test/support/rapidcheck/query_condition.h b/test/support/rapidcheck/query_condition.h new file mode 100644 index 00000000000..22b30aebc43 --- /dev/null +++ b/test/support/rapidcheck/query_condition.h @@ -0,0 +1,206 @@ +/** + * @file test/support/rapidcheck/query_condition.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2022-2024 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file defines rapidcheck generators for query condition AST. + */ + +#ifndef TILEDB_RAPIDCHECK_QUERY_CONDITION_H +#define TILEDB_RAPIDCHECK_QUERY_CONDITION_H + +#include "tiledb/sm/query/ast/query_ast.h" + +#include "test/support/rapidcheck/array_templates.h" + +#include +#include +#include + +namespace tiledb::test::templates { + +// Helper function used for `domain_type_t`. +// This is not intended to be evaluated. +template +std::tuple...> domain_type_f( + const std::tuple&); + +/** + * Maps a tuple type `(T1, T2, ...)` to a type + * `(Domain, Domain, ...)`. + */ +template +using domain_type_t = decltype(domain_type_f(std::declval())); + +/** + * Maps a `FragmentType` to a tuple type which has a `Domain` + * for each of the dimensions and attributes. + */ +template +struct query_condition_domain { + using field_tuple = decltype(std::tuple_cat( + std::declval().dimensions())>>(), + std::declval().attributes())>>())); + + using value_type = domain_type_t; +}; + +} // namespace tiledb::test::templates + +namespace rc { + +using namespace tiledb::test; + +template <> +struct Arbitrary { + static Gen arbitrary() { + // NOT IN and IN are not handled yet by the users of this + return gen::element( + tiledb::sm::QueryConditionOp::LT, + tiledb::sm::QueryConditionOp::LE, + tiledb::sm::QueryConditionOp::GT, + tiledb::sm::QueryConditionOp::GE, + tiledb::sm::QueryConditionOp::EQ, + tiledb::sm::QueryConditionOp::NE); + } +}; + +/** + * @return a generator which produces arbitrary query conditions over `Fragment` + * using ranges drawn from `field_domains` + */ +template +Gen> make_query_condition( + const typename query_condition_domain::value_type& + field_domains) { + using DimensionTuple = + stdx::decay_tuple().dimensions())>; + using AttributeTuple = + stdx::decay_tuple().attributes())>; + using FieldTuple = decltype(std::tuple_cat( + std::declval(), std::declval())); + + auto field = gen::inRange(0, std::tuple_size_v); + auto op = gen::arbitrary(); + + auto parts = gen::mapcat(gen::pair(field, op), [field_domains](auto arg) { + size_t field; + tiledb::sm::QueryConditionOp op; + std::tie(field, op) = arg; + + const auto field_names = QueryConditionEvalSchema().field_names_; + + using R = std::pair>; + auto make_value_gen = [&](auto i) -> std::optional { + if (field == i) { + auto value = make_range(std::get(field_domains)); + auto p = std::make_pair(field_names[i], gen::map(value, [](auto value) { + return std::string( + reinterpret_cast(&value), + sizeof(value)); + })); + return std::optional{p}; + } else { + return std::optional{}; + } + }; + + auto fielddata = stdx::fold_optional( + std::make_index_sequence>{}, + make_value_gen); + + return gen::tuple( + gen::just(fielddata.value().first), + gen::just(op), + fielddata.value().second); + }); + + return gen::map(parts, [](auto arg) { + return tdb_unique_ptr(new tiledb::sm::ASTNodeVal( + std::get<0>(arg), + std::get<2>(arg).data(), + std::get<2>(arg).size(), + std::get<1>(arg))); + }); +} + +} // namespace rc + +namespace tiledb::test::templates { + +/** + * @return a tuple containing the min/max values of each field in `fragment`. + */ +template +typename query_condition_domain::value_type field_domains( + const Fragment& fragment) { + auto make_domain = [](const std::vector& field) { + return Domain( + *std::min_element(field.begin(), field.end()), + *std::max_element(field.begin(), field.end())); + }; + + return std::apply( + [make_domain](const std::vector&... fields) { + return std::make_tuple(make_domain(fields)...); + }, + std::tuple_cat(fragment.dimensions(), fragment.attributes())); +} + +/** + * @return a tuple containing the min/max values of each field in `fragments`. + */ +template +typename query_condition_domain::value_type field_domains( + const std::vector& fragments) { + auto make_union_domain = [](auto& d1, const auto d2) { + d1 = Domain( + std::min(d1.lower_bound, d2.lower_bound), + std::max(d1.upper_bound, d2.upper_bound)); + }; + + auto full_domain = field_domains(fragments[0]); + for (size_t i = 1; i < fragments.size(); i++) { + const auto this_domain = field_domains(fragments[i]); + std::apply( + [&](auto... full_field) { + std::apply( + [&](const auto... this_field) { + (make_union_domain(full_field, this_field), ...); + }, + this_domain); + }, + full_domain); + } + return full_domain; +} + +} // namespace tiledb::test::templates + +#endif diff --git a/test/support/src/array_templates.h b/test/support/src/array_templates.h index d70d2217b20..5cc648fc9ae 100644 --- a/test/support/src/array_templates.h +++ b/test/support/src/array_templates.h @@ -35,11 +35,15 @@ #define TILEDB_ARRAY_TEMPLATES_H #include "tiledb.h" +#include "tiledb/common/unreachable.h" +#include "tiledb/sm/query/ast/query_ast.h" #include "tiledb/type/datatype_traits.h" #include "tiledb/type/range/range.h" #include #include +#include +#include #include #include @@ -180,15 +184,113 @@ template struct Dimension { using value_type = tiledb::type::datatype_traits::value_type; + Dimension() = default; + Dimension(Domain domain, value_type extent) + : domain(domain) + , extent(extent) { + } + Domain domain; value_type extent; }; +/** + * Schema of named fields for simple evaluation of a query condition + */ +template +struct QueryConditionEvalSchema { + std::vector field_names_; + + QueryConditionEvalSchema() { + stdx::decay_tuple().dimensions())> dims; + stdx::decay_tuple().attributes())> atts; + + auto add_dimension = [&](auto) { + field_names_.push_back("d" + std::to_string(field_names_.size() + 1)); + }; + auto add_attribute = [&](auto) { + field_names_.push_back( + "a" + + std::to_string( + field_names_.size() + 1 - std::tuple_size())); + }; + + std::apply([&](const auto... dim) { (add_dimension(dim), ...); }, dims); + std::apply([&](const auto... att) { (add_attribute(att), ...); }, atts); + } + + /** + * @return true if a value passes a simple condition + */ + template + static bool test(const T& value, const tiledb::sm::ASTNode& condition) { + switch (condition.get_op()) { + case tiledb::sm::QueryConditionOp::LT: + return value < *static_cast(condition.get_value_ptr()); + case tiledb::sm::QueryConditionOp::LE: + return value <= *static_cast(condition.get_value_ptr()); + case tiledb::sm::QueryConditionOp::GT: + return value > *static_cast(condition.get_value_ptr()); + case tiledb::sm::QueryConditionOp::GE: + return value >= *static_cast(condition.get_value_ptr()); + case tiledb::sm::QueryConditionOp::EQ: + return value == *static_cast(condition.get_value_ptr()); + case tiledb::sm::QueryConditionOp::NE: + return value != *static_cast(condition.get_value_ptr()); + case tiledb::sm::QueryConditionOp::IN: + case tiledb::sm::QueryConditionOp::NOT_IN: + case tiledb::sm::QueryConditionOp::ALWAYS_TRUE: + case tiledb::sm::QueryConditionOp::ALWAYS_FALSE: + default: + // not implemented here, lazy + stdx::unreachable(); + } + } + + /** + * @return true if `record` passes a simple (i.e. non-combination) query + * condition + */ + bool test( + const Fragment& fragment, + int record, + const tiledb::sm::ASTNode& condition) const { + using DimensionTuple = stdx::decay_tuple; + using AttributeTuple = stdx::decay_tuple; + + const auto dim_eval = stdx::fold_sequence( + std::make_index_sequence>{}, + [&](auto i) { + if (condition.get_field_name() == field_names_[i]) { + return test(std::get(fragment.dimensions())[record], condition); + } else { + return false; + } + }); + if (dim_eval) { + return dim_eval; + } + + return stdx::fold_sequence( + std::make_index_sequence>{}, + [&](auto i) { + if (condition.get_field_name() == + field_names_[i + std::tuple_size_v]) { + return test(std::get(fragment.attributes())[record], condition); + } else { + return false; + } + }); + } +}; + /** * Data for a one-dimensional array */ template struct Fragment1D { + using DimensionType = D; + std::vector dim_; std::tuple...> atts_; diff --git a/test/support/stdx/fold.h b/test/support/stdx/fold.h new file mode 100644 index 00000000000..f5e149c544d --- /dev/null +++ b/test/support/stdx/fold.h @@ -0,0 +1,73 @@ +/** + * @file test/support/stdx/fold.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2025 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file defines functions which do things you would want a fold expression + * to do but can't figure out how to make them do it. + */ + +#ifndef TILEDB_TEST_SUPPORT_FOLD_H +#define TILEDB_TEST_SUPPORT_FOLD_H + +namespace stdx { + +template +constexpr bool fold_sequence(std::integer_sequence, F&& f) { + return (f(std::integral_constant{}) || ...); +} + +template +struct fold_optional_t { + template + static constexpr std::optional fold(F&& f) { + const auto maybe = f(std::integral_constant{}); + if (maybe.has_value()) { + return maybe; + } else { + return fold_optional_t::template fold(f); + } + } +}; + +template +struct fold_optional_t { + template + static constexpr std::optional fold(F&& f) { + return f(std::integral_constant{}); + } +}; + +template +constexpr std::optional fold_optional( + std::integer_sequence, F&& f) { + return fold_optional_t::template fold(f); +} + +} // namespace stdx + +#endif diff --git a/test/support/stdx/tuple.h b/test/support/stdx/tuple.h index 953c041b258..f3bf79666ea 100644 --- a/test/support/stdx/tuple.h +++ b/test/support/stdx/tuple.h @@ -76,6 +76,19 @@ std::tuple&...> reference_tuple( tuple); } +// Helper function used for `value_type_tuple_t`. +// This is not intended to be evaluated. +template +constexpr std::tuple value_type_tuple_f( + std::tuple tuple); + +/** + * Maps a tuple type `(T1, T2, ...)`, + * to a type `(T1::value_type, T2::value_type, ...)`. + */ +template +using value_type_tuple_t = decltype(value_type_tuple_f(std::declval())); + /** * Given two tuples of vectors, extends each of the fields of `dst` * with the corresponding field of `src`. @@ -131,4 +144,5 @@ std::tuple...> select( } } // namespace stdx + #endif diff --git a/test/support/tdb_rapidcheck.h b/test/support/tdb_rapidcheck.h index 2f7907de580..7b2c0893859 100644 --- a/test/support/tdb_rapidcheck.h +++ b/test/support/tdb_rapidcheck.h @@ -79,8 +79,10 @@ namespace rc { */ template struct NonShrinking { + using value_type = T; + NonShrinking(T&& inner) - : inner_(inner) { + : inner_(std::move(inner)) { } T inner_; diff --git a/tiledb/sm/query/readers/result_coords.h b/tiledb/sm/query/readers/result_coords.h index 79eacf6c133..1572cc133c8 100644 --- a/tiledb/sm/query/readers/result_coords.h +++ b/tiledb/sm/query/readers/result_coords.h @@ -227,7 +227,8 @@ struct GlobalOrderResultCoords * * @return Max slab length that can be merged for this tile. */ - uint64_t max_slab_length() { + uint64_t max_slab_length( + uint64_t upper_bound = std::numeric_limits::max()) const { uint64_t ret = 1; uint64_t cell_num = base::tile_->cell_num(); uint64_t next_pos = base::pos_ + 1; @@ -250,13 +251,14 @@ struct GlobalOrderResultCoords // With bitmap, find the longest contiguous set of bits in the bitmap // from the current position. - while (next_pos < cell_num && bitmap[next_pos] == 1) { + while (ret < upper_bound && next_pos < cell_num && + bitmap[next_pos] == 1) { next_pos++; ret++; } } else { // No bitmap, add all cells from current position. - ret = cell_num - base::pos_; + ret = std::min(upper_bound, cell_num - base::pos_); } return ret; @@ -273,15 +275,38 @@ struct GlobalOrderResultCoords * @param cmp The comparator class. Calling cmp(current, next) should tell us * if current is bigger or equal than next in the order of the comparator. * + * @postcondition this method has no side-effects if it is never called + * concurrently by multiple threads. If multiple threads call this method + * concurrently then results may be undefined. + * * @return Max slab length that can be merged for this tile. */ template uint64_t max_slab_length( - const GlobalOrderLowerBound& next, const CompType& cmp) { + const GlobalOrderLowerBound& next, const CompType& cmp) const { uint64_t cell_num = base::tile_->cell_num(); - // Store the original position. - uint64_t original_pos = base::pos_; + // The method is declared `const` to signal to the caller that it has + // no side effects. Internally we will modify `base::pos_` because + // the comparators need that to get the right coordinate. + // This is only a problem if this is called concurrently (and even then + // it would have been a problem anyway). + uint64_t* basepos = const_cast(&this->pos_); + + // Store the original position and use RAII to ensure we restore it. + struct BasePosGuard { + uint64_t* basepos; + uint64_t original_pos; + + BasePosGuard(uint64_t* basepos) + : basepos(basepos) + , original_pos(*basepos) { + } + ~BasePosGuard() { + *basepos = original_pos; + } + }; + BasePosGuard savepos(basepos); // Max posssible position in the tile. Defaults to the last cell in the // tile, it might get updated if we have a bitmap below. @@ -295,7 +320,7 @@ struct GlobalOrderResultCoords if (base::tile_->has_post_dedup_bmp()) { auto& bitmap = base::tile_->post_dedup_bitmap(); // Current cell is not in the bitmap. - if (!bitmap[base::pos_]) { + if (!bitmap[*basepos]) { return 0; } @@ -303,13 +328,13 @@ struct GlobalOrderResultCoords // return 1. const bool overlapping_ranges = std::is_same::value; if constexpr (overlapping_ranges) { - if (bitmap[base::pos_] != 1) { + if (bitmap[*basepos] != 1) { return 1; } } // Compute max position. - max_pos = base::pos_; + max_pos = *basepos; while (max_pos < cell_num && bitmap[max_pos] == 1) { max_pos++; } @@ -327,14 +352,13 @@ struct GlobalOrderResultCoords // ones. It will never take more comparisons that a linear search. uint64_t power_of_two = 1; bool return_max = true; - while (return_max && base::pos_ != max_pos) { - base::pos_ = std::min(original_pos + power_of_two, max_pos); + while (return_max && *basepos != max_pos) { + *basepos = std::min(savepos.original_pos + power_of_two, max_pos); if (cmp(*this, next)) { return_max = false; // If we exit on first comprarison, return 1. if (power_of_two == 1) { - base::pos_ = original_pos; return 1; } } else { @@ -346,28 +370,26 @@ struct GlobalOrderResultCoords // cell until max_pos is smaller than next. So return the maximum cell // slab. if (return_max) { - base::pos_ = original_pos; - return max_pos - original_pos + 1; + return max_pos - savepos.original_pos + 1; } // We have an upper bound and a lower bound for our search with our power // of twos found above. Run a bisection search in between to find the exact // cell. uint64_t left = power_of_two / 2; - uint64_t right = base::pos_; + uint64_t right = *basepos; while (left != right - 1) { // Check against mid. - base::pos_ = left + (right - left) / 2; + *basepos = left + (right - left) / 2; if (!cmp(*this, next)) { - left = base::pos_; + left = *basepos; } else { - right = base::pos_; + right = *basepos; } } // Restore the original position and return. - base::pos_ = original_pos; - return left - original_pos + 1; + return left - savepos.original_pos + 1; } private: diff --git a/tiledb/sm/query/readers/sparse_global_order_reader.cc b/tiledb/sm/query/readers/sparse_global_order_reader.cc index 4236ae60813..be306131e0a 100644 --- a/tiledb/sm/query/readers/sparse_global_order_reader.cc +++ b/tiledb/sm/query/readers/sparse_global_order_reader.cc @@ -1855,9 +1855,12 @@ SparseGlobalOrderReader::merge_result_cell_slabs( } } else { if (add_next_cell_result == AddNextCellResult::NeedMoreTiles) { - // e.g. because we were hitting duplicate coords and reached - // the end of a tile while de-duplicating - length = 1; + // This happens while de-duplicating, so this `NeedMoreTiles` + // came from a different fragment. Emit this coordinate + // (if it qualifies) but not anything past it, since they might + // need to be de-duplicated with coordinates from the next round + // of tiles. + length = to_process.max_slab_length(1); } else if (tile_queue.empty()) { length = to_process.max_slab_length(); } else {