Skip to content

Commit

Permalink
Adding extra in memory offset for var tiles on read path. (#4294)
Browse files Browse the repository at this point in the history
* Adding extra in memory offset for var tiles on read path.

This change adds one extra in memory offset for the var fields on the read path. This way we don't have to process the last cell of a var tile in special cases anymore.

---
TYPE: IMPROVEMENT
DESC: Adding extra in memory offset for var tiles on read path.

* Addressing feedback from @davisp.

* Add offset_t.

* Fix windows build.
  • Loading branch information
KiterLuc authored Sep 14, 2023
1 parent 6b2e9b1 commit d02d9c2
Show file tree
Hide file tree
Showing 40 changed files with 550 additions and 769 deletions.
2 changes: 1 addition & 1 deletion test/src/unit-DenseTiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ void DenseTilerFx::close_array() {
template <class T>
bool DenseTilerFx::check_tile(WriterTile& tile, const std::vector<T>& data) {
std::vector<T> tile_data(data.size());
CHECK(tile.size() == data.size() * sizeof(T));
CHECK(tile.size_as<T>() == data.size());
CHECK(tile.read(&tile_data[0], 0, data.size() * sizeof(T)).ok());
CHECK(tile_data == data);
return tile_data == data;
Expand Down
10 changes: 5 additions & 5 deletions test/src/unit-ReadCellSlabIter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -510,9 +510,9 @@ TEST_CASE_METHOD(

// Create result coordinates
std::vector<ResultCoords> result_coords;
ResultTile result_tile_2_0(1, 0, array_schema);
ResultTile result_tile_3_0(2, 0, array_schema);
ResultTile result_tile_3_1(2, 1, array_schema);
ResultTile result_tile_2_0(1, 0, *fragments[0]);
ResultTile result_tile_3_0(2, 0, *fragments[0]);
ResultTile result_tile_3_1(2, 1, *fragments[1]);

set_result_tile_dim(
array_schema, result_tile_2_0, "d", 0, {{1000, 3, 1000, 5}});
Expand Down Expand Up @@ -1361,8 +1361,8 @@ TEST_CASE_METHOD(

// Create result coordinates
std::vector<ResultCoords> result_coords;
ResultTile result_tile_3_0(2, 0, array_schema);
ResultTile result_tile_3_1(2, 1, array_schema);
ResultTile result_tile_3_0(2, 0, *fragments[0]);
ResultTile result_tile_3_1(2, 1, *fragments[1]);

set_result_tile_dim(
array_schema, result_tile_3_0, "d1", 0, {{1000, 3, 1000, 1000}});
Expand Down
10 changes: 5 additions & 5 deletions test/src/unit-Reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -271,15 +271,15 @@ TEST_CASE_METHOD(
CHECK(result_space_tiles.size() == 6);

// Result tiles for fragment #1
ResultTile result_tile_1_0_1(1, 0, *(schema.get()));
ResultTile result_tile_1_2_1(1, 2, *(schema.get()));
ResultTile result_tile_1_0_1(1, 0, *fragments[0]);
ResultTile result_tile_1_2_1(1, 2, *fragments[0]);

// Result tiles for fragment #2
ResultTile result_tile_1_0_2(2, 0, *(schema.get()));
ResultTile result_tile_1_0_2(2, 0, *fragments[1]);

// Result tiles for fragment #3
ResultTile result_tile_2_0_3(3, 0, *(schema.get()));
ResultTile result_tile_3_0_3(3, 2, *(schema.get()));
ResultTile result_tile_2_0_3(3, 0, *fragments[2]);
ResultTile result_tile_3_0_3(3, 2, *fragments[2]);

// Initialize result_space_tiles
ResultSpaceTile<int32_t> rst_1_0;
Expand Down
4 changes: 2 additions & 2 deletions test/src/unit-dense-reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1296,9 +1296,9 @@ TEST_CASE_METHOD(
a2_offsets.data(),
&a2_offsets_size);

// First var tiles is 91 and subequent are 100 bytes, this will only allow to
// First var tiles is 99 and subequent are 108 bytes, this will only allow to
// load two tiles the first loop and one on the subsequents.
tile_upper_memory_limit_ = "384";
tile_upper_memory_limit_ = "416";
update_config();

// Try to read.
Expand Down
77 changes: 38 additions & 39 deletions test/src/unit-result-coords.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,13 @@ struct CResultCoordsFx {
std::string array_name_;
const char* ARRAY_NAME = "test_result_coords";
tiledb_array_t* array_;
std::unique_ptr<FragmentMetadata> frag_md;
std::unique_ptr<FragmentMetadata> frag_md_;

CResultCoordsFx();
CResultCoordsFx(uint64_t num_cells);
~CResultCoordsFx();

GlobalOrderResultTile<uint8_t> make_tile_with_num_cells(uint64_t num_cells);
};

CResultCoordsFx::CResultCoordsFx() {
CResultCoordsFx::CResultCoordsFx(uint64_t num_cells) {
tiledb_config_t* config;
tiledb_error_t* error = nullptr;
REQUIRE(tiledb_config_alloc(&config, &error) == TILEDB_OK);
Expand All @@ -81,8 +79,8 @@ CResultCoordsFx::CResultCoordsFx() {
create_dir(temp_dir_, ctx_, vfs_);
array_name_ = temp_dir_ + ARRAY_NAME;

int64_t domain[] = {1, 10};
int64_t tile_extent = 5;
int64_t domain[] = {1, 2 * static_cast<int64_t>(num_cells)};
int64_t tile_extent = num_cells;
// Create array
create_array(
ctx_,
Expand All @@ -98,15 +96,15 @@ CResultCoordsFx::CResultCoordsFx() {
{tiledb::test::Compressor(TILEDB_FILTER_NONE, -1)},
TILEDB_ROW_MAJOR,
TILEDB_ROW_MAJOR,
5);
num_cells);

// Open array for reading.
auto rc = tiledb_array_alloc(ctx_, array_name_.c_str(), &array_);
REQUIRE(rc == TILEDB_OK);
rc = tiledb_array_open(ctx_, array_, TILEDB_READ);
REQUIRE(rc == TILEDB_OK);

frag_md.reset(new FragmentMetadata(
frag_md_.reset(new FragmentMetadata(
nullptr,
nullptr,
array_->array_->array_schema_latest_ptr(),
Expand All @@ -125,26 +123,21 @@ CResultCoordsFx::~CResultCoordsFx() {
tiledb_vfs_free(&vfs_);
}

GlobalOrderResultTile<uint8_t> CResultCoordsFx::make_tile_with_num_cells(
uint64_t num_cells) {
GlobalOrderResultTile<uint8_t> result_tile(0, 0, false, false, *frag_md);
ResultTile::TileSizes tile_sizes(
num_cells * sizeof(int64_t),
0,
std::nullopt,
std::nullopt,
std::nullopt,
std::nullopt);
ResultTile::TileData tile_data{nullptr, nullptr, nullptr};
result_tile.init_attr_tile(
constants::format_version,
array_->array_->array_schema_latest(),
constants::coords,
tile_sizes,
tile_data);

return result_tile;
}
struct CResultCoordsFxSmall {
CResultCoordsFxSmall()
: fx_(5) {
}

CResultCoordsFx fx_;
};

struct CResultCoordsFxLarge {
CResultCoordsFxLarge()
: fx_(100) {
}

CResultCoordsFx fx_;
};

/* ********************************* */
/* TESTS */
Expand All @@ -168,10 +161,10 @@ class Cmp {
};

TEST_CASE_METHOD(
CResultCoordsFx,
CResultCoordsFxSmall,
"GlobalOrderResultCoords: test max_slab_length",
"[globalorderresultcoords][max_slab_length]") {
auto tile = make_tile_with_num_cells(5);
GlobalOrderResultTile<uint8_t> tile(0, 0, false, false, *fx_.frag_md_);

// Test max_slab_length with no bitmap.
GlobalOrderResultCoords rc1(&tile, 1);
Expand All @@ -195,10 +188,10 @@ TEST_CASE_METHOD(
}

TEST_CASE_METHOD(
CResultCoordsFx,
CResultCoordsFxSmall,
"GlobalOrderResultCoords: test max_slab_length with comparator",
"[globalorderresultcoords][max_slab_length_with_comp]") {
auto tile = make_tile_with_num_cells(5);
GlobalOrderResultTile<uint8_t> tile(0, 0, false, false, *fx_.frag_md_);
Cmp cmp;

// Test max_slab_length with no bitmap and comparator.
Expand All @@ -222,24 +215,30 @@ TEST_CASE_METHOD(
tile.count_cells();
rc1.pos_ = 0;
REQUIRE(rc1.max_slab_length(GlobalOrderResultCoords(&tile, 3), cmp) == 0);
}

auto tile2 = make_tile_with_num_cells(100);
rc1.tile_ = &tile2;
TEST_CASE_METHOD(
CResultCoordsFxLarge,
"GlobalOrderResultCoords: test max_slab_length with comparator, large tile",
"[globalorderresultcoords][max_slab_length_with_comp]") {
GlobalOrderResultTile<uint8_t> tile(0, 0, false, false, *fx_.frag_md_);
Cmp cmp;

GlobalOrderResultCoords rc1(&tile, 1);
for (uint64_t i = 0; i < 100; i++) {
for (uint64_t j = i + 1; j < 100; j++) {
rc1.pos_ = i;
REQUIRE(
rc1.max_slab_length(GlobalOrderResultCoords(&tile2, j), cmp) ==
j - i);
rc1.max_slab_length(GlobalOrderResultCoords(&tile, j), cmp) == j - i);
}
}
}

TEST_CASE_METHOD(
CResultCoordsFx,
CResultCoordsFxSmall,
"GlobalOrderResultCoords: advance_to_next_cell",
"[globalorderresultcoords][advance_to_next_cell]") {
auto tile = make_tile_with_num_cells(5);
GlobalOrderResultTile<uint8_t> tile(0, 0, false, false, *fx_.frag_md_);
Cmp cmp;

GlobalOrderResultCoords rc1(&tile, 0);
Expand Down
38 changes: 26 additions & 12 deletions test/src/unit-result-tile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -186,12 +186,19 @@ TEST_CASE_METHOD(
uint64_t num_cells = 8;

auto& array_schema = array_->array_->array_schema_latest();
ResultTile rt(0, 0, array_schema);
FragmentMetadata frag_md(
nullptr,
nullptr,
array_->array_->array_schema_latest_ptr(),
URI(),
std::make_pair<uint64_t, uint64_t>(0, 0),
true);
ResultTile rt(0, 0, frag_md);

// Make sure cell_num() will return the correct value.
if (!first_dim) {
ResultTile::TileSizes tile_sizes(
num_cells * constants::cell_var_offset_size,
(num_cells + 1) * constants::cell_var_offset_size,
0,
0,
0,
Expand All @@ -208,7 +215,7 @@ TEST_CASE_METHOD(
}

ResultTile::TileSizes tile_sizes(
num_cells * constants::cell_var_offset_size,
(num_cells + 1) * constants::cell_var_offset_size,
0,
num_cells,
0,
Expand All @@ -227,13 +234,13 @@ TEST_CASE_METHOD(
Tile* const t_var = &tile_tuple->var_tile();

// Initialize offsets, use 1 character strings.
uint64_t* offsets = (uint64_t*)t->data();
for (uint64_t i = 0; i < num_cells; i++) {
offsets_t* offsets = t->data_as<offsets_t>();
for (uint64_t i = 0; i < num_cells + 1; i++) {
offsets[i] = i;
}

// Initialize data, use incrementing single string values starting with 'a'.
char* var = (char*)t_var->data();
char* var = t_var->data_as<char>();
for (uint64_t i = 0; i < num_cells; i++) {
var[i] = 'a' + i;
}
Expand Down Expand Up @@ -289,12 +296,19 @@ TEST_CASE_METHOD(
uint64_t num_cells = 8;

auto& array_schema = array_->array_->array_schema_latest();
ResultTile rt(0, 0, array_schema);
FragmentMetadata frag_md(
nullptr,
nullptr,
array_->array_->array_schema_latest_ptr(),
URI(),
std::make_pair<uint64_t, uint64_t>(0, 0),
true);
ResultTile rt(0, 0, frag_md);

// Make sure cell_num() will return the correct value.
if (!first_dim) {
ResultTile::TileSizes tile_sizes(
num_cells * constants::cell_var_offset_size,
(num_cells + 1) * constants::cell_var_offset_size,
0,
0,
0,
Expand All @@ -311,7 +325,7 @@ TEST_CASE_METHOD(
}

ResultTile::TileSizes tile_sizes(
num_cells * constants::cell_var_offset_size,
(num_cells + 1) * constants::cell_var_offset_size,
0,
num_cells,
0,
Expand All @@ -330,13 +344,13 @@ TEST_CASE_METHOD(
Tile* const t_var = &tile_tuple->var_tile();

// Initialize offsets, use 1 character strings.
uint64_t* offsets = (uint64_t*)t->data();
for (uint64_t i = 0; i < num_cells; i++) {
offsets_t* offsets = t->data_as<offsets_t>();
for (uint64_t i = 0; i < num_cells + 1; i++) {
offsets[i] = i;
}

// Initialize data, use incrementing single string values starting with 'a'.
char* var = (char*)t_var->data();
char* var = t_var->data_as<char>();
for (uint64_t i = 0; i < num_cells; i++) {
var[i] = 'a' + i;
}
Expand Down
14 changes: 7 additions & 7 deletions test/src/unit-tile-metadata-generator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,10 @@ TEMPLATE_LIST_TEST_CASE(
nullable,
cell_val_num * sizeof(T),
tiledb_type);
auto tile_buff = (T*)writer_tile.fixed_tile().data();
auto tile_buff = writer_tile.fixed_tile().data_as<T>();
uint8_t* nullable_buff = nullptr;
if (nullable) {
nullable_buff = (uint8_t*)writer_tile.validity_tile().data();
nullable_buff = writer_tile.validity_tile().data_as<uint8_t>();
}

// Compute correct values as the tile is filled with data.
Expand Down Expand Up @@ -264,7 +264,7 @@ TEMPLATE_LIST_TEST_CASE(
// Initialize a new tile.
auto tiledb_type = static_cast<Datatype>(type.tiledb_type);
WriterTileTuple writer_tile(schema, 4, false, false, sizeof(T), tiledb_type);
auto tile_buff = (T*)writer_tile.fixed_tile().data();
auto tile_buff = writer_tile.fixed_tile().data_as<T>();

// Once an overflow happens, the computation should abort, try to add a few
// min values after the overflow to confirm.
Expand All @@ -290,7 +290,7 @@ TEMPLATE_LIST_TEST_CASE(
// Initialize a new tile.
WriterTileTuple writer_tile(
schema, 4, false, false, sizeof(T), tiledb_type);
auto tile_buff = (T*)writer_tile.fixed_tile().data();
auto tile_buff = writer_tile.fixed_tile().data_as<T>();

// Once an overflow happens, the computation should abort, try to add a few
// max values after the overflow to confirm.
Expand Down Expand Up @@ -357,12 +357,12 @@ TEST_CASE(
// Initialize tile.
WriterTileTuple writer_tile(
schema, num_cells, true, nullable, 1, Datatype::CHAR);
auto offsets_tile_buff = (uint64_t*)writer_tile.offset_tile().data();
auto offsets_tile_buff = writer_tile.offset_tile().data_as<offsets_t>();

// Initialize a new nullable tile.
uint8_t* nullable_buff = nullptr;
if (nullable) {
nullable_buff = (uint8_t*)writer_tile.validity_tile().data();
nullable_buff = writer_tile.validity_tile().data_as<uint8_t>();
}

// Compute correct values as the tile is filled with data.
Expand Down Expand Up @@ -440,7 +440,7 @@ TEST_CASE(
// Store '123' and '12'
// Initialize offsets tile.
WriterTileTuple writer_tile(schema, 2, true, false, 1, Datatype::CHAR);
auto offsets_tile_buff = (uint64_t*)writer_tile.offset_tile().data();
auto offsets_tile_buff = writer_tile.offset_tile().data_as<offsets_t>();
offsets_tile_buff[0] = 0;
offsets_tile_buff[1] = 3;

Expand Down
5 changes: 5 additions & 0 deletions tiledb/common/common-std.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ using storage_size_t = uint64_t;
*/
using format_version_t = uint32_t;

/**
* Type for anything offsets related.
*/
using offsets_t = uint64_t;

/*
* Value manipulation
*/
Expand Down
Loading

0 comments on commit d02d9c2

Please sign in to comment.