Skip to content

Commit

Permalink
Subarray Rework B: FieldDataSize (#4891)
Browse files Browse the repository at this point in the history
Add `struct FieldDataSize`. There are already two incorrect versions of
this within `class Subarray`, one which uses `double` (??!) and one
which uses `uint64_t`. The correct type for sizes of objects held within
program memory is `size_t`, per the C++ standard.

Change return type of `get_max_memory_size` to `FieldDataSize` and
remove output arguments. Combine all four `get_max_memory_size*`
functions into a single one, thus removing both copypasta and extraneous
checks.

Change return type of `Subarray::get_est_result_size` to `FieldDataView`
and remove output arguments. Combine all four `get_est_result_size*`
functions into a single one, thus again removing both copypasta and
extraneous checks. Rework the `Query::get_est_result_size_*` functions
that directly serve the C API. Rationalize their names. Move validation
for `Query::get_est_result_size_*` to separate functions, which yet
again removes a large amount of copypasta. Add null pointer checks to C
API functions `tiledb_query_est_result_size*`. Previous null checks were
not exhaustive and did not happen in the C API implementation function.

Fix defect in `ArraySchema::is_field`, which did not consider a
dimension label as a field.

Removed unused function `get_next_range_coords`.

Added another item to `.gitignore` to cope with an increase requirement
from AWS for short lengths in the build directory prefix.

<long description>

---
TYPE: NO_HISTORY
DESC: Subarray Rework B: FieldDataSize
  • Loading branch information
eric-hughes-tiledb authored Apr 26, 2024
1 parent 5298c22 commit d1a5f22
Show file tree
Hide file tree
Showing 8 changed files with 327 additions and 772 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
.vscode*
*.sw?
build/*
build-*/*
dist/*
cmake-build-*/*
core/bin/*
Expand Down
3 changes: 2 additions & 1 deletion tiledb/sm/array_schema/array_schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,8 @@ bool ArraySchema::is_dim_label(const std::string& name) const {
}

bool ArraySchema::is_field(const std::string& name) const {
return is_attr(name) || is_dim(name) || is_special_attribute(name);
return is_attr(name) || is_dim(name) || is_dim_label(name) ||
is_special_attribute(name);
}

bool ArraySchema::is_nullable(const std::string& name) const {
Expand Down
71 changes: 56 additions & 15 deletions tiledb/sm/c_api/tiledb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1541,11 +1541,17 @@ int32_t tiledb_query_get_est_result_size(
const tiledb_query_t* query,
const char* name,
uint64_t* size) {
if (sanity_check(ctx, query) == TILEDB_ERR)
if (sanity_check(ctx, query) == TILEDB_ERR) {
return TILEDB_ERR;

throw_if_not_ok(query->query_->get_est_result_size(name, size));

}
if (name == nullptr) {
throw CAPIStatusException("Pointer to field name may not be NULL");
}
if (size == nullptr) {
throw CAPIStatusException("Pointer to size may not be NULL");
}
auto est_size{query->query_->get_est_result_size_fixed_nonnull(name)};
*size = est_size.fixed_;
return TILEDB_OK;
}

Expand All @@ -1555,10 +1561,22 @@ int32_t tiledb_query_get_est_result_size_var(
const char* name,
uint64_t* size_off,
uint64_t* size_val) {
if (sanity_check(ctx, query) == TILEDB_ERR)
if (sanity_check(ctx, query) == TILEDB_ERR) {
return TILEDB_ERR;
}
if (name == nullptr) {
throw CAPIStatusException("Pointer to field name may not be NULL");
}
if (size_off == nullptr) {
throw CAPIStatusException("Pointer to offset size may not be NULL");
}
if (size_val == nullptr) {
throw CAPIStatusException("Pointer to value size may not be NULL");
}

throw_if_not_ok(query->query_->get_est_result_size(name, size_off, size_val));
auto est_size{query->query_->get_est_result_size_variable_nonnull(name)};
*size_off = est_size.fixed_;
*size_val = est_size.variable_;

return TILEDB_OK;
}
Expand All @@ -1569,12 +1587,22 @@ int32_t tiledb_query_get_est_result_size_nullable(
const char* name,
uint64_t* size_val,
uint64_t* size_validity) {
if (sanity_check(ctx, query) == TILEDB_ERR)
if (sanity_check(ctx, query) == TILEDB_ERR) {
return TILEDB_ERR;
}
if (name == nullptr) {
throw CAPIStatusException("Pointer to field name may not be NULL");
}
if (size_val == nullptr) {
throw CAPIStatusException("Pointer to value size may not be NULL");
}
if (size_validity == nullptr) {
throw CAPIStatusException("Pointer to validity size may not be NULL");
}

throw_if_not_ok(query->query_->get_est_result_size_nullable(
name, size_val, size_validity));

auto est_size{query->query_->get_est_result_size_fixed_nullable(name)};
*size_val = est_size.fixed_;
*size_validity = est_size.validity_;
return TILEDB_OK;
}

Expand All @@ -1585,12 +1613,25 @@ int32_t tiledb_query_get_est_result_size_var_nullable(
uint64_t* size_off,
uint64_t* size_val,
uint64_t* size_validity) {
if (sanity_check(ctx, query) == TILEDB_ERR)
if (sanity_check(ctx, query) == TILEDB_ERR) {
return TILEDB_ERR;

throw_if_not_ok(query->query_->get_est_result_size_nullable(
name, size_off, size_val, size_validity));

}
if (name == nullptr) {
throw CAPIStatusException("Pointer to field name may not be NULL");
}
if (size_off == nullptr) {
throw CAPIStatusException("Pointer to offset size may not be NULL");
}
if (size_val == nullptr) {
throw CAPIStatusException("Pointer to value size may not be NULL");
}
if (size_validity == nullptr) {
throw CAPIStatusException("Pointer to validity size may not be NULL");
}
auto est_size{query->query_->get_est_result_size_variable_nullable(name)};
*size_off = est_size.fixed_;
*size_val = est_size.variable_;
*size_validity = est_size.validity_;
return TILEDB_OK;
}

Expand Down
215 changes: 77 additions & 138 deletions tiledb/sm/query/query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -151,172 +151,111 @@ Query::~Query() {
/* API */
/* ****************************** */

Status Query::get_est_result_size(const char* name, uint64_t* size) {
if (type_ != QueryType::READ) {
return logger_->status(Status_QueryError(
"Cannot get estimated result size; Operation currently "
"only unsupported for read queries"));
void Query::field_require_array_fixed(
const std::string_view origin, const char* name) {
if (!array_schema_->is_field(name)) {
throw QueryException(
std::string{origin} + ": '" + name + "' is not an array field");
}

if (name == nullptr) {
return logger_->status(Status_QueryError(
"Cannot get estimated result size; Name cannot be null"));
if (array_schema_->var_size(name)) {
throw QueryException(
std::string{origin} + ": '" + name + "' is not fixed-sized");
}
}

if (name == constants::coords &&
!array_schema_->domain().all_dims_same_type()) {
return logger_->status(Status_QueryError(
"Cannot get estimated result size; Not applicable to zipped "
"coordinates in arrays with heterogeneous domain"));
void Query::field_require_array_variable(
const std::string_view origin, const char* name) {
if (!array_schema_->is_field(name)) {
throw QueryException(
std::string{origin} + ": '" + name + "' is not an array field");
}

if (name == constants::coords && !array_schema_->domain().all_dims_fixed()) {
return logger_->status(Status_QueryError(
"Cannot get estimated result size; Not applicable to zipped "
"coordinates in arrays with domains with variable-sized dimensions"));
if (!array_schema_->var_size(name)) {
throw QueryException(
std::string{origin} + ": '" + name + "' is not variable-sized");
}
}

if (array_schema_->is_nullable(name)) {
return logger_->status(Status_QueryError(
std::string(
"Cannot get estimated result size; Input attribute/dimension '") +
name + "' is nullable"));
void Query::field_require_array_nullable(
const std::string_view origin, const char* name) {
if (!array_schema_->attribute(name)) {
throw QueryException(
std::string{origin} + ": '" + name +
"' is not the name of an attribute");
}

if (array_->is_remote() && !subarray_.est_result_size_computed()) {
auto rest_client = storage_manager_->rest_client();
if (rest_client == nullptr) {
return logger_->status(
Status_QueryError("Error in query estimate result size; remote "
"array with no rest client."));
}

RETURN_NOT_OK(
rest_client->get_query_est_result_sizes(array_->array_uri(), this));
if (!array_schema_->is_nullable(name)) {
throw QueryException(
std::string{origin} + ": attribute '" + name + "' is not nullable");
}

subarray_.get_est_result_size_internal(
name, size, &config_, storage_manager_->compute_tp());
return Status::Ok();
}

Status Query::get_est_result_size(
const char* name, uint64_t* size_off, uint64_t* size_val) {
if (type_ != QueryType::READ) {
return logger_->status(Status_QueryError(
"Cannot get estimated result size; Operation currently "
"only supported for read queries"));
}

void Query::field_require_array_nonnull(
const std::string_view origin, const char* name) {
if (array_schema_->is_nullable(name)) {
return logger_->status(Status_QueryError(
std::string(
"Cannot get estimated result size; Input attribute/dimension '") +
name + "' is nullable"));
}

if (array_->is_remote() && !subarray_.est_result_size_computed()) {
auto rest_client = storage_manager_->rest_client();
if (rest_client == nullptr) {
return logger_->status(
Status_QueryError("Error in query estimate result size; remote "
"array with no rest client."));
}

RETURN_NOT_OK(
rest_client->get_query_est_result_sizes(array_->array_uri(), this));
throw QueryException(
std::string(origin) + ": field '" + name +
"' is not a nonnull array field");
}

subarray_.get_est_result_size(
name, size_off, size_val, &config_, storage_manager_->compute_tp());
return Status::Ok();
}

Status Query::get_est_result_size_nullable(
const char* name, uint64_t* size_val, uint64_t* size_validity) {
if (type_ != QueryType::READ) {
return logger_->status(Status_QueryError(
"Cannot get estimated result size; Operation currently "
"only supported for read queries"));
}

if (name == nullptr) {
return logger_->status(Status_QueryError(
"Cannot get estimated result size; Name cannot be null"));
}
constexpr std::string_view origin_est_result_size{
"query estimated result size"};

if (!array_schema_->attribute(name)) {
return logger_->status(Status_QueryError(
"Cannot get estimated result size; Nullable API is only"
"applicable to attributes"));
}

if (!array_schema_->is_nullable(name)) {
return logger_->status(Status_QueryError(
std::string("Cannot get estimated result size; Input attribute '") +
name + "' is not nullable"));
FieldDataSize Query::internal_est_result_size(const char* name) {
if (type_ != QueryType::READ) {
throw QueryException(
std::string{origin_est_result_size} +
": operation currently supported only for read queries");
}

if (array_->is_remote() && !subarray_.est_result_size_computed()) {
auto rest_client = storage_manager_->rest_client();
if (rest_client == nullptr) {
return logger_->status(
Status_QueryError("Error in query estimate result size; remote "
"array with no rest client."));
throw QueryStatusException(
"Error in query estimate result size; "
"remote array with no rest client.");
}

RETURN_NOT_OK(
throw_if_not_ok(
rest_client->get_query_est_result_sizes(array_->array_uri(), this));
}

subarray_.get_est_result_size_nullable(
name, size_val, size_validity, &config_, storage_manager_->compute_tp());
return Status::Ok();
return subarray_.get_est_result_size(
name, &config_, storage_manager_->compute_tp());
}

Status Query::get_est_result_size_nullable(
const char* name,
uint64_t* size_off,
uint64_t* size_val,
uint64_t* size_validity) {
if (type_ != QueryType::READ) {
return logger_->status(Status_QueryError(
"Cannot get estimated result size; Operation currently "
"only supported for read queries"));
}

if (!array_schema_->attribute(name)) {
return logger_->status(Status_QueryError(
"Cannot get estimated result size; Nullable API is only"
"applicable to attributes"));
}

if (!array_schema_->is_nullable(name)) {
return logger_->status(Status_QueryError(
std::string("Cannot get estimated result size; Input attribute '") +
name + "' is not nullable"));
FieldDataSize Query::get_est_result_size_fixed_nonnull(const char* name) {
field_require_array_fixed(origin_est_result_size, name);
if (name == constants::coords) {
if (!array_schema_->domain().all_dims_same_type()) {
throw QueryException(
std::string{origin_est_result_size} +
": not applicable to zipped coordinates "
"in arrays with heterogeneous domain");
}
if (!array_schema_->domain().all_dims_fixed()) {
throw QueryException(
std::string{origin_est_result_size} +
": not applicable to zipped coordinates "
"in arrays with domains with variable-sized dimensions");
}
}
field_require_array_nonnull(origin_est_result_size, name);
return internal_est_result_size(name);
}

if (array_->is_remote() && !subarray_.est_result_size_computed()) {
auto rest_client = storage_manager_->rest_client();
if (rest_client == nullptr) {
return logger_->status(
Status_QueryError("Error in query estimate result size; remote "
"array with no rest client."));
}
FieldDataSize Query::get_est_result_size_variable_nonnull(const char* name) {
field_require_array_variable(origin_est_result_size, name);
field_require_array_nonnull(origin_est_result_size, name);
return internal_est_result_size(name);
}

RETURN_NOT_OK(
rest_client->get_query_est_result_sizes(array_->array_uri(), this));
}
FieldDataSize Query::get_est_result_size_fixed_nullable(const char* name) {
field_require_array_fixed(origin_est_result_size, name);
field_require_array_nullable(origin_est_result_size, name);
return internal_est_result_size(name);
}

subarray_.get_est_result_size_nullable(
name,
size_off,
size_val,
size_validity,
&config_,
storage_manager_->compute_tp());
return Status::Ok();
FieldDataSize Query::get_est_result_size_variable_nullable(const char* name) {
field_require_array_variable(origin_est_result_size, name);
field_require_array_nullable(origin_est_result_size, name);
return internal_est_result_size(name);
}

std::unordered_map<std::string, Subarray::ResultSize>
Expand Down
Loading

0 comments on commit d1a5f22

Please sign in to comment.