Skip to content

Commit

Permalink
feat(fuzzer): Add TopNRowNumberFuzzer
Browse files Browse the repository at this point in the history
  • Loading branch information
aditi-pandit committed Feb 18, 2025
1 parent b72a827 commit 2d7ec1a
Show file tree
Hide file tree
Showing 13 changed files with 958 additions and 314 deletions.
28 changes: 22 additions & 6 deletions velox/exec/fuzzer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ target_link_libraries(
velox_type_parser
Folly::folly
velox_hive_connector
velox_dwio_dwrf_reader
velox_dwio_dwrf_writer
velox_dwio_catalog_fbhive
velox_dwio_faulty_file_sink)
Expand Down Expand Up @@ -79,22 +78,39 @@ target_link_libraries(
velox_aggregation_fuzzer_base
velox_temp_path)

# Base library built from RowNumberFuzzerBase.cpp. It carries the DWRF reader,
# fuzzer utility, vector fuzzer and exec test dependencies for the targets that
# link against it.
add_library(velox_row_number_fuzzer_base_lib RowNumberFuzzerBase.cpp)

target_link_libraries(
velox_row_number_fuzzer_base_lib
velox_dwio_dwrf_reader
velox_fuzzer_util
velox_vector_fuzzer
velox_exec_test_lib)

# RowNumber fuzzer library built from RowNumberFuzzer.cpp; links against
# velox_row_number_fuzzer_base_lib.
add_library(velox_row_number_fuzzer_lib RowNumberFuzzer.cpp)

target_link_libraries(
velox_row_number_fuzzer_lib velox_row_number_fuzzer_base_lib velox_type
velox_expression_test_utility)

# RowNumber Fuzzer executable: RowNumberFuzzerRunner.cpp linked against
# velox_row_number_fuzzer_lib.
add_executable(velox_row_number_fuzzer RowNumberFuzzerRunner.cpp)

target_link_libraries(
velox_row_number_fuzzer velox_row_number_fuzzer_lib)

# TopNRowNumber fuzzer library built from TopNRowNumberFuzzer.cpp.
add_library(velox_topn_row_number_fuzzer_lib TopNRowNumberFuzzer.cpp)

# The first argument of target_link_libraries is the target being configured,
# so it must be the TopNRowNumber library itself. The fuzzer utility, vector
# fuzzer and exec test libraries are linked by
# velox_row_number_fuzzer_base_lib, mirroring how velox_row_number_fuzzer_lib
# is set up.
target_link_libraries(
  velox_topn_row_number_fuzzer_lib velox_row_number_fuzzer_base_lib velox_type
  velox_expression_test_utility)

# TopNRowNumber Fuzzer executable: TopNRowNumberFuzzerRunner.cpp linked against
# velox_topn_row_number_fuzzer_lib.
add_executable(velox_topn_row_number_fuzzer TopNRowNumberFuzzerRunner.cpp)

target_link_libraries(
velox_topn_row_number_fuzzer velox_topn_row_number_fuzzer_lib)

add_library(velox_join_fuzzer JoinFuzzer.cpp)

target_link_libraries(
Expand Down
51 changes: 51 additions & 0 deletions velox/exec/fuzzer/DuckQueryRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,11 @@ std::optional<std::string> DuckQueryRunner::toSql(
return toSql(rowNumberNode);
}

if (const auto topNRowNumberNode =
std::dynamic_pointer_cast<const core::TopNRowNumberNode>(plan)) {
return toSql(topNRowNumberNode);
}

if (const auto joinNode =
std::dynamic_pointer_cast<const core::HashJoinNode>(plan)) {
return toSql(joinNode);
Expand Down Expand Up @@ -377,4 +382,50 @@ std::optional<std::string> DuckQueryRunner::toSql(

return sql.str();
}

// Builds a SQL query string for a TopNRowNumberNode. The query selects every
// source column plus a row_number() window column (partitioned and ordered by
// the node's keys) from the source's SQL, and keeps only the rows whose
// row_number does not exceed the node's limit.
//
// Returns std::nullopt if the node's source cannot be converted to SQL.
std::optional<std::string> DuckQueryRunner::toSql(
    const std::shared_ptr<const core::TopNRowNumberNode>& topNRowNumberNode) {
  // TopNRowNumberNode should have a single source.
  const auto sourceNode = topNRowNumberNode->sources()[0];

  std::stringstream query;
  query << "SELECT * FROM (SELECT ";

  // Select list: every column of the source, by name.
  const auto& sourceType = sourceNode->outputType();
  const auto numColumns = sourceType->size();
  for (auto column = 0; column < numColumns; ++column) {
    appendComma(column, query);
    query << sourceType->nameOf(column);
  }

  query << ", row_number() OVER (";

  // Optional PARTITION BY clause.
  const auto& partitionBy = topNRowNumberNode->partitionKeys();
  const auto numPartitionKeys = partitionBy.size();
  if (numPartitionKeys > 0) {
    query << "partition by ";
    for (auto key = 0; key < numPartitionKeys; ++key) {
      appendComma(key, query);
      query << partitionBy[key]->name();
    }
  }

  // Optional ORDER BY clause; each key is paired with the sorting order at
  // the same index.
  const auto& orderBy = topNRowNumberNode->sortingKeys();
  const auto& orders = topNRowNumberNode->sortingOrders();
  const auto numSortingKeys = orderBy.size();
  if (numSortingKeys > 0) {
    query << " ORDER BY ";
    for (auto key = 0; key < numSortingKeys; ++key) {
      appendComma(key, query);
      query << orderBy[key]->name() << " " << orders[key].toString();
    }
  }

  const std::optional<std::string> sourceSql = toSql(sourceNode);
  if (!sourceSql.has_value()) {
    return std::nullopt;
  }

  // Close the window specification and the inner SELECT, then apply the limit
  // as a filter on the generated row number.
  query << ") as row_number FROM " << sourceSql.value() << ") ";
  query << " where row_number <= " << topNRowNumberNode->limit();

  return query.str();
}

} // namespace facebook::velox::exec::test
3 changes: 3 additions & 0 deletions velox/exec/fuzzer/DuckQueryRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ class DuckQueryRunner : public ReferenceQueryRunner {
std::optional<std::string> toSql(
const std::shared_ptr<const core::RowNumberNode>& rowNumberNode);

// Converts a TopNRowNumberNode plan into a SQL query string. Returns
// std::nullopt if the node's source cannot be converted to SQL.
std::optional<std::string> toSql(
const std::shared_ptr<const core::TopNRowNumberNode>& topNRowNumberNode);

std::unordered_set<std::string> aggregateFunctionNames_;
};

Expand Down
6 changes: 6 additions & 0 deletions velox/exec/fuzzer/FuzzerUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "velox/dwio/dwrf/writer/Writer.h"
#include "velox/exec/fuzzer/DuckQueryRunner.h"
#include "velox/exec/fuzzer/PrestoQueryRunner.h"
#include "velox/exec/tests/utils/TempDirectoryPath.h"
#include "velox/expression/SignatureBinder.h"
#include "velox/functions/prestosql/types/IPPrefixType.h"

Expand Down Expand Up @@ -205,6 +206,11 @@ bool isTableScanSupported(const TypePtr& type) {
if (type->kind() == TypeKind::HUGEINT) {
return false;
}
// Disable testing with TableScan when input contains TIMESTAMP type, due to
// the issue #8127.
if (type->kind() == TypeKind::TIMESTAMP) {
return false;
}

for (auto i = 0; i < type->size(); ++i) {
if (!isTableScanSupported(type->childAt(i))) {
Expand Down
2 changes: 1 addition & 1 deletion velox/exec/fuzzer/FuzzerUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ TypePtr sanitizeTryResolveType(
const std::unordered_map<std::string, TypePtr>& typeVariablesBindings,
std::unordered_map<std::string, int>& integerVariablesBindings);

// Invoked to set up memory system with arbitration.
/// Invoked to set up memory system with arbitration.
void setupMemory(
int64_t allocatorCapacity,
int64_t arbitratorCapacity,
Expand Down
54 changes: 54 additions & 0 deletions velox/exec/fuzzer/PrestoQueryRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,11 @@ std::optional<std::string> PrestoQueryRunner::toSql(
return toSql(rowNumberNode);
}

if (const auto topNRowNumberNode =
std::dynamic_pointer_cast<const core::TopNRowNumberNode>(plan)) {
return toSql(topNRowNumberNode);
}

if (auto tableWriteNode =
std::dynamic_pointer_cast<const core::TableWriteNode>(plan)) {
return toSql(tableWriteNode);
Expand Down Expand Up @@ -497,6 +502,55 @@ std::optional<std::string> PrestoQueryRunner::toSql(
return sql.str();
}

// Builds a SQL query string for a TopNRowNumberNode. The query selects every
// source column plus a row_number() window column (partitioned and ordered by
// the node's keys) from the source's SQL, and keeps only the rows whose
// row_number does not exceed the node's limit.
//
// Returns std::nullopt if the source's output type is not a supported DWRF
// type or if the source cannot be converted to SQL.
std::optional<std::string> PrestoQueryRunner::toSql(
    const std::shared_ptr<const core::TopNRowNumberNode>& topNRowNumberNode) {
  // TopNRowNumberNode should have a single source.
  const auto sourceNode = topNRowNumberNode->sources()[0];
  const auto& sourceType = sourceNode->outputType();
  if (!isSupportedDwrfType(sourceType)) {
    return std::nullopt;
  }

  std::stringstream query;
  query << "SELECT * FROM (SELECT ";

  // Select list: every column of the source, by name.
  const auto numColumns = sourceType->size();
  for (auto column = 0; column < numColumns; ++column) {
    appendComma(column, query);
    query << sourceType->nameOf(column);
  }

  query << ", row_number() OVER (";

  // Optional PARTITION BY clause.
  const auto& partitionBy = topNRowNumberNode->partitionKeys();
  const auto numPartitionKeys = partitionBy.size();
  if (numPartitionKeys > 0) {
    query << "partition by ";
    for (auto key = 0; key < numPartitionKeys; ++key) {
      appendComma(key, query);
      query << partitionBy[key]->name();
    }
  }

  // Optional ORDER BY clause; each key is paired with the sorting order at
  // the same index.
  const auto& orderBy = topNRowNumberNode->sortingKeys();
  const auto& orders = topNRowNumberNode->sortingOrders();
  const auto numSortingKeys = orderBy.size();
  if (numSortingKeys > 0) {
    query << " ORDER BY ";
    for (auto key = 0; key < numSortingKeys; ++key) {
      appendComma(key, query);
      query << orderBy[key]->name() << " " << orders[key].toString();
    }
  }

  const std::optional<std::string> sourceSql = toSql(sourceNode);
  if (!sourceSql.has_value()) {
    return std::nullopt;
  }

  // Close the window specification and the inner SELECT, then apply the limit
  // as a filter on the generated row number.
  query << ") as row_number FROM " << sourceSql.value() << ") ";
  query << " where row_number <= " << topNRowNumberNode->limit();

  return query.str();
}

std::optional<std::string> PrestoQueryRunner::toSql(
const std::shared_ptr<const core::TableWriteNode>& tableWriteNode) {
auto insertTableHandle =
Expand Down
3 changes: 3 additions & 0 deletions velox/exec/fuzzer/PrestoQueryRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ class PrestoQueryRunner : public velox::exec::test::ReferenceQueryRunner {
std::optional<std::string> toSql(
const std::shared_ptr<const velox::core::RowNumberNode>& rowNumberNode);

// Converts a TopNRowNumberNode plan into a SQL query string. Returns
// std::nullopt if the source's output type is not a supported DWRF type or if
// the source cannot be converted to SQL.
std::optional<std::string> toSql(
    const std::shared_ptr<const core::TopNRowNumberNode>& topNRowNumberNode);

std::optional<std::string> toSql(
const std::shared_ptr<const core::TableWriteNode>& tableWriteNode);

Expand Down
Loading

0 comments on commit 2d7ec1a

Please sign in to comment.