Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test(clp-s): Add end-to-end test case for compression and extraction. #595

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
5382e80
Adding end to end test case for clp-s
AVMatthews Nov 18, 2024
acce22c
Add required jq install
AVMatthews Nov 18, 2024
ad187f7
Additional error checking and small modification to test input files
AVMatthews Nov 19, 2024
692d8d8
Add install and check for diff command
AVMatthews Nov 19, 2024
8f76f0a
Adding extra debug prints to help solve ubuntu build problems
AVMatthews Nov 19, 2024
5486700
More debug prints
AVMatthews Nov 19, 2024
df6122c
change input files only include max int supported by jq
AVMatthews Nov 19, 2024
e1b636d
Remove debug prints
AVMatthews Nov 19, 2024
7a21d85
remove magic values, add additional file cleanup, test on structurized…
AVMatthews Nov 20, 2024
508c39b
Merge branch 'y-scope:main' into End-to-End---CLP-S-Unit-Testing
AVMatthews Nov 20, 2024
a039bf9
Fix compilation error from merging most recent commit from main, remo…
AVMatthews Nov 20, 2024
ad18256
brace init, move variables, doc string, command construction
AVMatthews Nov 22, 2024
ea375b2
remove std::format due to lack of support
AVMatthews Nov 22, 2024
90c6cef
remove format include
AVMatthews Nov 22, 2024
1a16528
Merge branch 'y-scope:main' into End-to-End---CLP-S-Unit-Testing
AVMatthews Nov 22, 2024
01ca9ca
fmt:format for command string building
AVMatthews Nov 22, 2024
03d6510
restructure into compress, extract, compare
AVMatthews Nov 29, 2024
c362b22
small declaration/assignment changes
AVMatthews Nov 29, 2024
d013e46
remove no lint line and change type to auto
AVMatthews Nov 30, 2024
1d187b3
update jsonl test file
AVMatthews Nov 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 53 additions & 1 deletion components/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,42 @@ add_subdirectory(src/clp_s)
add_subdirectory(src/reducer)

set(SOURCE_FILES_clp_s_unitTest
src/clp_s/ArchiveReader.cpp
src/clp_s/ArchiveReader.hpp
src/clp_s/ArchiveWriter.cpp
src/clp_s/ArchiveWriter.hpp
src/clp_s/ColumnReader.cpp
src/clp_s/ColumnReader.hpp
src/clp_s/ColumnWriter.cpp
src/clp_s/ColumnWriter.hpp
src/clp_s/DictionaryEntry.cpp
src/clp_s/DictionaryEntry.hpp
src/clp_s/DictionaryWriter.cpp
src/clp_s/DictionaryWriter.hpp
src/clp_s/FileReader.cpp
src/clp_s/FileReader.hpp
src/clp_s/FileWriter.cpp
src/clp_s/FileWriter.hpp
src/clp_s/JsonConstructor.cpp
src/clp_s/JsonConstructor.hpp
src/clp_s/JsonFileIterator.cpp
src/clp_s/JsonFileIterator.hpp
src/clp_s/JsonParser.cpp
src/clp_s/JsonParser.hpp
src/clp_s/PackedStreamReader.cpp
src/clp_s/PackedStreamReader.hpp
src/clp_s/ReaderUtils.cpp
src/clp_s/ReaderUtils.hpp
src/clp_s/Schema.cpp
src/clp_s/Schema.hpp
src/clp_s/SchemaMap.cpp
src/clp_s/SchemaMap.hpp
src/clp_s/SchemaReader.cpp
src/clp_s/SchemaReader.hpp
src/clp_s/SchemaTree.cpp
src/clp_s/SchemaTree.hpp
src/clp_s/SchemaWriter.cpp
src/clp_s/SchemaWriter.hpp
src/clp_s/search/AndExpr.cpp
src/clp_s/search/AndExpr.hpp
src/clp_s/search/BooleanLiteral.cpp
Expand Down Expand Up @@ -273,11 +309,24 @@ set(SOURCE_FILES_clp_s_unitTest
src/clp_s/search/StringLiteral.hpp
src/clp_s/search/Transformation.hpp
src/clp_s/search/Value.hpp
src/clp_s/SchemaTree.hpp
src/clp_s/TimestampDictionaryReader.cpp
src/clp_s/TimestampDictionaryReader.hpp
src/clp_s/TimestampDictionaryWriter.cpp
src/clp_s/TimestampDictionaryWriter.hpp
src/clp_s/TimestampEntry.cpp
src/clp_s/TimestampEntry.hpp
src/clp_s/TimestampPattern.cpp
src/clp_s/TimestampPattern.hpp
src/clp_s/Utils.cpp
src/clp_s/Utils.hpp
src/clp_s/VariableDecoder.cpp
src/clp_s/VariableDecoder.hpp
src/clp_s/VariableEncoder.cpp
src/clp_s/VariableEncoder.hpp
src/clp_s/ZstdCompressor.cpp
src/clp_s/ZstdCompressor.hpp
src/clp_s/ZstdDecompressor.cpp
src/clp_s/ZstdDecompressor.hpp
)

set(SOURCE_FILES_unitTest
Expand Down Expand Up @@ -501,6 +550,7 @@ set(SOURCE_FILES_unitTest
tests/test-BufferedFileReader.cpp
tests/test-EncodedVariableInterpreter.cpp
tests/test-encoding_methods.cpp
tests/test-end_to_end.cpp
tests/test-ffi_IrUnitHandlerInterface.cpp
tests/test-ffi_KeyValuePairLogEvent.cpp
tests/test-ffi_SchemaTree.cpp
Expand Down Expand Up @@ -542,6 +592,8 @@ target_link_libraries(unitTest
log_surgeon::log_surgeon
LibArchive::LibArchive
MariaDBClient::MariaDBClient
${MONGOCXX_TARGET}
simdjson
spdlog::spdlog
OpenSSL::Crypto
${sqlite_LIBRARY_DEPENDENCIES}
Expand Down
112 changes: 112 additions & 0 deletions components/core/tests/test-end_to_end.cpp
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#include <cstdlib>
#include <filesystem>
#include <string>
#include <string_view>
#include <vector>
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved

#include <Catch2/single_include/catch2/catch.hpp>
#include <msgpack.hpp>
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved

#include "../src/clp/BufferReader.hpp"
#include "../src/clp/ffi/ir_stream/decoding_methods.hpp"
#include "../src/clp/ffi/ir_stream/Deserializer.hpp"
#include "../src/clp/ffi/ir_stream/Serializer.hpp"
#include "../src/clp/ffi/KeyValuePairLogEvent.hpp"
#include "../src/clp/ir/types.hpp"
#include "../src/clp/time_types.hpp"
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
#include "../src/clp_s/JsonConstructor.hpp"
#include "../src/clp_s/JsonParser.hpp"

using clp::BufferReader;
using clp::ffi::ir_stream::Deserializer;
using clp::ffi::ir_stream::IRErrorCode;
using clp::ffi::ir_stream::Serializer;
using clp::ffi::KeyValuePairLogEvent;
using clp::ir::eight_byte_encoded_variable_t;
using clp::ir::four_byte_encoded_variable_t;
using clp::size_checked_pointer_cast;
using clp::UtcOffset;
using std::string;
using std::string_view;
using std::vector;
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved

// Default option values assigned to `clp_s::JsonParserOption` by the end-to-end test in this file.
// NOTE: `cDeaultCompressionLevel` is misspelled ("Deault"), but the name is kept as-is because the
// test case in this file refers to it by that spelling.
constexpr auto cDefaultTargetEncodedSize = 8ULL << 30;  // 8 GiB
constexpr auto cDefaultMaxDocumentSize = 512ULL << 20;  // 512 MiB
constexpr auto cDefaultMinTableSize = 1ULL << 20;  // 1 MiB
constexpr auto cDeaultCompressionLevel = 3;
constexpr auto cDefaultPrintArchiveStats = false;
constexpr auto cDefaultStructurizeArrays = false;
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved

namespace {
/**
 * @return The location of the test input file, expressed relative to the directory that contains
 * this test source file.
 */
auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path {
    std::filesystem::path relative_path{"test_log_files"};
    relative_path /= "test_no_floats_sorted.json";
    return relative_path;
}

/**
 * @return The test input file's path as a string, formed by joining the parent directory of this
 * source file (taken from `__FILE__`) with the relative input path.
 */
auto get_test_input_local_path() -> std::string {
    auto const tests_dir = std::filesystem::path{__FILE__}.parent_path();
    auto const input_path = tests_dir / get_test_input_path_relative_to_tests_dir();
    return input_path.string();
}
}  // namespace

// End-to-end test for clp-s: compresses the JSON test input into an archive, extracts the archive
// back to JSON, canonicalizes the extracted output with `jq` and `sort`, and finally `diff`s it
// against the original input. Requires `jq`, `sort` and `diff` to be available to the shell used
// by `std::system`.
//
// NOTE(review): `TestType` is never used in the body, so the same steps run once per listed type;
// the template form is kept so the registered test names stay unchanged.
// NOLINTNEXTLINE(readability-function-cognitive-complexity)
TEMPLATE_TEST_CASE(
        "clp-s_compression_and_extraction_no_floats",
        "[clp-s][end-to-end]",
        four_byte_encoded_variable_t,
        eight_byte_encoded_variable_t
) {
    // Artifacts this test creates relative to the current working directory.
    std::string const archive_dir{"test-end-to-end-archive"};
    std::string const output_dir{"test-end-to-end-out"};
    std::string const sorted_output_file{"test-end-to-end_sorted.json"};
    std::string const diff_output_file{"diff_out.txt"};
    auto const input_file{get_test_input_local_path()};

    // Remove anything left behind by a previous run so stale files can't influence the result.
    std::filesystem::remove_all(archive_dir);
    std::filesystem::remove_all(output_dir);
    std::filesystem::remove(sorted_output_file);
    std::filesystem::remove(diff_output_file);

    // --- Compress ---
    std::filesystem::create_directory(archive_dir);
    REQUIRE(std::filesystem::is_directory(archive_dir));

    clp_s::JsonParserOption parser_option{};
    parser_option.file_paths.push_back(input_file);
    parser_option.archives_dir = archive_dir;
    parser_option.target_encoded_size = cDefaultTargetEncodedSize;
    parser_option.max_document_size = cDefaultMaxDocumentSize;
    parser_option.min_table_size = cDefaultMinTableSize;
    parser_option.compression_level = cDeaultCompressionLevel;
    parser_option.print_archive_stats = cDefaultPrintArchiveStats;
    parser_option.structurize_arrays = cDefaultStructurizeArrays;

    clp_s::JsonParser parser(parser_option);
    REQUIRE(parser.parse());
    parser.store();

    REQUIRE(false == std::filesystem::is_empty(archive_dir));

    // --- Extract ---
    std::filesystem::create_directory(output_dir);
    REQUIRE(std::filesystem::is_directory(output_dir));

    clp_s::JsonConstructorOption constructor_option{};
    constructor_option.output_dir = output_dir;
    constructor_option.ordered = false;
    constructor_option.archives_dir = parser_option.archives_dir;
    constructor_option.ordered_chunk_size = 0;
    for (auto const& entry : std::filesystem::directory_iterator(constructor_option.archives_dir)) {
        if (false == entry.is_directory()) {
            // Skip non-directories
            continue;
        }

        // Convert explicitly: the implicit `path` -> string conversion only yields a narrow
        // string on platforms whose native path character type is `char`.
        constructor_option.archive_id = entry.path().filename().string();
        clp_s::JsonConstructor constructor(constructor_option);
        constructor.store();
    }

    auto const extracted_file{(std::filesystem::path{output_dir} / "original").string()};
    REQUIRE(std::filesystem::exists(extracted_file));

    // --- Compare ---
    // Canonicalize the extracted output (sorted keys, compact form, sorted lines) so it can be
    // compared textually with the pre-sorted input file. Paths are quoted so that whitespace in
    // them doesn't split the shell command.
    auto const sort_command{
            "jq -S -c '.' '" + extracted_file + "' | sort > '" + sorted_output_file + "'"
    };
    // NOLINTNEXTLINE(cert-env33-c,concurrency-mt-unsafe)
    REQUIRE(0 == std::system(sort_command.c_str()));
    REQUIRE(false == std::filesystem::is_empty(sorted_output_file));

    auto const diff_command{
            "diff -u '" + sorted_output_file + "' '" + input_file + "' > '" + diff_output_file
            + "'"
    };
    // The exit status must be checked in addition to the diff output: if `diff` is missing or
    // fails, the redirect still creates an empty output file, which would otherwise look like
    // "no differences".
    // NOLINTNEXTLINE(cert-env33-c,concurrency-mt-unsafe)
    auto const diff_result{std::system(diff_command.c_str())};
    REQUIRE(0 == diff_result);
    REQUIRE(std::filesystem::is_empty(diff_output_file));
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true}
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true}
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"non_empty_object2":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true}
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"nonempty_array":[1,2,3,4,5],"null":null,"string":"short_string","true":true}
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
4 changes: 4 additions & 0 deletions components/core/tests/test_log_files/test_sorted.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true}
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true}
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"non_empty_object2":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true}
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_object":{},"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"nonempty_array":[1,2,3,4,5],"null":null,"string":"short_string","true":true}
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
Loading