Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fuzz #47

Merged
merged 45 commits into from
Jan 31, 2025
Merged

Fuzz #47

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
d48cecd
some fuzz fixes
huangminghuang Jan 15, 2025
e1581d8
:art: Committing clang-format changes
huangminghuang Jan 17, 2025
29ed8ec
fix sfvint_parser error handling
huangminghuang Jan 17, 2025
1a2cfd6
:art: Committing clang-format changes
huangminghuang Jan 17, 2025
05acce6
fix invalid inputs
huangminghuang Jan 20, 2025
59b7719
:art: Committing clang-format changes
huangminghuang Jan 20, 2025
dd2de88
upgrade protobuf
huangminghuang Jan 20, 2025
df0715a
some code cleanup
huangminghuang Jan 20, 2025
92a70be
:art: Committing clang-format changes
huangminghuang Jan 20, 2025
01979aa
msvc fix
huangminghuang Jan 20, 2025
9befd65
another msvc fix
huangminghuang Jan 20, 2025
5db099b
avoid implicit signed/unsigned conversion
huangminghuang Jan 20, 2025
449f70e
:art: Committing clang-format changes
huangminghuang Jan 20, 2025
068e57b
more integer conversion fix
huangminghuang Jan 20, 2025
c1ee9b5
:art: Committing clang-format changes
huangminghuang Jan 20, 2025
1967bf2
more integer conversion fixes
huangminghuang Jan 20, 2025
a963dad
clang fix
huangminghuang Jan 20, 2025
4e1e89d
clang-tidy fix
huangminghuang Jan 21, 2025
448679f
:art: Committing clang-format changes
huangminghuang Jan 21, 2025
503940e
some fuzz fix
huangminghuang Jan 22, 2025
310d2ce
:art: Committing clang-format changes
huangminghuang Jan 22, 2025
a4a911f
fix non-owning recursive map
huangminghuang Jan 23, 2025
03b2b06
:art: Committing clang-format changes
huangminghuang Jan 23, 2025
7f22cce
fix equality_comparable_span
huangminghuang Jan 23, 2025
4ca04c8
:art: Committing clang-format changes
huangminghuang Jan 23, 2025
89a8fce
more equality_comparable_span fix
huangminghuang Jan 23, 2025
ce66dd8
test fixes
huangminghuang Jan 25, 2025
76a298a
fix arena_vector
huangminghuang Jan 25, 2025
39859ca
fix packed repeated bool
huangminghuang Jan 25, 2025
eb8d67b
fix varint parse terminate condition
huangminghuang Jan 27, 2025
bcfb911
fix dynamic_serializer
huangminghuang Jan 27, 2025
3f121af
:art: Committing clang-format changes
huangminghuang Jan 27, 2025
00c7031
msvc fix
huangminghuang Jan 28, 2025
73eda24
more msvc fix
huangminghuang Jan 28, 2025
d132a76
more packed bool fix
huangminghuang Jan 29, 2025
6dfec85
:art: Committing clang-format changes
huangminghuang Jan 29, 2025
ba245bc
more invalid input tests
huangminghuang Jan 30, 2025
5d2a965
:art: Committing clang-format changes
huangminghuang Jan 30, 2025
7830794
try clang-18 for coverage
huangminghuang Jan 30, 2025
31cf095
clang-18 fix
huangminghuang Jan 30, 2025
ec99397
macos coverage
huangminghuang Jan 30, 2025
75c90cd
add dynamic_serializer_skip_test
huangminghuang Jan 30, 2025
5e7f24c
:art: Committing clang-format changes
huangminghuang Jan 30, 2025
594dffa
fix zero sized packed repeated
huangminghuang Jan 31, 2025
2bc8df3
:art: Committing clang-format changes
huangminghuang Jan 31, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
- build_type: Debug
sanitize: ON
- build_type: Coverage
compiler: gcc
compiler: clang-18
protoc: compile
sanitize: OFF

Expand Down Expand Up @@ -89,7 +89,7 @@
ctest --build-config ${{ matrix.build_type }} --output-on-failure

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
uses: codecov/codecov-action@v5
if: matrix.build_type == 'Coverage'
with:
token: ${{ secrets.CODECOV_TOKEN }}
Expand Down
15 changes: 11 additions & 4 deletions .github/workflows/macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ jobs:
runs-on: macos-latest
strategy:
matrix:
build_type: [Release, Debug, Coverage]
protoc: ["find"]

steps:
Expand All @@ -47,14 +48,20 @@ jobs:
- name: configure
shell: bash
run: |
cmake -S . -B build -G Ninja -DHPP_PROTO_PROTOC=${{ matrix.protoc }} -DCMAKE_BUILD_TYPE=Debug \
cmake -S . -B build -G Ninja -DHPP_PROTO_PROTOC=${{ matrix.protoc }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache

- name: build
run: cmake --build build

- name: test
shell: bash
working-directory: ./build
run: |
cd build
ctest --build-config Debug --output-on-failure
ctest --build-config ${{ matrix.build_type }} --output-on-failure

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
if: matrix.build_type == 'Coverage'
with:
token: ${{ secrets.CODECOV_TOKEN }}
verbose: true
5 changes: 4 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ if(MSVC)
set(HPP_PROTO_DISABLE_GLAZE ON)
endif()
else()
set(HPP_PROTO_COMPILE_OPTIONS "-Wall" "-Wall" "-Wextra")
set(HPP_PROTO_COMPILE_OPTIONS "-Wall" "-Wall" "-Wextra" "-Werror=sign-conversion")
endif()

if(CMAKE_GENERATOR MATCHES "Visual Studio" AND CMAKE_CXX_COMPILER_LAUNCHER STREQUAL "ccache")
Expand Down Expand Up @@ -106,6 +106,9 @@ if(HPP_PROTO_PROTOC_PLUGIN)
if(HPP_PROTO_TESTS)
add_subdirectory(tests)
add_subdirectory(tutorial)
if (HPP_PROTO_ENABLE_SANITIZER AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
add_subdirectory(fuzz)
endif()
endif()

if(HPP_PROTO_BENCHMARKS)
Expand Down
82 changes: 41 additions & 41 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ Compared to Google’s implementation, hpp-proto adopts a minimalistic design th
| CPU | M1 Pro/MK193LL/A | Intel Core i9-11950H @ 2.60GHz |
| Compiler | Apple clang 16.0.0 | gcc 12.3.0 |

Google protobuf version 28.3
Google protobuf version 29.3

### Runtime Performance

Expand Down Expand Up @@ -61,30 +61,30 @@ We measured the runtime performance using the dataset and the benchmarks.proto d
</tr>
<tr>
<td>google CPU time</td>
<td><div align="right">475.0&nbsp;ns</div></td>
<td><div align="right">366.0&nbsp;ns</div></td>
<td><div align="right">382.0&nbsp;ns</div></td>
<td><div align="right">268.0&nbsp;ns</div></td>
<td><div align="right">509.0&nbsp;ns</div></td>
<td><div align="right">426.0&nbsp;ns</div></td>
<td><div align="right">472.0&nbsp;ns</div></td>
<td><div align="right">346.0&nbsp;ns</div></td>
<td><div align="right">372.0&nbsp;ns</div></td>
<td><div align="right">250.0&nbsp;ns</div></td>
<td><div align="right">516.0&nbsp;ns</div></td>
<td><div align="right">398.0&nbsp;ns</div></td>
</tr>
<tr>
<td>hpp_proto CPU time</td>
<td><div align="right">283.0&nbsp;ns</div></td>
<td><div align="right">170.0&nbsp;ns</div></td>
<td><div align="right">81.0&nbsp;ns</div></td>
<td><div align="right">294.0&nbsp;ns</div></td>
<td><div align="right">177.0&nbsp;ns</div></td>
<td><div align="right">72.6&nbsp;ns</div></td>
<td><div align="right">8.38&nbsp;ns</div></td>
<td><div align="right">285.0&nbsp;ns</div></td>
<td><div align="right">182.0&nbsp;ns</div></td>
<td><div align="right">275.0&nbsp;ns</div></td>
<td><div align="right">181.0&nbsp;ns</div></td>
</tr>
<tr>
<td>hpp_proto speedup factor</td>
<td><div align="right">1.67</div></td>
<td><div align="right">2.15</div></td>
<td><div align="right">4.71</div></td>
<td><div align="right">31.98</div></td>
<td><div align="right">1.78</div></td>
<td><div align="right">2.34</div></td>
<td><div align="right">1.61</div></td>
<td><div align="right">1.95</div></td>
<td><div align="right">5.12</div></td>
<td><div align="right">29.83</div></td>
<td><div align="right">1.88</div></td>
<td><div align="right">2.20</div></td>
</tr>
</tbody>
</table>
Expand All @@ -111,30 +111,30 @@ We measured the runtime performance using the dataset and the benchmarks.proto d
</tr>
<tr>
<td>google CPU time</td>
<td><div align="right">250.0&nbsp;ns</div></td>
<td><div align="right">253.0&nbsp;ns</div></td>
<td><div align="right">257.0&nbsp;ns</div></td>
<td><div align="right">114.0&nbsp;ns</div></td>
<td><div align="right">108.0&nbsp;ns</div></td>
<td><div align="right">225.0&nbsp;ns</div></td>
<td><div align="right">229.0&nbsp;ns</div></td>
<td><div align="right">117.0&nbsp;ns</div></td>
<td><div align="right">111.0&nbsp;ns</div></td>
<td><div align="right">220.0&nbsp;ns</div></td>
<td><div align="right">224.0&nbsp;ns</div></td>
</tr>
<tr>
<td>hpp_proto CPU time</td>
<td><div align="right">198.0&nbsp;ns</div></td>
<td><div align="right">142.0&nbsp;ns</div></td>
<td><div align="right">34.6&nbsp;ns</div></td>
<td><div align="right">202.0&nbsp;ns</div></td>
<td><div align="right">144.0&nbsp;ns</div></td>
<td><div align="right">33.6&nbsp;ns</div></td>
<td><div align="right">10.9&nbsp;ns</div></td>
<td><div align="right">146.0&nbsp;ns</div></td>
<td><div align="right">116.0&nbsp;ns</div></td>
<td><div align="right">140.0&nbsp;ns</div></td>
<td><div align="right">115.0&nbsp;ns</div></td>
</tr>
<tr>
<td>hpp_proto speedup factor</td>
<td><div align="right">1.26</div></td>
<td><div align="right">1.81</div></td>
<td><div align="right">3.29</div></td>
<td><div align="right">9.91</div></td>
<td><div align="right">1.54</div></td>
<td><div align="right">1.97</div></td>
<td><div align="right">1.25</div></td>
<td><div align="right">1.78</div></td>
<td><div align="right">3.48</div></td>
<td><div align="right">10.18</div></td>
<td><div align="right">1.57</div></td>
<td><div align="right">1.95</div></td>
</tr>
</tbody>
</table>
Expand All @@ -161,23 +161,23 @@ We compared the code sizes of three equivalent programs: [hpp_proto_decode_encod
</tr>
<tr>
<td> google_decode_encode </td>
<td><div align="right">2624344</div></td>
<td><div align="right">3410088</div></td>
<td><div align="right">2683720</div></td>
<td><div align="right">3467520</div></td>
</tr>
<tr>
<td> google_decode_encode_lite </td>
<td><div align="right">1106408</div></td>
<td><div align="right">1474208</div></td>
<td><div align="right">1128296</div></td>
<td><div align="right">1505200</div></td>
</tr>
<tr>
<td> hpp_proto_decode_encoded </td>
<td><div align="right">121208</div></td>
<td><div align="right">92520</div></td>
<td><div align="right">139608</div></td>
<td><div align="right">100640</div></td>
</tr>
</tbody>
</table>

The comparison highlights a significant reduction in code size when using hpp-proto compared to Google’s Protocol Buffers implementations. On macOS, hpp-proto offers a 21.65x reduction in size compared to google_decode_encode and a 9.13x reduction compared to google_decode_encode_lite. The reduction is even more pronounced on Linux, where hpp-proto reduces the code size by 36.86x compared to google_decode_encode and by 15.93x compared to google_decode_encode_lite.
The comparison highlights a significant reduction in code size when using hpp-proto compared to Google’s Protocol Buffers implementations. On macOS, hpp-proto offers a 19.22x reduction in size compared to google_decode_encode and an 8.08x reduction compared to google_decode_encode_lite. The reduction is even more pronounced on Linux, where hpp-proto reduces the code size by 34.45x compared to google_decode_encode and by 14.96x compared to google_decode_encode_lite.


## Getting Started
Expand Down
17 changes: 16 additions & 1 deletion cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,22 @@
"bigobj",
"libprotoc",
"ASAN",
"libhpp"
"libhpp",
"Wsign",
"Wextra",
"freea",
"binpb",
"ccache",
"fsanitize",
"Werror",
"STREQUAL",
"unittests",
"INLINES",
"PROTOFILES",
"COPYONLY",
"endforeach",
"endfunction",
"ARGN"
],
// flagWords - list of words to be always considered incorrect
// This is useful for offensive words and common spelling errors.
Expand Down
18 changes: 18 additions & 0 deletions fuzz/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@

# add_fuzz_target(<target> <source> [<extra libs>...])
#
# Creates two executables from <source>:
#   * <target>            - the libFuzzer binary, built with the fuzzer, address and
#                           undefined-behavior sanitizers.
#   * <target>_debug_case - <source> plus fuzz_case_main.cpp, which supplies a regular
#                           main() that feeds a single input file to the fuzz entry point.
# Any additional arguments (ARGN) are linked into both executables.
function(add_fuzz_target target source)
  add_executable(${target} ${source})
  target_link_libraries(${target} PRIVATE hpp_proto::libhpp_proto ${ARGN})
  # "undefined" is already part of the first -fsanitize list, so it is not repeated.
  target_compile_options(${target} PRIVATE -fsanitize=fuzzer,address,undefined -fno-sanitize-recover=all)
  target_link_options(${target} PRIVATE -fsanitize=fuzzer,address,undefined)

  add_executable(${target}_debug_case ${source} fuzz_case_main.cpp)
  target_compile_options(${target}_debug_case PRIVATE -fsanitize=undefined -fno-sanitize-recover=all)
  # Objects compiled with -fsanitize=undefined need the matching runtime at link time;
  # request it explicitly instead of relying on flags inherited from elsewhere.
  target_link_options(${target}_debug_case PRIVATE -fsanitize=undefined)
  target_link_libraries(${target}_debug_case PRIVATE hpp_proto::libhpp_proto ${ARGN})
endfunction()

# Register the fuzz targets; fuzz_pb_serializer additionally links unittest_proto_lib.
add_fuzz_target(fuzz_pb_serializer fuzz_pb_serializer.cpp unittest_proto_lib)
add_fuzz_target(fuzz_dynamic_serializer fuzz_dynamic_serializer.cpp)
# Create a "corpus" directory inside this directory's build tree at configure time.
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/corpus)



Binary file added fuzz/corpus/proto3_unittest.TestAllTypes.bin
Binary file not shown.
Binary file added fuzz/corpus/protobuf_unittest.TestAllTypes.bin
Binary file not shown.
Binary file added fuzz/corpus/protobuf_unittest.TestMap.bin
Binary file not shown.
23 changes: 23 additions & 0 deletions fuzz/fuzz_case_main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#include <fstream>
#include <fuzzer/FuzzedDataProvider.h>
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);

/// Reads the whole file at `filename` in binary mode and returns its bytes.
///
/// @param filename path of the file to read.
/// @return the file contents (empty for an empty file).
/// @throws std::ios_base::failure if the file cannot be opened, its size cannot be
///         determined, or the read fails. Previously a missing file made tellg() return -1,
///         which was converted to a huge size and surfaced as an unrelated length error.
inline std::string read_file(const char *filename) {
  std::ifstream in(filename, std::ios::in | std::ios::binary);
  if (!in) {
    throw std::ios_base::failure(std::string("read_file: cannot open '") + filename + "'");
  }

  in.seekg(0, std::ios::end);
  // tellg() reports failure as -1; check before converting to an unsigned size.
  const std::streamoff length = in.tellg();
  if (length < 0) {
    throw std::ios_base::failure(std::string("read_file: cannot determine size of '") + filename + "'");
  }

  std::string contents;
  contents.resize(static_cast<std::string::size_type>(length));
  in.seekg(0, std::ios::beg);
  in.read(contents.data(), static_cast<std::streamsize>(contents.size()));
  if (!in) {
    throw std::ios_base::failure(std::string("read_file: cannot read '") + filename + "'");
  }
  return contents;
}

/// Replays a single input file through the fuzz entry point.
///
/// Expects exactly one command-line argument (the path of the input file) and returns 1
/// otherwise. With a valid argument count, the process exit code is whatever
/// LLVMFuzzerTestOneInput returns for the file's contents.
int main(int argc, const char **argv) {
  constexpr int expected_argc = 2;
  if (argc == expected_argc) {
    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    std::string input = read_file(argv[1]);
    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
    const auto *bytes = reinterpret_cast<const uint8_t *>(input.data());
    return LLVMFuzzerTestOneInput(bytes, input.size());
  }
  return 1;
}
30 changes: 30 additions & 0 deletions fuzz/fuzz_dynamic_serializer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#include <fstream>
#include <fuzzer/FuzzedDataProvider.h>
#include <hpp_proto/dynamic_serializer.hpp>

using namespace std::string_view_literals;

// Fully-qualified message names the fuzz target chooses from; the fuzz input selects one
// entry by index.
const std::array<std::string_view, 6> messages_names{
    "proto3_unittest.TestAllTypes"sv,      //
    "proto3_unittest.TestUnpackedTypes"sv, //
    "protobuf_unittest.TestAllTypes"sv,    //
    "protobuf_unittest.TestMap"sv,         //
    "protobuf_unittest.TestPackedTypes"sv, //
    "protobuf_unittest.TestUnpackedTypes"sv};

/// Reads the whole file at `filename` in binary mode and returns its bytes.
///
/// @param filename path of the file to read.
/// @return the file contents (empty for an empty file).
/// @throws std::ios_base::failure if the file cannot be opened, its size cannot be
///         determined, or the read fails. Previously a missing file made tellg() return -1,
///         which the cast turned into a huge size and surfaced as an unrelated length error.
inline std::string read_file(const std::string &filename) {
  std::ifstream in(filename.c_str(), std::ios::in | std::ios::binary);
  if (!in) {
    throw std::ios_base::failure("read_file: cannot open '" + filename + "'");
  }

  in.seekg(0, std::ios::end);
  // tellg() reports failure as -1; check before converting to an unsigned size.
  const std::streamoff length = in.tellg();
  if (length < 0) {
    throw std::ios_base::failure("read_file: cannot determine size of '" + filename + "'");
  }

  std::string contents;
  contents.resize(static_cast<std::string::size_type>(length));
  in.seekg(0, std::ios::beg);
  in.read(contents.data(), static_cast<std::streamsize>(contents.size()));
  if (!in) {
    throw std::ios_base::failure("read_file: cannot read '" + filename + "'");
  }
  return contents;
}

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
auto descriptors = read_file("../tests/unittest.desc.binpb");
auto ser = hpp::proto::dynamic_serializer::make(descriptors);

FuzzedDataProvider fdp(data, size);
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-constant-array-index)
auto message_name = messages_names[fdp.ConsumeIntegralInRange<unsigned>(0, messages_names.size() - 1)];
auto status = ser->proto_to_json(message_name, fdp.ConsumeRemainingBytes<char>());
return status.has_value() ? 0 : 1;
}
56 changes: 56 additions & 0 deletions fuzz/fuzz_pb_serializer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#include <fuzzer/FuzzedDataProvider.h>
#include <google/protobuf/map_unittest.pb.hpp>
#include <google/protobuf/unittest.pb.hpp>
#include <google/protobuf/unittest_proto3.pb.hpp>
#include <non_owning/google/protobuf/map_unittest.pb.hpp>
#include <non_owning/google/protobuf/unittest.pb.hpp>
#include <non_owning/google/protobuf/unittest_proto3.pb.hpp>

/// Splits the remaining fuzz input into a sequence of byte chunks.
///
/// Up to nine leading chunks are taken, each with a requested length drawn from the input
/// itself in the range [10, 128]; chunking stops early as soon as a chunk comes back empty.
/// Whatever input is left afterwards is appended as one final chunk unless it is empty.
std::vector<std::vector<char>> split_input(FuzzedDataProvider &provider) {
  constexpr std::size_t max_leading_chunks = 9;
  std::vector<std::vector<char>> chunks;

  for (std::size_t taken = 0; taken < max_leading_chunks; ++taken) {
    const auto requested = provider.ConsumeIntegralInRange<int>(10, 128);
    auto chunk = provider.ConsumeBytes<char>(requested);
    if (chunk.empty()) {
      break;
    }
    chunks.push_back(std::move(chunk));
  }

  if (auto tail = provider.ConsumeRemainingBytes<char>(); !tail.empty()) {
    chunks.push_back(std::move(tail));
  }
  return chunks;
}

// Message types exercised by this fuzz target. deserialize_data picks the tuple element at
// index `choice % std::tuple_size_v<messages_t>`; the last three entries are the same message
// names taken from the `non_owning` namespace.
using messages_t = std::tuple<proto3_unittest::TestAllTypes, protobuf_unittest::TestAllTypes,
protobuf_unittest::TestMap, non_owning::proto3_unittest::TestAllTypes,
non_owning::protobuf_unittest::TestAllTypes, non_owning::protobuf_unittest::TestMap>;

/// Terminal overload: no candidate index is left, so nothing is parsed and a
/// default-constructed status is returned.
hpp::proto::status deserialize_data(FuzzedDataProvider & /*provider*/, uint32_t /*choice*/,
                                    std::index_sequence<>) {
  return {};
}

/// Parses the fuzz input as the message type selected by `choice`.
///
/// `choice % tuple_size` selects the element of `messages_t`; `choice / tuple_size` being
/// non-zero selects parsing from the chunked input produced by `split_input` instead of one
/// contiguous buffer. Candidate indices are tried one at a time: if `FirstIndex` is not the
/// selected one, the call recurses on the remaining `Indices`.
///
/// @param provider source of the fuzz bytes; consumed by the parse.
/// @param choice   combined message selector and split flag.
/// @return the status produced by `hpp::proto::read_proto` for the selected message type.
template <std::size_t FirstIndex, std::size_t... Indices>
hpp::proto::status deserialize_data(FuzzedDataProvider &provider, uint32_t choice,
                                    std::index_sequence<FirstIndex, Indices...>) {
  constexpr auto message_count = std::tuple_size_v<messages_t>;

  if (choice % message_count != FirstIndex) {
    return deserialize_data(provider, choice, std::index_sequence<Indices...>{});
  }

  using message_type = std::tuple_element_t<FirstIndex, messages_t>;
  const bool use_chunked_input = (choice / message_count) != 0;

  std::pmr::monotonic_buffer_resource mr;
  message_type message;
  if (use_chunked_input) {
    return hpp::proto::read_proto(message, split_input(provider), hpp::proto::strictly_alloc_from{mr});
  }
  return hpp::proto::read_proto(message, provider.ConsumeRemainingBytes<char>(),
                                hpp::proto::strictly_alloc_from{mr});
}

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
FuzzedDataProvider provider(data, size);
auto choice = provider.ConsumeIntegralInRange<unsigned>(0, std::tuple_size_v<messages_t> * 2 - 1);
auto status = deserialize_data(provider, choice, std::make_index_sequence<std::tuple_size_v<messages_t>>{});
return status.ok() ? 0 : 1;
}
4 changes: 2 additions & 2 deletions include/hpp_proto/duration_codec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ struct duration_codec {
assert(b.size() >= max_encode_size(value));

auto *buf = std::data(b);
auto ix = std::distance(buf, glz::to_chars(buf, value.seconds));
auto ix = static_cast<std::size_t>(std::distance(buf, glz::to_chars(buf, value.seconds)));

if (value.nanos != 0) {
int32_t nanos = std::abs(value.nanos);
Expand All @@ -51,7 +51,7 @@ struct duration_codec {
ix += 8;
}
glz::detail::dump_unchecked<'s'>(b, ix);
return ix;
return static_cast<int64_t>(ix);
}

// NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
Expand Down
Loading