Skip to content

Commit

Permalink
feat: Expose non-io_buf API for fb_parser (#4683)
Browse files Browse the repository at this point in the history
Expose non-io_buf API for fb_parser

This creates an entry-point for the parsing logic operating over a buffer pointer rather than io_buf; previously this was only used for unit testing, but this will, in the future, be used by consumers of the library, e.g. RLClientLib.

Added unit tests for this behaviour found a couple of bugs in the implementation of the FB parser as well as FB parser unit test validation logic:
* Fix logic issues with detecting presence/absence of feature names and hashes in example data, leading to an access violation crash
* Fix issues with properly parsing ExampleCollection buffers, around reporting doneness and advancing when parsing inner MultiExample objects.
* Fix tests to include coverage for incoming flatbuffers with/without names/hashes
* Fix validation logic to avoid crashing when feature names/hashes are missing
  • Loading branch information
lokitoth authored Feb 14, 2024
1 parent db7f024 commit 7c2963e
Show file tree
Hide file tree
Showing 15 changed files with 703 additions and 384 deletions.
2 changes: 1 addition & 1 deletion vowpalwabbit/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ if(VW_FEAT_CSV)
endif()

if(VW_FEAT_FLATBUFFERS)
target_link_libraries(vw_core PRIVATE vw_fb_parser)
target_link_libraries(vw_core PUBLIC vw_fb_parser)
endif()

# Handle generated header
Expand Down
4 changes: 3 additions & 1 deletion vowpalwabbit/fb_parser/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ set(vw_fb_parser_test_sources
tests/prototype_label.h
tests/prototype_namespace.h

tests/flatbuffer_parser_test.cc
tests/affordance_validation_tests.cc
tests/read_span_tests.cc
tests/flatbuffer_parser_tests.cc
)

message(STATUS "vw_fb_parser_test_sources: ${vw_fb_parser_test_sources}")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,32 +5,37 @@
#pragma once

#include "vw/core/api_status.h"
#include "vw/core/example.h"
#include "vw/core/multi_ex.h"
#include "vw/core/shared_data.h"
#include "vw/core/vw_fwd.h"
#include "vw/fb_parser/generated/example_generated.h"

namespace VW
{

class api_status;

namespace parsers
{
namespace flatbuffer
{
int flatbuffer_to_examples(VW::workspace* all, io_buf& buf, VW::multi_ex& examples);
bool read_span_flatbuffer(
VW::workspace* all, const uint8_t* span, size_t length, example_factory_t example_factory, VW::multi_ex& examples);

class parser
{
public:
parser() = default;
const VW::parsers::flatbuffer::ExampleRoot* data();
int parse_examples(VW::workspace* all, io_buf& buf, VW::multi_ex& examples, uint8_t* buffer_pointer = nullptr,
int parse_examples(VW::workspace* all, io_buf& buf, VW::multi_ex& examples, const uint8_t* buffer_pointer = nullptr,
VW::experimental::api_status* status = nullptr);

private:
size_t _num_example_roots = 0;
const VW::parsers::flatbuffer::ExampleRoot* _data;
uint8_t* _flatbuffer_pointer;
const uint8_t* _flatbuffer_pointer;
flatbuffers::uoffset_t _object_size = 0;
bool _active_collection = false;
uint32_t _example_index = 0;
Expand All @@ -40,7 +45,7 @@ class parser
uint32_t _labeled_action = 0;
uint64_t _c_hash = 0;

int parse(io_buf& buf, uint8_t* buffer_pointer = nullptr, VW::experimental::api_status* status = nullptr);
int parse(io_buf& buf, const uint8_t* buffer_pointer = nullptr, VW::experimental::api_status* status = nullptr);
int process_collection_item(
VW::workspace* all, VW::multi_ex& examples, VW::experimental::api_status* status = nullptr);
int parse_example(VW::workspace* all, example* ae, const Example* eg, VW::experimental::api_status* status = nullptr);
Expand Down
Loading

0 comments on commit 7c2963e

Please sign in to comment.