Skip to content

Commit

Permalink
Merge branch 'master' into rapidjson-include
Browse files Browse the repository at this point in the history
  • Loading branch information
jackgerrits authored Feb 15, 2024
2 parents f605b1a + 7c2963e commit 4a51778
Show file tree
Hide file tree
Showing 46 changed files with 2,925 additions and 313 deletions.
16 changes: 16 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "(ctest) Launch",
"type": "cppdbg",
"cwd": "${workspaceFolder}",
"request": "launch",
"program": "${cmake.testProgram}",
"args": [ "${cmake.testArgs}" ]
}
]
}
14 changes: 8 additions & 6 deletions python/docs/source/tutorials/DFtoVW_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -802,15 +802,17 @@
"\n",
"# Adding columns for easier visualization\n",
"weights_df[\"feature_name\"] = weights_df.apply(\n",
" lambda row: row.vw_feature_name.split(\"=\")[0]\n",
" if row.is_cat\n",
" else row.vw_feature_name,\n",
" lambda row: (\n",
" row.vw_feature_name.split(\"=\")[0] if row.is_cat else row.vw_feature_name\n",
" ),\n",
" axis=1,\n",
")\n",
"weights_df[\"feature_value\"] = weights_df.apply(\n",
" lambda row: row.vw_feature_name.split(\"=\")[1].zfill(2)\n",
" if row.is_cat\n",
" else row.vw_feature_name,\n",
" lambda row: (\n",
" row.vw_feature_name.split(\"=\")[1].zfill(2)\n",
" if row.is_cat\n",
" else row.vw_feature_name\n",
" ),\n",
" axis=1,\n",
")\n",
"weights_df.sort_values([\"feature_name\", \"feature_value\"], inplace=True)"
Expand Down
3 changes: 1 addition & 2 deletions python/tests/confidence_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,5 @@ def lblogwealth(self, *, t, sumXt, v, eta, s, alpha):

return max(
0,
(sumXt - sqrt(gamma1**2 * ll * v + gamma2**2 * ll**2) - gamma2 * ll)
/ t,
(sumXt - sqrt(gamma1**2 * ll * v + gamma2**2 * ll**2) - gamma2 * ll) / t,
)
32 changes: 17 additions & 15 deletions python/tests/crminustwo.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,21 +440,23 @@ def intervaldiff(
candidates.append(
(
gstar,
None
if isclose(kappa, 0)
else {
"kappastar": kappa,
"betastar": beta,
"gammastar": gamma,
"taustar": tau,
"ufake": ufake,
"wfake": wfake,
"rfake": rex,
"qfunc": lambda c, u, w, r, k=kappa, g=gamma, b=beta, t=tau, s=sign, num=n: -c
* (b + g * u + t * w + s * (u - w) * r)
/ ((num + 1) * k),
"mle": mle,
},
(
None
if isclose(kappa, 0)
else {
"kappastar": kappa,
"betastar": beta,
"gammastar": gamma,
"taustar": tau,
"ufake": ufake,
"wfake": wfake,
"rfake": rex,
"qfunc": lambda c, u, w, r, k=kappa, g=gamma, b=beta, t=tau, s=sign, num=n: -c
* (b + g * u + t * w + s * (u - w) * r)
/ ((num + 1) * k),
"mle": mle,
}
),
)
)

Expand Down
6 changes: 3 additions & 3 deletions python/vowpalwabbit/pyvw.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,9 +532,9 @@ def parse(
for ex in str_ex
]
):
str_ex: List[
Example
] = str_ex # pytype: disable=annotation-type-mismatch
str_ex: List[Example] = (
str_ex # pytype: disable=annotation-type-mismatch
)
return str_ex

if not isinstance(str_ex, (list, str)):
Expand Down
16 changes: 10 additions & 6 deletions test/run_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,21 @@ def _are_same(expected: Any, actual: Any, key: str) -> Tuple[bool, str]:
elif isinstance(expected, (int, bool, str)):
return (
expected == actual,
f"Key '{key}' value mismatch. Expected: '{expected}', but found: '{actual}'"
if expected != actual
else "",
(
f"Key '{key}' value mismatch. Expected: '{expected}', but found: '{actual}'"
if expected != actual
else ""
),
)
elif isinstance(expected, (float)):
delta = abs(expected - actual)
return (
delta < epsilon,
f"Key '{key}' value mismatch. Expected: '{expected}', but found: '{actual}' (using epsilon: '{epsilon}')"
if delta >= epsilon
else "",
(
f"Key '{key}' value mismatch. Expected: '{expected}', but found: '{actual}' (using epsilon: '{epsilon}')"
if delta >= epsilon
else ""
),
)
elif isinstance(expected, dict):
expected_keys = set(expected.keys())
Expand Down
1 change: 1 addition & 0 deletions test/save_resume_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Test that the models generated with and without --predict_only_model produce the same predictions when loaded in test_mode.
"""

import sys
import os
import optparse
Expand Down
Binary file modified test/train-sets/0001.fb
Binary file not shown.
Binary file modified test/train-sets/ccb.fb
Binary file not shown.
Binary file modified test/train-sets/cs.fb
Binary file not shown.
Binary file modified test/train-sets/multiclass.fb
Binary file not shown.
Binary file modified test/train-sets/multilabel.fb
Binary file not shown.
Binary file modified test/train-sets/rcv1_cb_eval.fb
Binary file not shown.
Binary file modified test/train-sets/rcv1_raw_cb_small.fb
Binary file not shown.
Binary file modified test/train-sets/wiki256_no_label.fb
Binary file not shown.
91 changes: 69 additions & 22 deletions utl/flatbuffer/vw_to_flat.cc
Original file line number Diff line number Diff line change
Expand Up @@ -299,10 +299,10 @@ void to_flat::create_no_label(VW::example* v, ExampleBuilder& ex_builder)
ex_builder.label = VW::parsers::flatbuffer::Createno_label(_builder, (uint8_t)'\000').Union();
}

flatbuffers::Offset<VW::parsers::flatbuffer::Namespace> to_flat::create_namespace(VW::features::audit_iterator begin,
VW::features::audit_iterator end, VW::namespace_index index, uint64_t hash, bool audit)
// Create namespace when audit is true
flatbuffers::Offset<VW::parsers::flatbuffer::Namespace> to_flat::create_namespace_audit(
VW::features::audit_iterator begin, VW::features::audit_iterator end, VW::namespace_index index, uint64_t hash)
{
std::vector<flatbuffers::Offset<VW::parsers::flatbuffer::Feature>> fts;
std::stringstream ss;
ss << index;

Expand All @@ -316,26 +316,61 @@ flatbuffers::Offset<VW::parsers::flatbuffer::Namespace> to_flat::create_namespac
if (find_ns_offset == _share_examples.end())
{
flatbuffers::Offset<VW::parsers::flatbuffer::Namespace> namespace_offset;
std::vector<flatbuffers::Offset<flatbuffers::String>> feature_names;
std::vector<float> feature_values;
std::vector<uint64_t> feature_hashes;

// new namespace
if (audit)

std::string ns_name;
for (auto it = begin; it != end; ++it)
{
std::string ns_name;
for (auto it = begin; it != end; ++it)
{
ns_name = it.audit()->ns;
fts.push_back(
VW::parsers::flatbuffer::CreateFeatureDirect(_builder, it.audit()->name.c_str(), it.value(), it.index()));
}
namespace_offset = VW::parsers::flatbuffer::CreateNamespaceDirect(_builder, ns_name.c_str(), index, &fts, hash);
if ((it.audit()->ns).c_str() != nullptr) ns_name = it.audit()->ns;

(feature_names).push_back(_builder.CreateString(it.audit()->name.c_str()));
(feature_values).push_back(it.value());
(feature_hashes).push_back(it.index());
}
else
namespace_offset = VW::parsers::flatbuffer::CreateNamespaceDirect(
_builder, ns_name.c_str(), index, hash, &feature_names, &feature_values, &feature_hashes);

_share_examples[refid] = namespace_offset;
}

return _share_examples[refid];
}

// Create namespace when audit is false
flatbuffers::Offset<VW::parsers::flatbuffer::Namespace> to_flat::create_namespace(
features::const_iterator begin, features::const_iterator end, VW::namespace_index index, uint64_t hash)

Check warning on line 345 in utl/flatbuffer/vw_to_flat.cc

View workflow job for this annotation

GitHub Actions / asan.macos-latest.vcpkg-ubsan-debug

'features' is deprecated: Moved into VW namespace. Will be removed in VW 10. [-Wdeprecated-declarations]

Check warning on line 345 in utl/flatbuffer/vw_to_flat.cc

View workflow job for this annotation

GitHub Actions / asan.macos-latest.vcpkg-ubsan-debug

'features' is deprecated: Moved into VW namespace. Will be removed in VW 10. [-Wdeprecated-declarations]

Check warning on line 345 in utl/flatbuffer/vw_to_flat.cc

View workflow job for this annotation

GitHub Actions / asan.macos-latest.vcpkg-asan-debug

'features' is deprecated: Moved into VW namespace. Will be removed in VW 10. [-Wdeprecated-declarations]

Check warning on line 345 in utl/flatbuffer/vw_to_flat.cc

View workflow job for this annotation

GitHub Actions / asan.macos-latest.vcpkg-asan-debug

'features' is deprecated: Moved into VW namespace. Will be removed in VW 10. [-Wdeprecated-declarations]
{
std::stringstream ss;
ss << index;

for (auto it = begin; it != end; ++it) { ss << it.index() << it.value(); }
ss << ":" << hash;

std::string s = ss.str();
uint64_t refid = VW::uniform_hash(s.c_str(), s.size(), 0);
const auto find_ns_offset = _share_examples.find(refid);

if (find_ns_offset == _share_examples.end())
{
flatbuffers::Offset<VW::parsers::flatbuffer::Namespace> namespace_offset;
std::vector<float> feature_values;
std::vector<uint64_t> feature_hashes;

for (auto it = begin; it != end; ++it)
{
for (auto it = begin; it != end; ++it)
if (it.value() != 0) // store the feature data only if the value is non zero
{
fts.push_back(VW::parsers::flatbuffer::CreateFeatureDirect(_builder, nullptr, it.value(), it.index()));
(feature_values).push_back(it.value());
(feature_hashes).push_back(it.index());
}
namespace_offset = VW::parsers::flatbuffer::CreateNamespaceDirect(_builder, nullptr, index, &fts, hash);
}
namespace_offset = VW::parsers::flatbuffer::CreateNamespaceDirect(
_builder, nullptr, index, hash, nullptr, &feature_values, &feature_hashes);

_share_examples[refid] = namespace_offset;
}

Expand Down Expand Up @@ -438,13 +473,25 @@ void to_flat::convert_txt_to_flat(VW::workspace& all)
VW::details::flatten_namespace_extents(ae->feature_space[ns].namespace_extents, ae->feature_space[ns].size());
auto unflattened_with_ranges_that_dont_have_extents = unflatten_namespace_extents_dont_skip(flattened_extents);

for (const auto& extent : unflattened_with_ranges_that_dont_have_extents)
if (all.output_config.audit || all.output_config.hash_inv)
{
for (const auto& extent : unflattened_with_ranges_that_dont_have_extents)
{
// The extent hash for a non-hash-extent will be 0, which is the same as the field no existing to flatbuffers.
auto created_ns = create_namespace_audit(ae->feature_space[ns].audit_begin() + extent.begin_index,
ae->feature_space[ns].audit_begin() + extent.end_index, ns, extent.hash);
namespaces.push_back(created_ns);
}
}
else
{
// The extent hash for a non-hash-extent will be 0, which is the same as the field no existing to flatbuffers.
auto created_ns = create_namespace(ae->feature_space[ns].audit_begin() + extent.begin_index,
ae->feature_space[ns].audit_begin() + extent.end_index, ns, extent.hash,
all.output_config.audit || all.output_config.hash_inv);
namespaces.push_back(created_ns);
for (const auto& extent : unflattened_with_ranges_that_dont_have_extents)
{
// The extent hash for a non-hash-extent will be 0, which is the same as the field no existing to flatbuffers.
auto created_ns = create_namespace(ae->feature_space[ns].cbegin() + extent.begin_index,
ae->feature_space[ns].cbegin() + extent.end_index, ns, extent.hash);
namespaces.push_back(created_ns);
}
}
}
std::string tag(ae->tag.begin(), ae->tag.size());
Expand Down
6 changes: 4 additions & 2 deletions utl/flatbuffer/vw_to_flat.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ class to_flat
void write_to_file(bool collection, bool is_multiline, MultiExampleBuilder& multi_ex_builder,
ExampleBuilder& ex_builder, std::ofstream& outfile);

flatbuffers::Offset<VW::parsers::flatbuffer::Namespace> create_namespace(VW::features::audit_iterator begin,
VW::features::audit_iterator end, VW::namespace_index index, uint64_t hash, bool audit);
flatbuffers::Offset<VW::parsers::flatbuffer::Namespace> create_namespace(
VW::features::const_iterator begin, VW::features::const_iterator end, VW::namespace_index index, uint64_t hash);
flatbuffers::Offset<VW::parsers::flatbuffer::Namespace> create_namespace_audit(
VW::features::audit_iterator begin, VW::features::audit_iterator end, VW::namespace_index index, uint64_t hash);
};
3 changes: 2 additions & 1 deletion vowpalwabbit/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ if(VW_FEAT_CSV)
endif()

if(VW_FEAT_FLATBUFFERS)
target_link_libraries(vw_core PRIVATE vw_fb_parser)
target_link_libraries(vw_core PUBLIC vw_fb_parser)
endif()

# Handle generated header
Expand Down Expand Up @@ -481,6 +481,7 @@ set(vw_core_test_sources
tests/flat_example_test.cc
tests/guard_test.cc
tests/interactions_test.cc
tests/io_alignment_test.cc
tests/loss_functions_test.cc
tests/math_test.cc
tests/merge_header_opts_test.cc
Expand Down
11 changes: 11 additions & 0 deletions vowpalwabbit/core/include/vw/core/api_status.h
Original file line number Diff line number Diff line change
Expand Up @@ -258,3 +258,14 @@ int report_error(status_builder& sb, const First& first, const Rest&... rest)
return sb << VW::experimental::error_code::code##_s

#endif // RETURN_ERROR_LS

#ifndef RETURN_IF_FAIL
/**
* @brief Error reporting macro to test and return on error
*/
# define RETURN_IF_FAIL(x) \
do { \
int retval__LINE__ = (x); \
if (retval__LINE__ != 0) { return retval__LINE__; } \
} while (0)
#endif
5 changes: 1 addition & 4 deletions vowpalwabbit/core/include/vw/core/array_parameters_dense.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,7 @@ class dense_parameters
dense_parameters(dense_parameters&&) noexcept;

bool not_null();
VW::weight* first()
{
return _begin.get();
} // TODO: Temporary fix for allreduce.
VW::weight* first() { return _begin.get(); } // TODO: Temporary fix for allreduce.

VW::weight* data() { return _begin.get(); }

Expand Down
12 changes: 12 additions & 0 deletions vowpalwabbit/core/include/vw/core/error_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,21 @@ ERROR_CODE_DEFINITION(
3, options_disagree, "Different values specified for two options that are constrained to be the same.")
ERROR_CODE_DEFINITION(4, not_implemented, "Not implemented.")
ERROR_CODE_DEFINITION(5, native_exception, "Native exception: ")
ERROR_CODE_DEFINITION(6, fb_parser_namespace_missing, "Missing Namespace. ")
ERROR_CODE_DEFINITION(7, fb_parser_feature_values_missing, "Missing Feature Values. ")
ERROR_CODE_DEFINITION(8, fb_parser_feature_hashes_names_missing, "Missing Feature Names and Feature Hashes. ")
ERROR_CODE_DEFINITION(9, nothing_to_parse, "No new object to be read from file. ")
ERROR_CODE_DEFINITION(10, fb_parser_unknown_example_type, "Unkown Example type. ")
ERROR_CODE_DEFINITION(11, fb_parser_name_hash_missing, "Missing name and hash field in namespace. ")
ERROR_CODE_DEFINITION(
12, fb_parser_size_mismatch_ft_hashes_ft_values, "Size of feature hashes and feature values do not match. ")
ERROR_CODE_DEFINITION(
13, fb_parser_size_mismatch_ft_names_ft_values, "Size of feature names and feature values do not match. ")
ERROR_CODE_DEFINITION(14, unknown_label_type, "Label type in Flatbuffer not understood. ")

// TODO: This is temporary until we switch to the new error handling mechanism.
ERROR_CODE_DEFINITION(10000, vw_exception, "vw_exception: ")
ERROR_CODE_DEFINITION(20000, internal_error, "BUGBUG: ")

#ifdef ERROR_CODE_DEFINITION_NOOP
#undef ERROR_CODE_DEFINITION
Expand Down
Loading

0 comments on commit 4a51778

Please sign in to comment.