From bdccec5d28308f450b543de0a788abc77296b2cd Mon Sep 17 00:00:00 2001 From: Mark Rucker Date: Tue, 10 Oct 2023 23:31:55 -0400 Subject: [PATCH 1/3] feat: Improved runtime performance of EMT when using the not_self_consistent_rank flag. --- .../core/src/reductions/eigen_memory_tree.cc | 57 ++++++++++--------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/vowpalwabbit/core/src/reductions/eigen_memory_tree.cc b/vowpalwabbit/core/src/reductions/eigen_memory_tree.cc index 37c856644fa..3db524b2d85 100644 --- a/vowpalwabbit/core/src/reductions/eigen_memory_tree.cc +++ b/vowpalwabbit/core/src/reductions/eigen_memory_tree.cc @@ -489,15 +489,7 @@ void tree_bound(emt_tree& b, emt_example* ec) } } -void scorer_features(const emt_feats& f1, VW::features& out) -{ - for (auto p : f1) - { - if (p.second != 0) { out.push_back(p.second, p.first); } - } -} - -void scorer_features(const emt_feats& f1, const emt_feats& f2, VW::features& out) +void scorer_features_sub(const emt_feats& f1, const emt_feats& f2, VW::features& out) { auto iter1 = f1.begin(); auto iter2 = f2.begin(); @@ -535,15 +527,37 @@ void scorer_features(const emt_feats& f1, const emt_feats& f2, VW::features& out } } +void scorer_features_mul(const emt_feats& f1, const emt_feats& f2, VW::features& out) +{ + auto iter1 = f1.begin(); + auto iter2 = f2.begin(); + + while (iter1 != f1.end() && iter2 != f2.end()) + { + if (iter1->first < iter2->first) + { + iter1++; + } + else if (iter2->first < iter1->first) + { + iter2++; + } + else + { + out.push_back(std::abs(iter1->second - iter2->second), iter1->first); + iter1++; + iter2++; + } + } +} + void scorer_example(emt_tree& b, const emt_example& ex1, const emt_example& ex2) { VW::example& out = *b.ex; static constexpr VW::namespace_index X_NS = 'x'; - static constexpr VW::namespace_index Z_NS = 'z'; out.feature_space[X_NS].clear(); - out.feature_space[Z_NS].clear(); if (b.scorer_type == emt_scorer_type::SELF_CONSISTENT_RANK) { @@ -552,7 +566,7 @@ void 
scorer_example(emt_tree& b, const emt_example& ex1, const emt_example& ex2) out.interactions->clear(); - scorer_features(ex1.full, ex2.full, out.feature_space[X_NS]); + scorer_features_sub(ex1.full, ex2.full, out.feature_space[X_NS]); out.total_sum_feat_sq = out.feature_space[X_NS].sum_feat_sq; out.num_features = out.feature_space[X_NS].size(); @@ -565,26 +579,13 @@ void scorer_example(emt_tree& b, const emt_example& ex1, const emt_example& ex2) { out.indices.clear(); out.indices.push_back(X_NS); - out.indices.push_back(Z_NS); out.interactions->clear(); - out.interactions->push_back({X_NS, Z_NS}); - - b.all->feature_tweaks_config.ignore_some_linear = true; - b.all->feature_tweaks_config.ignore_linear[X_NS] = true; - b.all->feature_tweaks_config.ignore_linear[Z_NS] = true; - scorer_features(ex1.full, out.feature_space[X_NS]); - scorer_features(ex2.full, out.feature_space[Z_NS]); + scorer_features_mul(ex1.full, ex2.full, out.feature_space[X_NS]); - // when we receive ex1 and ex2 their features are indexed on top of eachother. In order - // to make sure VW recognizes the features from the two examples as separate features - // we apply a map of multiplying by 2 and then offseting by 1 on the second example. 
- for (auto& j : out.feature_space[X_NS].indices) { j = j * 2; } - for (auto& j : out.feature_space[Z_NS].indices) { j = j * 2 + 1; } - - out.total_sum_feat_sq = out.feature_space[X_NS].sum_feat_sq + out.feature_space[Z_NS].sum_feat_sq; - out.num_features = out.feature_space[X_NS].size() + out.feature_space[Z_NS].size(); + out.total_sum_feat_sq = out.feature_space[X_NS].sum_feat_sq; + out.num_features = out.feature_space[X_NS].size(); auto initial = emt_initial(b.initial_type, ex1.full, ex2.full); out.ex_reduction_features.get().initial = initial; From 86ddba6d47b27f2aed9d332f410df81c9ca4815c Mon Sep 17 00:00:00 2001 From: Mark Rucker Date: Wed, 11 Oct 2023 12:35:38 -0400 Subject: [PATCH 2/3] fix: Made EMT's scorer_features_mul emit the product of matching feature values instead of their absolute difference. --- vowpalwabbit/core/src/reductions/eigen_memory_tree.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vowpalwabbit/core/src/reductions/eigen_memory_tree.cc b/vowpalwabbit/core/src/reductions/eigen_memory_tree.cc index 3db524b2d85..62c2a6dcd59 100644 --- a/vowpalwabbit/core/src/reductions/eigen_memory_tree.cc +++ b/vowpalwabbit/core/src/reductions/eigen_memory_tree.cc @@ -544,7 +544,7 @@ void scorer_features_mul(const emt_feats& f1, const emt_feats& f2, VW::features& } else { - out.push_back(std::abs(iter1->second - iter2->second), iter1->first); + out.push_back(iter1->second*iter2->second, iter1->first); iter1++; iter2++; } From b4ed911c93af20fd2ff562d39d74fc165f5963d3 Mon Sep 17 00:00:00 2001 From: Mark Rucker Date: Sat, 14 Oct 2023 14:41:36 -0400 Subject: [PATCH 3/3] Fixed code formatting. 
--- .../core/src/reductions/eigen_memory_tree.cc | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/vowpalwabbit/core/src/reductions/eigen_memory_tree.cc b/vowpalwabbit/core/src/reductions/eigen_memory_tree.cc index 62c2a6dcd59..40f6bfcfefa 100644 --- a/vowpalwabbit/core/src/reductions/eigen_memory_tree.cc +++ b/vowpalwabbit/core/src/reductions/eigen_memory_tree.cc @@ -534,17 +534,11 @@ void scorer_features_mul(const emt_feats& f1, const emt_feats& f2, VW::features& while (iter1 != f1.end() && iter2 != f2.end()) { - if (iter1->first < iter2->first) - { - iter1++; - } - else if (iter2->first < iter1->first) - { - iter2++; - } + if (iter1->first < iter2->first) { iter1++; } + else if (iter2->first < iter1->first) { iter2++; } else { - out.push_back(iter1->second*iter2->second, iter1->first); + out.push_back(iter1->second * iter2->second, iter1->first); iter1++; iter2++; }