-
Notifications
You must be signed in to change notification settings - Fork 190
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
patch: optimize selects by extracting exclusive branches #205
base: main
Are you sure you want to change the base?
Conversation
6586a49
to
8060195
Compare
@hanickadot have you experimented with creating a custom vtable for the select expression? E.g., something like: namespace detail {
// Stamps `val` into every slot of `table` whose index is accepted by T::match_char.
//
// table - lookup table indexed by character value; 0 means "slot not claimed yet"
// atom  - tag value, only its type T is used
// val   - value written into each slot that T accepts
//
// Returns true when at least one slot accepted by T already held a non-zero value,
// i.e. T overlaps with something written to the table earlier.
template<typename AlphabetType, size_t N, typename T>
constexpr bool overwrite_alphabet(std::array<AlphabetType, N> &table, T atom, uint8_t val) {
	(void)atom; // dispatch tag only
	bool has_collision = false;
	for (size_t i = 0; i < table.size(); i++) {
		const bool matches = T::match_char(i);
		// accumulate with |= so that a collision at ANY index is reported,
		// not just the outcome for the last slot of the table
		has_collision |= (table[i] != 0 && matches);
		if (matches) {
			table[i] = val;
		}
	}
	return has_collision;
}
template<typename AlphabetType, size_t N, typename... Ts>
constexpr bool overwrite_alphabet(std::array<AlphabetType, N> &table, ctll::list<Ts...> atom, uint8_t val) {
bool has_collision = false;
for (size_t i = 0; i < table.size(); i++) {
//check if we have a collision with the existing table
has_collision = (table[i] != 0 && ((Ts::match_char(i)) || ...));
table[i] = ((Ts::match_char(i)) || ...) ? val : table[i];
}
return has_collision;
}
// Builds the character -> case-index table for a list of alternatives.
//
// Each element of Ts describes the characters one alternative can start with
// (anything overwrite_alphabet accepts). The result is a tuple of:
//   <0> number of table slots claimed by at least one alternative
//   <1> whether any slot was claimed by more than one alternative
//   <2> the table: slot value k < sizeof...(Ts) selects alternative k,
//       slot value sizeof...(Ts) is the reject state
template<typename AlphabetType, size_t N, typename... Ts>
constexpr auto write_vtable_cases(ctll::list<Ts...> atoms) {
	// overwrite_alphabet takes the case value as uint8_t, so every index
	// (including the reject index sizeof...(Ts)) has to fit in 8 bits
	static_assert(sizeof...(Ts) <= 0xFF, "too many alternatives for an 8-bit case index");
	(void)atoms; // dispatch tag only

	std::array<AlphabetType, N> table{};
	int dummy = 0; // must be initialised: uninitialised locals are not allowed in C++17 constexpr functions
	bool has_collision = false;
	size_t idx = sizeof...(Ts);
	//see foonathan's nifty fold expressions
	//folding over `=` evaluates the right operand first (guaranteed since C++17), so the
	//alternatives are written last-to-first with values N..1 and the first alternative
	//ends up owning any slot it shares with later ones
	(dummy = ... = ((has_collision |= overwrite_alphabet(table, Ts{}, static_cast<uint8_t>(idx--))), 0));
	(void)dummy;

	size_t count_nonzero = 0;
	for (size_t i = 0; i < table.size(); i++) {
		count_nonzero += table[i] != 0;
		//shift the 1-based values down to 0-based case indices; unclaimed slots get the reject index
		table[i] = static_cast<AlphabetType>(table[i] != 0 ? table[i] - 1 : sizeof...(Ts)); //last index is reject state
	}
	return std::make_tuple(count_nonzero, has_collision, table);
}
// Forwards to evaluate() for the pattern encoded in ListAst.
// The pattern is a template argument rather than a function argument, so every
// instantiation shares one parameter list and can be stored in a table of
// function pointers.
template<typename ListAst, typename R, typename Iterator, typename EndIterator>
constexpr auto evaluate_wrapped(const Iterator begin, Iterator current, const EndIterator last, const flags & f, R captures) {
	// a value-initialised ListAst acts as the tag that picks the evaluate overload
	return evaluate(
		begin, current, last,
		f, captures,
		ListAst()
	);
}
}
// matching select in patterns
//
// With more than one alternative and a random-access iterator, the alternative to
// try first is looked up in a 256-entry table keyed by the current character
// instead of testing the alternatives one after another:
//   * when the first-character sets of the alternatives are disjoint, the table
//     lookup is authoritative and exactly one alternative is evaluated;
//   * when they overlap, evaluation starts at the table's alternative and falls
//     through the remaining ones in declaration order.
// The table only knows about code units 0..255 of a non-empty input. At end of
// input, or for a code unit outside that range (e.g. a negative `char`), all
// alternatives are tried in order, exactly like the sequential branch below.
//
// Returns the captures of the first alternative (followed by Tail...) that
// matches, or not_matched.
template <typename R, typename Iterator, typename EndIterator, typename HeadOptions, typename... TailOptions, typename... Tail>
constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, const EndIterator last, const flags & f, R captures, ctll::list<select<HeadOptions, TailOptions...>, Tail...>) noexcept {
	if constexpr (sizeof...(TailOptions) > 0 && is_random_accessible(typename std::iterator_traits<Iterator>::iterator_category{})) {
		constexpr auto vtable_tuple = detail::write_vtable_cases<uint8_t,256>(ctll::list<decltype(calculate_first(sequence<HeadOptions, Tail...>{})), decltype(calculate_first(sequence<TailOptions, Tail...>{}))...>{});
		constexpr auto vtable_cases = std::get<2>(vtable_tuple);
		constexpr auto has_collisions = std::get<1>(vtable_tuple);
		// one entry per alternative, plus a trailing reject entry
		constexpr std::array<R(*)(const Iterator, Iterator, const EndIterator, const flags&, R), sizeof...(TailOptions) + 2> vtable = {
			detail::evaluate_wrapped<sequence<HeadOptions, Tail...>, R, Iterator, EndIterator>,
			detail::evaluate_wrapped<sequence<TailOptions, Tail...>, R, Iterator, EndIterator>...,
			detail::evaluate_wrapped<sequence<reject>, R, Iterator, EndIterator>
		};

		// Default: start at the first alternative and try them all. This covers the
		// inputs the table cannot describe (end of input, out-of-range code units).
		size_t case_num = 0;
		bool table_hit = false;
		if (current != last) {
			// widen before comparing so a negative `char` or a wide code unit never
			// becomes an out-of-bounds index
			const auto code = static_cast<long long>(*current);
			if (code >= 0 && code < static_cast<long long>(vtable_cases.size())) {
				case_num = vtable_cases[static_cast<size_t>(code)];
				table_hit = true;
			}
		}

		if constexpr (!has_collisions) {
			// disjoint first-character sets: the table entry is the only candidate
			if (table_hit) {
				return vtable[case_num](begin, current, last, f, captures);
			}
		}
		for (size_t i = case_num; i < vtable.size(); i++) {
			if (auto r = vtable[i](begin, current, last, f, captures)) {
				return r;
			}
		}
		return not_matched;
	} else {
		// sequential fallback: try the head alternative, then recurse on the rest
		if (auto r = evaluate(begin, current, last, f, captures, ctll::list<HeadOptions, Tail...>())) {
			return r;
		} else {
			return evaluate(begin, current, last, f, captures, ctll::list<select<TailOptions...>, Tail...>());
		}
	}
} So far as I can tell, with larger select expresions this cuts down on compile time, msvc performs better when the character table is made static (compile error with clang), but clang appears to do far better. For example in MSVC I can compile static constexpr auto real_lexer_pattern = ctll::fixed_string{
// Alternatives are tried in order, so a longer operator must be listed before
// any operator that is a prefix of it.
// groups 1-3: whitespace, line comment, block comment
"(\\s++)|(//[^\r\n]*+)|(/[*](?:[^*]++|[*][^\\x2F])+[*]/)|"
// group 4: identifier
"([a-zA-Z_][a-zA-Z0-9]*+)|"
// groups 5-10: floating-point literals (decimal, then hexadecimal)
"([0-9]+[eE][\\+\\-]?[0-9]+(?:[fFlL]?))|"
"([0-9]*[.][0-9]+(?:[eE][\\+\\-]?[0-9]+)?(?:[fFlL]?))|"
"([0-9]+[.][0-9]*(?:[eE][\\+\\-]?[0-9]+)?(?:[fFlL]?))|"
"(0[xX][0-9a-fA-F]+[pP][\\+\\-]?[0-9]+(?:[fFlL]?))|"
"(0[xX][0-9a-fA-F]*[.][0-9a-fA-F]+(?:[pP][\\+\\-]?[0-9]+)?(?:[fFlL]?))|"
"(0[xX][0-9a-fA-F]+[.][0-9a-fA-F]*(?:[pP][\\+\\-]?[0-9]+)?(?:[fFlL]?))|"
// groups 11-14: integer literals (hex, octal, binary, decimal)
"(0[xX][0-9a-fA-F]+)|"
"(0[0-7]+)|"
"(0[bB][0-1]+)|"
"([0-9]++)|"
// groups 15-16: string and character literals
"(\"(?:[^\\\\\"]+|\\\\[\\s\\S])*+\")|"
"('(?:[^\\\\']+|\\\\[\\s\\S])*+')|"
// everything below: operators and punctuation
"(!=)|(!)|"
// `$` on its own is the end anchor; a character class makes it a literal dollar sign
"(#)|([$])|"
"(%=)|(%)|"
"(&&=)|(&&)|(&=)|(&)|"
"([(])|([)])|"
"([*]=)|([*])|"
"([+][+])|([+]=)|([+])|"
"[,]|"
"(--)|(-=)|(->)|(-)|"
"([.][.][.])|([.][.])|([.])|"
"(/=)|(/)|"
"(::)|(:=)|(:)|"
"(;)|"
"(<<=)|(<<)|(<=>)|(<=)|(<>)|(<)|"
"(==)|(=)|"
// single `>` family, longest first, so that `>>` is reachable
"(>>=)|(>>)|(>=)|(>)|"
"([?])|([@])|(\\[)|"
"(\\\\)|"
"(\\])|"
// `^` on its own is the begin anchor and would match the empty string at the start,
// hiding every alternative after it; escape it to get a literal caret
"(\\^=)|(\\^)|"
"([`])|([{])|"
"([|]=)|([|][|]=)|([|][|])|([|])|"
"([}])|"
"(~=)|(~)"
};
std::optional<lex_item> real_lexer(std::string_view v) noexcept {
auto m = ctre::starts_with<real_lexer_pattern>(v);
if (m) {
if (m.get<1>()) {
return lex_item{ type::space, m.view() };
}
else if (m.get<2>()) {
return lex_item{ type::space, m.view() };
}
else if (m.get<3>()) {
return lex_item{ type::space, m.view() };
}
else if (m.get<4>()) {
return lex_item{ type::identifier, m.view() };
}
else if (m.get<5>()) {
return lex_item{ type::flt, m.view() };
}
else if (m.get<6>()) {
return lex_item{ type::flt, m.view() };
}
else if (m.get<7>()) {
return lex_item{ type::flt, m.view() };
}
else if (m.get<8>()) {
return lex_item{ type::flt, m.view() };
}
else if (m.get<9>()) {
return lex_item{ type::flt, m.view() };
}
else if (m.get<10>()) {
return lex_item{ type::flt, m.view() };
}
else if (m.get<11>()) {
return lex_item{ type::number, m.view() };
}
else if (m.get<12>()) {
return lex_item{ type::number, m.view() };
}
else if (m.get<13>()) {
return lex_item{ type::number, m.view() };
}
else if (m.get<14>()) {
return lex_item{ type::number, m.view() };
}
else if (m.get<15>()) {
return lex_item{ type::str, m.view() };
}
else if (m.get<16>()) {
return lex_item{ type::chr, m.view() };
} else {
return lex_item{ type::ops, m.view() };
}
}
return std::nullopt;
} in couple of seconds with the vtable type approach. I'm currently upwards of half an hour waiting for the original to compile, not sure if it'll even finish. |
This should supersede #158; it actually extracts mutually exclusive paths.
E.g., in the lexer example, it will recognize that "([a-z]+)|([0-9]+)" has mutually exclusive paths and will split them apart from one another based on the first character. In theory this should reduce run time, but it needs testing.