-
Notifications
You must be signed in to change notification settings - Fork 1
/
tokenizer.cpp
56 lines (51 loc) · 1.55 KB
/
tokenizer.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
/* `tokenizer.cpp`
* By: Ivan Rubinson (c) 2016
* Licensed under the GNU Lesser General Public License v3.
*/
#include <regex>
#include <algorithm>
#include "Clauparse/tokenizer.h"
namespace ClauParse
{
    /* `tokenizeLine(std::wstring)`
     * Creates a container of `Token`s from a single line.
     *
     * Scanning rules (applied left to right, leading whitespace before each
     * token is skipped):
     *   - `'#'` starts a comment: scanning stops there and nothing from the
     *     comment onwards produces a token.
     *   - `'{'` creates a token of type `BLOCK_OPEN`.
     *   - `'}'` creates a token of type `BLOCK_CLOSE`.
     *   - `'='` creates a token of type `EQUALS`.
     *   - Any other run of characters, up to the next `'{'`, `'}'`, `'='`,
     *     tab, CR or LF, creates a token of type `LABEL`.
     *   - Empty lines don't create tokens.
     *
     * NOTE(review): the label character class only excludes `{ } = \t \r \n`,
     * so plain spaces and a `'#'` that appears inside a label run are kept as
     * part of that label (e.g. trailing spaces before `'='`). Confirm whether
     * that is intended.
     *
     * NOTE(review): `str` is taken by value to keep the signature identical to
     * the existing declaration; switching to `const std::wstring&` would also
     * require updating the header.
     *
     * Returns the tokens found before the first comment, in input order.
     */
    std::vector<Token> tokenizeLine(const std::wstring str)
    {
        // The pattern never changes, so compile it once instead of on every
        // call (regex construction is costly). Function-local statics are
        // initialised thread-safely; the regex uses whatever global locale is
        // in effect at the first call.
        // Capture groups: 1 = comment, 2 = '{', 3 = '}', 4 = '=', 5 = label.
        static const std::wregex re{ L"\\s*(?:\n|(#[^\n]*)|(\\{)|(\\})|(=)|([^{}=\t\r\n]+))" };

        std::vector<Token> tokens;

        const std::wsregex_iterator matchesEnd;
        for (std::wsregex_iterator matchIt(str.cbegin(), str.cend(), re); matchIt != matchesEnd; ++matchIt)
        {
            const std::wsmatch& match = *matchIt;

            if (match[1].length() > 0U) {
                // Comment reached: everything from here on is ignored, so
                // there is no need to look at any further matches.
                break;
            }

            if (match[2].length() > 0U) {
                tokens.emplace_back(Token::BLOCK_OPEN, std::wstring(L"{"));
            }
            else if (match[3].length() > 0U) {
                tokens.emplace_back(Token::BLOCK_CLOSE, std::wstring(L"}"));
            }
            else if (match[4].length() > 0U) {
                tokens.emplace_back(Token::EQUALS, std::wstring(L"="));
            }
            else if (match[5].length() > 0U) {
                tokens.emplace_back(Token::LABEL, match[5]);
            }
            // A match with no populated group is a bare newline: no token.
        }

        return tokens;
    }
}