Skip to content

Commit

Permalink
feat: support for nested groups and quantifiers
Browse files Browse the repository at this point in the history
  • Loading branch information
Demali-876 committed Nov 3, 2024
2 parents fdb77ad + af79c13 commit f4200ae
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 38 deletions.
16 changes: 16 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
# [0.1.0](https://github.com/Demali-876/motoko_regex_engine/compare/v0.0.0...v0.1.0) (2024-10-29)


### Bug Fixes

* setup semantic release workflow ([823d429](https://github.com/Demali-876/motoko_regex_engine/commit/823d429f549ccde9812db741c5d22126282c9c29))
* setup semantic release workflow ([a7addf2](https://github.com/Demali-876/motoko_regex_engine/commit/a7addf26dd204370a4afe77208b0ec5913163d40))
* setup semantic release workflow ([8dffe34](https://github.com/Demali-876/motoko_regex_engine/commit/8dffe34f9726fe79baa14b6f5a20f96570ba5f91))


### Features

* add semantic release configuration ([9cbf0ab](https://github.com/Demali-876/motoko_regex_engine/commit/9cbf0abe8f6a944019bb36c504ebe90a319e0953))
* **engine:** change log automation ([bf83e73](https://github.com/Demali-876/motoko_regex_engine/commit/bf83e73b54c7b46c1810e6ace3a1e2997f6a3aa3))
* **parser:** add nested quantifier support ([f8055a6](https://github.com/Demali-876/motoko_regex_engine/commit/f8055a6f5a90cfb3bc56a28ba6a3f6f03826b34e))

# [0.0.0] Unreleased - 10-03-2024

## 🚀 New Features
Expand Down
61 changes: 23 additions & 38 deletions src/motoko_regex/Lexer.mo
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,30 @@ import Array "mo:base/Array";
import Int "mo:base/Int";
import Result "mo:base/Result";
import Buffer "mo:base/Buffer";
import Debug "mo:base/Debug";
import Types "Types";
import Extensions "Extensions";
import Cursor "Cursor";

module {
type Token = Types.Token;
type LexerError = Types.RegexError;
public type LexerError = Types.RegexError;
type CharacterClass = Types.CharacterClass;
public class Lexer(input: Text) {
let cursor = Cursor.Cursor(input);
let tokenBuffer = Buffer.Buffer<Token>(16);

public func tokenize(): [Token] {
public func tokenize(): Result.Result<[Token], LexerError> {
while (cursor.hasNext()) {
switch (nextToken()) {
case (#ok(token)) { tokenBuffer.add(token) };
case (#err(error)) { Debug.trap(Extensions.errorToText(error)) };
case (#ok(token)) {
tokenBuffer.add(token);
};
case (#err(error)) {
return #err(error);
};
};
};
Buffer.toArray(tokenBuffer)
#ok(Buffer.toArray(tokenBuffer))
};

private func nextToken(): Result.Result<Token, LexerError> {
Expand All @@ -44,7 +47,7 @@ module {
case (#err(error)) { return #err(error) };
};
};
case '[' {
case '[' {
switch (tokenizeCharacterClass()) {
case (#ok(token)) {
let quantifiedToken = checkAndApplyQuantifier(token);
Expand Down Expand Up @@ -234,7 +237,7 @@ module {
};
};

if (not cursor.hasNext() or cursor.current() != ']') {
if (not cursor.hasNext() and cursor.current() != ']') {
return #err(#GenericError("Unclosed character class at position " # Nat.toText(cursor.getPos())));
};

Expand Down Expand Up @@ -328,42 +331,24 @@ module {

private func tokenizeSubExpression(): Result.Result<Buffer.Buffer<Token>, LexerError> {
var subTokens = Buffer.Buffer<Token>(16);
var depth = 1;

while (cursor.hasNext()) {
if (cursor.current() == ')') {
return #ok(subTokens);
};

while (cursor.hasNext() and depth > 0) {
switch (cursor.current()) {
case '(' {
switch (tokenizeGroup()) {
case (#ok(token)) {
subTokens.add(token);
};
case (#err(error)) { return #err(error) };
};
};
case ')' {
depth -= 1;
if (depth == 0) {
return #ok(subTokens);
} else {
return #ok(subTokens);
};
switch (nextToken()) {
case (#ok(token)) {
subTokens.add(token);
};
case _ {
switch (nextToken()) {
case (#ok(token)) { subTokens.add(token) };
case (#err(error)) { return #err(error) };
};
case (#err(error)) {
return #err(error);
};
};
};

if (depth > 0) {
return #err(#GenericError("Unclosed group at position " # Nat.toText(cursor.getPos())));
};

#ok(subTokens)
};

#err(#GenericError("Unclosed group at position " # Nat.toText(cursor.getPos())))
};
private func tokenizeEscapedChar(): Result.Result<Token, LexerError> {
cursor.inc();
switch (cursor.current()) {
Expand Down

0 comments on commit f4200ae

Please sign in to comment.