From 786d76bde98f5ac9387bab2157ad158f99e4b6f2 Mon Sep 17 00:00:00 2001 From: Demali-876 <90882773+Demali-876@users.noreply.github.com> Date: Sun, 3 Nov 2024 12:32:14 -0500 Subject: [PATCH] refactor!: restructure repository and update compiler version - Removed the 'motoko_regex' folder and moved all files into 'src' for easier importing through mops. - Renamed 'RegexTest.mo' to 'lib.test.mo' and 'Regex.mo' to 'lib.mo'. - Updated 'dfx.json' to use 'src/motoko_regex/test/lib.test.mo'. - Updated import paths in 'lib.test.mo'. - Reverted the compiler to an older version, causing potential compatibility issues. BREAKING CHANGE: The repository structure and file names have changed. Additionally, the compiler has been reverted to an older version. --- dfx.json | 2 +- src/Compiler.mo | 362 ++++++++++++++++++ src/{motoko_regex => }/Cursor.mo | 0 src/{motoko_regex => }/Extensions.mo | 0 src/{motoko_regex => }/Lexer.mo | 0 src/{motoko_regex => }/Matcher.mo | 0 src/{motoko_regex => }/Optimizer.mo | 0 src/{motoko_regex => }/Parser.mo | 0 src/{motoko_regex => }/Types.mo | 0 src/{motoko_regex/Regex.mo => lib.mo} | 0 src/motoko_regex/Compiler.mo | 326 ---------------- src/motoko_regex/Tests/compiler.test.mo | 0 src/motoko_regex/Tests/lexer.test.mo | 0 src/motoko_regex/Tests/parser.test.mo | 0 .../RegexTest.mo => test/lib.test.mo} | 11 +- src/{motoko_regex => }/teststrings.md | 0 16 files changed, 368 insertions(+), 333 deletions(-) create mode 100644 src/Compiler.mo rename src/{motoko_regex => }/Cursor.mo (100%) rename src/{motoko_regex => }/Extensions.mo (100%) rename src/{motoko_regex => }/Lexer.mo (100%) rename src/{motoko_regex => }/Matcher.mo (100%) rename src/{motoko_regex => }/Optimizer.mo (100%) rename src/{motoko_regex => }/Parser.mo (100%) rename src/{motoko_regex => }/Types.mo (100%) rename src/{motoko_regex/Regex.mo => lib.mo} (100%) delete mode 100644 src/motoko_regex/Compiler.mo delete mode 100644 src/motoko_regex/Tests/compiler.test.mo delete mode 100644 
src/motoko_regex/Tests/lexer.test.mo delete mode 100644 src/motoko_regex/Tests/parser.test.mo rename src/{motoko_regex/RegexTest.mo => test/lib.test.mo} (91%) rename src/{motoko_regex => }/teststrings.md (100%) diff --git a/dfx.json b/dfx.json index f85c107..cc97bdf 100644 --- a/dfx.json +++ b/dfx.json @@ -1,7 +1,7 @@ { "canisters": { "motoko_regex": { - "main": "src/motoko_regex/Regex.mo", + "main": "src/motoko_regex/test/lib.test.mo", "type": "motoko" } }, diff --git a/src/Compiler.mo b/src/Compiler.mo new file mode 100644 index 0000000..4196016 --- /dev/null +++ b/src/Compiler.mo @@ -0,0 +1,362 @@ +import Types "Types"; +import Buffer "mo:base/Buffer"; +import Order "mo:base/Order"; +import Iter "mo:base/Iter"; +import Char "mo:base/Char"; +import Array "mo:base/Array"; +import Extensions "Extensions"; +import Optimizer "Optimizer"; + +module { /* + public class Compiler() { + type State = Types.State; + type NFA = Types.CompiledRegex; + type Transition = Types.Transition; + type TransitionTable = Types.TransitionTable; + + public func compile(ast: Types.ASTNode): NFA { + let startState: State = 0; + let (transitionTable, acceptStates) = buildNFA(ast, startState); + { + transitions = transitionTable; + startState = startState; + acceptStates = acceptStates; + } + }; + + public func buildNFA(ast: Types.ASTNode, startState: State): (TransitionTable, [State]) { + switch (ast) { + + case (#Character(char)) { + let acceptState: State = startState + 1; + let transition: Transition = #Char(char); + let transitionTable: TransitionTable = [(startState, transition, acceptState)]; + (transitionTable, [acceptState]); + }; + + case (#Concatenation(subExprs)) { + var currentStartState: State = startState; + let transitionBuffer = Buffer.Buffer<(State, Transition, State)>(subExprs.size()); + var acceptStates: [State] = []; + + for (subExpr in subExprs.vals()) { + let (subTransitionTable, subAcceptStates) = buildNFA(subExpr, currentStartState); + for ((fromState, transition, 
toState) in subTransitionTable.vals()) { + transitionBuffer.add((fromState, transition, toState)); + }; + currentStartState := subAcceptStates[0]; + acceptStates := subAcceptStates; + }; + + (Buffer.toArray(transitionBuffer), acceptStates); + }; + + case (#Alternation(subExprs)) { + let newStartState: State = startState; + let newAcceptState: State = newStartState + subExprs.size() + 1; + let transitionBuffer = Buffer.Buffer<(State, Transition, State)>(subExprs.size() * 2); + var acceptStates: [State] = [newAcceptState]; + + for (subExpr in subExprs.vals()) { + let (subTransitionTable, subAcceptStates) = buildNFA(subExpr, newStartState + 1); + transitionBuffer.add((newStartState, #Epsilon, subTransitionTable[0].0)); + + for ((fromState, transition, toState) in subTransitionTable.vals()) { + transitionBuffer.add((fromState, transition, toState)); + }; + + for (acceptState in subAcceptStates.vals()) { + transitionBuffer.add((acceptState, #Epsilon, newAcceptState)); + }; + }; + + (Buffer.toArray(transitionBuffer), acceptStates); + }; + + case (#Quantifier { subExpr; min; max; mode }) { + let transitionBuffer = Buffer.Buffer<(State, Transition, State)>(10); + let (subTransitionTable, subAcceptStates) = buildNFA(subExpr, startState + 1); + let quantifierStartState: State = startState; + let quantifierAcceptState: State = startState + subTransitionTable.size() + 2; + + // Assign default max value (100) if max is null + let maxVal = switch (max) { + case (null) 100; + case (?value) value; + }; + + for ((fromState, transition, toState) in subTransitionTable.vals()) { + transitionBuffer.add((fromState, transition, toState)); + }; + + if (min == 0 and max == null) { + switch (mode) { + case (#Greedy) { + transitionBuffer.add((quantifierStartState, #Epsilon, subTransitionTable[0].0)); + for (acceptState in subAcceptStates.vals()) { + transitionBuffer.add((acceptState, #Epsilon, quantifierAcceptState)); // Exit loop + transitionBuffer.add((acceptState, #Epsilon, 
quantifierStartState)); // Loop back + }; + }; + case (#Lazy) { + transitionBuffer.add((quantifierStartState, #Epsilon, quantifierAcceptState)); // Match 0 times first + transitionBuffer.add((quantifierStartState, #Epsilon, subTransitionTable[0].0)); // Try to match sub-expression + for (acceptState in subAcceptStates.vals()) { + transitionBuffer.add((acceptState, #Epsilon, quantifierStartState)); // Loop back for more matches + }; + }; + case (#Possessive) { + transitionBuffer.add((quantifierStartState, #Epsilon, subTransitionTable[0].0)); + for (acceptState in subAcceptStates.vals()) { + transitionBuffer.add((acceptState, #Epsilon, quantifierAcceptState)); // Exit loop, no backtracking + }; + }; + }; + } else if (min == 1 and max == null) { + switch (mode) { + case (#Greedy) { + transitionBuffer.add((quantifierStartState, #Epsilon, subTransitionTable[0].0)); + for (acceptState in subAcceptStates.vals()) { + transitionBuffer.add((acceptState, #Epsilon, quantifierAcceptState)); + transitionBuffer.add((acceptState, #Epsilon, quantifierStartState)); + }; + }; + case (#Lazy) { + transitionBuffer.add((quantifierStartState, #Epsilon, subTransitionTable[0].0)); + transitionBuffer.add((quantifierStartState, #Epsilon, quantifierAcceptState)); // Try to match 0 times + for (acceptState in subAcceptStates.vals()) { + transitionBuffer.add((acceptState, #Epsilon, quantifierStartState)); + }; + }; + case (#Possessive) { + transitionBuffer.add((quantifierStartState, #Epsilon, subTransitionTable[0].0)); + for (acceptState in subAcceptStates.vals()) { + transitionBuffer.add((acceptState, #Epsilon, quantifierAcceptState)); + }; + }; + }; + } else if (min == 0 and max == ?1) { + switch (mode) { + case (#Greedy) { + transitionBuffer.add((quantifierStartState, #Epsilon, subTransitionTable[0].0)); + for (acceptState in subAcceptStates.vals()) { + transitionBuffer.add((acceptState, #Epsilon, quantifierAcceptState)); + }; + }; + case (#Lazy) { + 
transitionBuffer.add((quantifierStartState, #Epsilon, quantifierAcceptState)); // Match 0 times first + transitionBuffer.add((quantifierStartState, #Epsilon, subTransitionTable[0].0)); // Try to match 1 time + }; + case (#Possessive) { + transitionBuffer.add((quantifierStartState, #Epsilon, subTransitionTable[0].0)); + for (acceptState in subAcceptStates.vals()) { + transitionBuffer.add((acceptState, #Epsilon, quantifierAcceptState)); // Exit, no backtracking + }; + }; + }; + } else if (min > 0 and max != null) { + var currentStartState = quantifierStartState; + for (i in Iter.range(0, min - 1)) { + let (subTrans, subAcc) = buildNFA(subExpr, currentStartState + 1); + for ((fromState, transition, toState) in subTrans.vals()) { + transitionBuffer.add((fromState, transition, toState)); + }; + currentStartState := subAcc[0]; + }; + + for (i in Iter.range(min, maxVal - 1)) { + let (subTrans, subAcc) = buildNFA(subExpr, currentStartState + 1); + for ((fromState, transition, toState) in subTrans.vals()) { + transitionBuffer.add((fromState, transition, toState)); + }; + currentStartState := subAcc[0]; + }; + }; + + (Buffer.toArray(transitionBuffer), [quantifierAcceptState]); + }; + + case (#Group { subExpr; captureIndex; modifier }) { + let transitionBuffer = Buffer.Buffer<(State, Transition, State)>(10); + let groupStartState: State = startState; + let groupEndState: State = groupStartState + 1; + + switch (modifier) { + case null { + let (subTransitionTable, subAcceptStates) = buildNFA(subExpr, groupStartState + 1); + + for ((fromState, transition, toState) in subTransitionTable.vals()) { + transitionBuffer.add((fromState, transition, toState)); + }; + + for (acceptState in subAcceptStates.vals()) { + transitionBuffer.add((acceptState, #Epsilon, groupEndState)); + }; + + if (captureIndex != null) { + transitionBuffer.add((groupStartState, #Group {startState = groupStartState; endState = groupEndState; captureIndex}, groupEndState)); + }; + + 
(Buffer.toArray(transitionBuffer), [groupEndState]); + }; + + case (?#NonCapturing) { + let (subTransitionTable, subAcceptStates) = buildNFA(subExpr, groupStartState + 1); + + for ((fromState, transition, toState) in subTransitionTable.vals()) { + transitionBuffer.add((fromState, transition, toState)); + }; + + for (acceptState in subAcceptStates.vals()) { + transitionBuffer.add((acceptState, #Epsilon, groupEndState)); + }; + + (Buffer.toArray(transitionBuffer), [groupEndState]); + }; + + case (?#PositiveLookahead) { + let (subTransitionTable, _) = buildNFA(subExpr, groupStartState); + + for ((fromState, transition, toState) in subTransitionTable.vals()) { + transitionBuffer.add((fromState, transition, toState)); + }; + + (Buffer.toArray(transitionBuffer), [groupStartState]); + }; + + case (?#NegativeLookahead) { + let (subTransitionTable, _) = buildNFA(subExpr, groupStartState); + + for ((fromState, transition, toState) in subTransitionTable.vals()) { + transitionBuffer.add((fromState, transition, toState)); + }; + + (Buffer.toArray(transitionBuffer), [groupStartState]); // Group ends without advancing + }; + + case (?#PositiveLookbehind) { + let (subTransitionTable, _) = buildNFA(subExpr, groupStartState); + + for ((fromState, transition, toState) in subTransitionTable.vals()) { + transitionBuffer.add((fromState, transition, toState)); + }; + + (Buffer.toArray(transitionBuffer), [groupStartState]); + }; + + case (?#NegativeLookbehind) { + let (subTransitionTable, _) = buildNFA(subExpr, groupStartState); + + for ((fromState, transition, toState) in subTransitionTable.vals()) { + transitionBuffer.add((fromState, transition, toState)); + }; + + (Buffer.toArray(transitionBuffer), [groupStartState]); + }; + }; + }; + + case (#Metacharacter metacharType) { + let transitionBuffer = Buffer.Buffer<(State, Transition, State)>(1); + let acceptState: State = startState + 1; + switch (metacharType) { + case (#Dot) { + transitionBuffer.add((startState, #Any, acceptState)); + 
}; + case (_) { + let metaRanges = Extensions.metacharToRanges(metacharType); + for (range in metaRanges.vals()) { + transitionBuffer.add((startState, #Range(range.0, range.1), acceptState)); + }; + }; + }; + (Buffer.toArray(transitionBuffer), [acceptState]); + }; + + case (#CharacterClass { isNegated; classes }) { + let transitionBuffer = Buffer.Buffer<(State, Transition, State)>(classes.size()); + let acceptState: State = startState + 1; + let ranges = Buffer.Buffer<(Char, Char)>(classes.size()); + + for (charClass in classes.vals()) { + switch (charClass) { + case (#Single(char)) { + ranges.add((char, char)); + }; + case (#Range(from, to)) { + ranges.add((from, to)); + }; + case (#Metacharacter(metaType)) { + let metaRanges = Extensions.metacharToRanges(metaType); + for (range in metaRanges.vals()) { + ranges.add(range); + }; + }; + case (#Quantified(charClass, quantType)) { + ignore buildNFA(#Quantifier { + subExpr = #CharacterClass({isNegated = isNegated; classes = [charClass]}); + min = quantType.min; + max = quantType.max; + mode = quantType.mode; + }, startState); + }; + + }; + }; + + if (isNegated) { + var lastChar: Char = Char.fromNat32(0); + let sortedRanges = Buffer.toArray(ranges); + ignore Array.sort<(Char, Char)>(sortedRanges, func(a: (Char, Char), b: (Char, Char)) : Order.Order { + Char.compare(a.0, b.0) + }); + for (range in sortedRanges.vals()) { + if (Char.toNat32(lastChar) < Char.toNat32(range.0)) { + transitionBuffer.add((startState, #Range(lastChar, Char.fromNat32(Char.toNat32(range.0) - 1)), acceptState)); + }; + lastChar := Char.fromNat32(Char.toNat32(range.1) + 1); + }; + if (Char.toNat32(lastChar) <= 255) { + transitionBuffer.add((startState, #Range(lastChar, Char.fromNat32(255)), acceptState)); + }; + } else { + for (range in ranges.vals()) { + transitionBuffer.add((startState, #Range(range.0, range.1), acceptState)); + }; + }; + + (Buffer.toArray(transitionBuffer), [acceptState]); + }; + + case (#Anchor anchorType) { + let 
transitionBuffer = Buffer.Buffer<(State, Transition, State)>(1); + let acceptState: State = startState + 1; + switch (anchorType) { + case (#StartOfString) { + transitionBuffer.add((startState, #Epsilon, acceptState)); + }; + case (#EndOfString) { + transitionBuffer.add((startState, #Epsilon, acceptState)); + }; + case (#WordBoundary) { + transitionBuffer.add((startState, #Epsilon, acceptState)); + }; + case (#NonWordBoundary) { + transitionBuffer.add((startState, #Epsilon, acceptState)); + }; + case (#StartOfStringOnly) { + transitionBuffer.add((startState, #Epsilon, acceptState)); + }; + case (#EndOfStringOnly) { + transitionBuffer.add((startState, #Epsilon, acceptState)); + }; + case (#PreviousMatchEnd) { + transitionBuffer.add((startState, #Epsilon, acceptState)); + }; + }; + (Buffer.toArray(transitionBuffer), [acceptState]); + }; + } + }; + };*/ +}; \ No newline at end of file diff --git a/src/motoko_regex/Cursor.mo b/src/Cursor.mo similarity index 100% rename from src/motoko_regex/Cursor.mo rename to src/Cursor.mo diff --git a/src/motoko_regex/Extensions.mo b/src/Extensions.mo similarity index 100% rename from src/motoko_regex/Extensions.mo rename to src/Extensions.mo diff --git a/src/motoko_regex/Lexer.mo b/src/Lexer.mo similarity index 100% rename from src/motoko_regex/Lexer.mo rename to src/Lexer.mo diff --git a/src/motoko_regex/Matcher.mo b/src/Matcher.mo similarity index 100% rename from src/motoko_regex/Matcher.mo rename to src/Matcher.mo diff --git a/src/motoko_regex/Optimizer.mo b/src/Optimizer.mo similarity index 100% rename from src/motoko_regex/Optimizer.mo rename to src/Optimizer.mo diff --git a/src/motoko_regex/Parser.mo b/src/Parser.mo similarity index 100% rename from src/motoko_regex/Parser.mo rename to src/Parser.mo diff --git a/src/motoko_regex/Types.mo b/src/Types.mo similarity index 100% rename from src/motoko_regex/Types.mo rename to src/Types.mo diff --git a/src/motoko_regex/Regex.mo b/src/lib.mo similarity index 100% rename from 
src/motoko_regex/Regex.mo rename to src/lib.mo diff --git a/src/motoko_regex/Compiler.mo b/src/motoko_regex/Compiler.mo deleted file mode 100644 index 8a7ab29..0000000 --- a/src/motoko_regex/Compiler.mo +++ /dev/null @@ -1,326 +0,0 @@ -import Types "Types"; -import Buffer "mo:base/Buffer"; -import Order "mo:base/Order"; -import Iter "mo:base/Iter"; -import Char "mo:base/Char"; -import Array "mo:base/Array"; -import Int "mo:base/Int"; -import Option "mo:base/Option"; -import Nat "mo:base/Nat"; -import Result "mo:base/Result"; -import Extensions "Extensions"; -import Set "Extensions"; -import Optimizer "Optimizer"; - -module { - public class Compiler() { - - type State = Types.State; - type AST = Types.AST; - type LabeledASTNode = Types.LabeledASTNode; - type ASTNode = Types.ASTNode; - type NFA = Types.CompiledRegex; - type Transition = Types.Transition; - type TransitionTable = Types.TransitionTable; - type CompilerError = Types.RegexError; - - public type NullableFirstLast = { - nullable: Bool; - firstSet: Set.Set; - lastSet: Set.Set; - }; - - private var nextLabel = 0; - func getnextLabel() : Nat { - nextLabel += 1; - nextLabel; - }; - - // Label the AST nodes recursively - public func labelAST(ast: AST): LabeledASTNode { - func labelNode(node: AST): LabeledASTNode { - switch (node) { - case (#Character(c)) { - let nlabel = getnextLabel(); - { nlabel = ?nlabel; node = #Character(c) }; - }; - case (#Range(start, end)) { - let nlabel = getnextLabel(); - { nlabel = ?nlabel; node = #Range(start, end) }; - }; - case (#Metacharacter(m)) { - let nlabel = getnextLabel(); - { nlabel = ?nlabel; node = #Metacharacter(m) }; - }; - case (#Anchor(a)) { - { nlabel = null; node = #Anchor(a) }; - }; - case (#Concatenation(nodes)) { - let labeledChildren = Array.map(nodes, labelNode); - { nlabel = null; node = #Concatenation(labeledChildren) }; - }; - case (#Alternation(nodes)) { - let labeledChildren = Array.map(nodes, labelNode); - { nlabel = null; node = 
#Alternation(labeledChildren) }; - }; - case (#Quantifier({ subExpr; quantifier })) { - let labeledSubExpr = labelNode(subExpr); - { nlabel = null; node = #Quantifier({ subExpr = labeledSubExpr; quantifier = quantifier }) }; - }; - case (#Group({ subExpr; modifier; captureIndex })) { - let labeledSubExpr = labelNode(subExpr); - { nlabel = null; node = #Group({ subExpr = labeledSubExpr; modifier = modifier; captureIndex = captureIndex }) }; - }; - case (#CharacterClass({ isNegated; classes })) { - let labeledClasses = Array.map(classes, labelNode); - { nlabel = null; node = #CharacterClass({ isNegated = isNegated; classes = labeledClasses }) }; - }; - } - }; - labelNode(ast); - }; - - // Compute Nullable, First, and Last Sets - public func computeNullableFirstLast(ast: LabeledASTNode): NullableFirstLast { - var firstSet = Set.Set(Int.hash, Nat.equal); - var lastSet = Set.Set(Int.hash, Nat.equal); - - switch (ast.node) { - case (#Character(_) or #Range(_, _) or #Metacharacter(_) or #Anchor(_)) { - if (ast.nlabel != null) { - let nlabel = Option.get(ast.nlabel,0); - firstSet.add(nlabel); - lastSet.add(nlabel); - }; - { - nullable = false; - firstSet = firstSet; - lastSet = lastSet; - }; - }; - - case (#Concatenation(nodes)) { - var nullable = true; - - // First Set - label l for (i in Iter.range(0, nodes.size() - 1)) { - let subExpr = nodes[i]; - let subResult = computeNullableFirstLast(subExpr); - firstSet := firstSet.union(subResult.firstSet); - if (not subResult.nullable) { - nullable := false; - break l; - }; - }; - - // Last Set - nullable := true; - label ll for (i in Iter.range(nodes.size() - 1, 0)) { - let subExpr = nodes[i]; - let subResult = computeNullableFirstLast(subExpr); - lastSet := lastSet.union(subResult.lastSet); - if (not subResult.nullable) { - nullable := false; - break ll; - }; - }; - - // Overall Nullable - nullable := true; - for (subExpr in nodes.vals()) { - let subResult = computeNullableFirstLast(subExpr); - nullable := nullable and 
subResult.nullable; - }; - - { - nullable = nullable; - firstSet = firstSet; - lastSet = lastSet; - }; - }; - - case (#Alternation(nodes)) { - var nullable = false; - for (subExpr in nodes.vals()) { - let subResult = computeNullableFirstLast(subExpr); - nullable := nullable or subResult.nullable; - firstSet := firstSet.union(subResult.firstSet); - lastSet := lastSet.union(subResult.lastSet); - }; - { - nullable = nullable; - firstSet = firstSet; - lastSet = lastSet; - }; - }; - - case (#Quantifier({ subExpr; quantifier })) { - let subResult = computeNullableFirstLast(subExpr); - var nullable = false; - - // Quantifier logic based on min and max - switch (quantifier) { - case ({ min; max = _ ; mode = _ ; }) { - if (min == 0) { - nullable := true; - } else { - nullable := subResult.nullable; - }; - }; - }; - - { - nullable = nullable; - firstSet = subResult.firstSet; - lastSet = subResult.lastSet; - }; - }; - - case (#Group({ subExpr; modifier = _; captureIndex = _ ; })) { - let subResult = computeNullableFirstLast(subExpr); - { - nullable = subResult.nullable; - firstSet = subResult.firstSet; - lastSet = subResult.lastSet; - }; - }; - - case (#CharacterClass({ isNegated = _; classes })) { - var nullable = false; - for (cls in classes.vals()) { - let subResult = computeNullableFirstLast(cls); - nullable := nullable or subResult.nullable; - firstSet := firstSet.union(subResult.firstSet); - lastSet := lastSet.union(subResult.lastSet); - }; - { - nullable = nullable; - firstSet = firstSet; - lastSet = lastSet; - }; - }; - } - }; - public func constructNFA(ast: LabeledASTNode): NFA { - let nfl = computeNullableFirstLast(ast); - let states = Set.Set(Int.hash, Nat.equal); - let transitions = Buffer.Buffer<(State, Transition, State)>(0); - - states.add(0); - func addCharTransitions(from: State, to: State, node: LabeledASTNode) { - switch (node.node) { - case (#Character(c)) { transitions.add((from, #Char(c), to)); }; - case (#Range(start, end)) { transitions.add((from, 
#Range(start, end), to)); }; - case (#Metacharacter(_)) { transitions.add((from, #Any, to)); }; - case (#CharacterClass({ isNegated =_; classes })) { - for (cls in classes.vals()) { - addCharTransitions(from, to, cls); - }; - }; - case _ {} // Other cases don't add character transitions - }; - }; - for (pos in nfl.firstSet.toIter()) { - addCharTransitions(0, pos, ast); - states.add(pos); - }; - - // Add transitions between positions - func addTransitions(node: LabeledASTNode) { - switch (node.node) { - case (#Character(c)) { - if (node.nlabel != null) { - let nlabel = Option.get(node.nlabel, 0); - transitions.add((nlabel, #Char(c), nlabel)); - }; - }; - case (#Range(start, end)) { - if (node.nlabel != null) { - let nlabel = Option.get(node.nlabel, 0); - transitions.add((nlabel, #Range(start, end), nlabel)); - }; - }; - case (#Metacharacter(_)) { - if (node.nlabel != null) { - let nlabel = Option.get(node.nlabel, 0); - transitions.add((nlabel, #Any, nlabel)); - }; - }; - case (#Anchor(_)) { - // Anchors don't add transitions in Glushkov's construction - }; - case (#Concatenation(nodes)) { - for (i in Iter.range(0, nodes.size() - 2)) { - let current = computeNullableFirstLast(nodes[i]); - let next = computeNullableFirstLast(nodes[i+1]); - for (from in current.lastSet.toIter()) { - for (to in next.firstSet.toIter()) { - addCharTransitions(from, to, nodes[i+1]); - }; - }; - }; - for (subNode in nodes.vals()) { addTransitions(subNode); }; - }; - case (#Alternation(nodes)) { - for (subNode in nodes.vals()) { addTransitions(subNode); }; - }; - case (#Quantifier({ subExpr; quantifier })) { - addTransitions(subExpr); - let subNFL = computeNullableFirstLast(subExpr); - switch (quantifier) { - case ({ min = _; max = null; mode = _ }) { - for (from in subNFL.lastSet.toIter()) { - for (to in subNFL.firstSet.toIter()) { - addCharTransitions(from, to, subExpr); - }; - }; - }; - case _ {} - }; - }; - case (#Group({ subExpr; modifier = _; captureIndex })) { - 
addTransitions(subExpr); - let subNFL = computeNullableFirstLast(subExpr); - transitions.add(( - Option.get(node.nlabel, 0), - #Group({ - startState = Option.get(subExpr.nlabel, 0); - endState = subNFL.lastSet.toArray()[subNFL.lastSet.toArray().size()-1]; - captureIndex = captureIndex - }), - subNFL.lastSet.toArray()[subNFL.lastSet.toArray().size()-1] - )); - }; - case (#CharacterClass({ isNegated = _; classes })) { - for (cls in classes.vals()) { addTransitions(cls); }; - }; - }; - }; - addTransitions(ast); - // Add final states - let finalStates : Set.Set = switch (nfl.nullable) { - case (true) { - let tempSet = Set.Set(Int.hash, Nat.equal).union(nfl.lastSet); - tempSet.add(0); - tempSet; - }; - case (false) { nfl.lastSet }; - }; - // Add all states to the set - for ((from, _, to) in transitions.vals()) { - states.add(from); - states.add(to); - }; - return { - states = states.toArray(); - startState = 0; - acceptStates = finalStates.toArray(); - transitions = Buffer.toArray(transitions); - } - }; - public func compile(ast: AST): NFA { - let labeledAST = labelAST(ast); - constructNFA(labeledAST); - }; - }; -}; \ No newline at end of file diff --git a/src/motoko_regex/Tests/compiler.test.mo b/src/motoko_regex/Tests/compiler.test.mo deleted file mode 100644 index e69de29..0000000 diff --git a/src/motoko_regex/Tests/lexer.test.mo b/src/motoko_regex/Tests/lexer.test.mo deleted file mode 100644 index e69de29..0000000 diff --git a/src/motoko_regex/Tests/parser.test.mo b/src/motoko_regex/Tests/parser.test.mo deleted file mode 100644 index e69de29..0000000 diff --git a/src/motoko_regex/RegexTest.mo b/src/test/lib.test.mo similarity index 91% rename from src/motoko_regex/RegexTest.mo rename to src/test/lib.test.mo index 07f7785..647c859 100644 --- a/src/motoko_regex/RegexTest.mo +++ b/src/test/lib.test.mo @@ -1,9 +1,9 @@ -import Lexer "Lexer"; -import Parser "Parser"; -import Compiler "Compiler"; +import Lexer "../Lexer"; +import Parser "../Parser"; +import Compiler 
"../Compiler"; import Result "mo:base/Result"; -import Extensions "Extensions"; -import Types "Types"; +import Extensions "../Extensions"; +import Types "../Types"; import Debug "mo:base/Debug"; actor { @@ -57,4 +57,3 @@ actor { }; }*/ }; - diff --git a/src/motoko_regex/teststrings.md b/src/teststrings.md similarity index 100% rename from src/motoko_regex/teststrings.md rename to src/teststrings.md