Skip to content

Commit

Permalink
refactor!: restructure repository and update compiler version
Browse files Browse the repository at this point in the history
- Removed the 'motoko_regex' folder and moved all files into 'src' for easier importing through mops.
- Renamed 'RegexText.mo' to 'lib.test.mo' and 'Regex.mo' to 'lib.mo'.
- Updated 'dfx.json' to use 'src/motoko_regex/test/lib.test.mo'.
- Updated import paths in 'lib.test.mo'.
- Reverted the compiler to an older version, causing potential compatibility issues.

BREAKING CHANGE: The repository structure and file names have changed. Additionally, the compiler has been reverted to an older version.
  • Loading branch information
Demali-876 committed Nov 3, 2024
1 parent 0434709 commit 786d76b
Show file tree
Hide file tree
Showing 16 changed files with 368 additions and 333 deletions.
2 changes: 1 addition & 1 deletion dfx.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"canisters": {
"motoko_regex": {
"main": "src/motoko_regex/Regex.mo",
"main": "src/motoko_regex/test/lib.test.mo",
"type": "motoko"
}
},
Expand Down
362 changes: 362 additions & 0 deletions src/Compiler.mo
Original file line number Diff line number Diff line change
@@ -0,0 +1,362 @@
import Types "Types";
import Buffer "mo:base/Buffer";
import Order "mo:base/Order";
import Iter "mo:base/Iter";
import Char "mo:base/Char";
import Array "mo:base/Array";
import Extensions "Extensions";
import Optimizer "Optimizer";

module { /*
public class Compiler() {
type State = Types.State;
type NFA = Types.CompiledRegex;
type Transition = Types.Transition;
type TransitionTable = Types.TransitionTable;
public func compile(ast: Types.ASTNode): NFA {
let startState: State = 0;
let (transitionTable, acceptStates) = buildNFA(ast, startState);
{
transitions = transitionTable;
startState = startState;
acceptStates = acceptStates;
}
};
public func buildNFA(ast: Types.ASTNode, startState: State): (TransitionTable, [State]) {
switch (ast) {
case (#Character(char)) {
let acceptState: State = startState + 1;
let transition: Transition = #Char(char);
let transitionTable: TransitionTable = [(startState, transition, acceptState)];
(transitionTable, [acceptState]);
};
case (#Concatenation(subExprs)) {
var currentStartState: State = startState;
let transitionBuffer = Buffer.Buffer<(State, Transition, State)>(subExprs.size());
var acceptStates: [State] = [];
for (subExpr in subExprs.vals()) {
let (subTransitionTable, subAcceptStates) = buildNFA(subExpr, currentStartState);
for ((fromState, transition, toState) in subTransitionTable.vals()) {
transitionBuffer.add((fromState, transition, toState));
};
currentStartState := subAcceptStates[0];
acceptStates := subAcceptStates;
};
(Buffer.toArray(transitionBuffer), acceptStates);
};
case (#Alternation(subExprs)) {
let newStartState: State = startState;
let newAcceptState: State = newStartState + subExprs.size() + 1;
let transitionBuffer = Buffer.Buffer<(State, Transition, State)>(subExprs.size() * 2);
var acceptStates: [State] = [newAcceptState];
for (subExpr in subExprs.vals()) {
let (subTransitionTable, subAcceptStates) = buildNFA(subExpr, newStartState + 1);
transitionBuffer.add((newStartState, #Epsilon, subTransitionTable[0].0));
for ((fromState, transition, toState) in subTransitionTable.vals()) {
transitionBuffer.add((fromState, transition, toState));
};
for (acceptState in subAcceptStates.vals()) {
transitionBuffer.add((acceptState, #Epsilon, newAcceptState));
};
};
(Buffer.toArray(transitionBuffer), acceptStates);
};
case (#Quantifier { subExpr; min; max; mode }) {
let transitionBuffer = Buffer.Buffer<(State, Transition, State)>(10);
let (subTransitionTable, subAcceptStates) = buildNFA(subExpr, startState + 1);
let quantifierStartState: State = startState;
let quantifierAcceptState: State = startState + subTransitionTable.size() + 2;
// Assign default max value (100) if max is null
let maxVal = switch (max) {
case (null) 100;
case (?value) value;
};
for ((fromState, transition, toState) in subTransitionTable.vals()) {
transitionBuffer.add((fromState, transition, toState));
};
if (min == 0 and max == null) {
switch (mode) {
case (#Greedy) {
transitionBuffer.add((quantifierStartState, #Epsilon, subTransitionTable[0].0));
for (acceptState in subAcceptStates.vals()) {
transitionBuffer.add((acceptState, #Epsilon, quantifierAcceptState)); // Exit loop
transitionBuffer.add((acceptState, #Epsilon, quantifierStartState)); // Loop back
};
};
case (#Lazy) {
transitionBuffer.add((quantifierStartState, #Epsilon, quantifierAcceptState)); // Match 0 times first
transitionBuffer.add((quantifierStartState, #Epsilon, subTransitionTable[0].0)); // Try to match sub-expression
for (acceptState in subAcceptStates.vals()) {
transitionBuffer.add((acceptState, #Epsilon, quantifierStartState)); // Loop back for more matches
};
};
case (#Possessive) {
transitionBuffer.add((quantifierStartState, #Epsilon, subTransitionTable[0].0));
for (acceptState in subAcceptStates.vals()) {
transitionBuffer.add((acceptState, #Epsilon, quantifierAcceptState)); // Exit loop, no backtracking
};
};
};
} else if (min == 1 and max == null) {
switch (mode) {
case (#Greedy) {
transitionBuffer.add((quantifierStartState, #Epsilon, subTransitionTable[0].0));
for (acceptState in subAcceptStates.vals()) {
transitionBuffer.add((acceptState, #Epsilon, quantifierAcceptState));
transitionBuffer.add((acceptState, #Epsilon, quantifierStartState));
};
};
case (#Lazy) {
transitionBuffer.add((quantifierStartState, #Epsilon, subTransitionTable[0].0));
transitionBuffer.add((quantifierStartState, #Epsilon, quantifierAcceptState)); // Try to match 0 times
for (acceptState in subAcceptStates.vals()) {
transitionBuffer.add((acceptState, #Epsilon, quantifierStartState));
};
};
case (#Possessive) {
transitionBuffer.add((quantifierStartState, #Epsilon, subTransitionTable[0].0));
for (acceptState in subAcceptStates.vals()) {
transitionBuffer.add((acceptState, #Epsilon, quantifierAcceptState));
};
};
};
} else if (min == 0 and max == ?1) {
switch (mode) {
case (#Greedy) {
transitionBuffer.add((quantifierStartState, #Epsilon, subTransitionTable[0].0));
for (acceptState in subAcceptStates.vals()) {
transitionBuffer.add((acceptState, #Epsilon, quantifierAcceptState));
};
};
case (#Lazy) {
transitionBuffer.add((quantifierStartState, #Epsilon, quantifierAcceptState)); // Match 0 times first
transitionBuffer.add((quantifierStartState, #Epsilon, subTransitionTable[0].0)); // Try to match 1 time
};
case (#Possessive) {
transitionBuffer.add((quantifierStartState, #Epsilon, subTransitionTable[0].0));
for (acceptState in subAcceptStates.vals()) {
transitionBuffer.add((acceptState, #Epsilon, quantifierAcceptState)); // Exit, no backtracking
};
};
};
} else if (min > 0 and max != null) {
var currentStartState = quantifierStartState;
for (i in Iter.range(0, min - 1)) {
let (subTrans, subAcc) = buildNFA(subExpr, currentStartState + 1);
for ((fromState, transition, toState) in subTrans.vals()) {
transitionBuffer.add((fromState, transition, toState));
};
currentStartState := subAcc[0];
};
for (i in Iter.range(min, maxVal - 1)) {
let (subTrans, subAcc) = buildNFA(subExpr, currentStartState + 1);
for ((fromState, transition, toState) in subTrans.vals()) {
transitionBuffer.add((fromState, transition, toState));
};
currentStartState := subAcc[0];
};
};
(Buffer.toArray(transitionBuffer), [quantifierAcceptState]);
};
case (#Group { subExpr; captureIndex; modifier }) {
let transitionBuffer = Buffer.Buffer<(State, Transition, State)>(10);
let groupStartState: State = startState;
let groupEndState: State = groupStartState + 1;
switch (modifier) {
case null {
let (subTransitionTable, subAcceptStates) = buildNFA(subExpr, groupStartState + 1);
for ((fromState, transition, toState) in subTransitionTable.vals()) {
transitionBuffer.add((fromState, transition, toState));
};
for (acceptState in subAcceptStates.vals()) {
transitionBuffer.add((acceptState, #Epsilon, groupEndState));
};
if (captureIndex != null) {
transitionBuffer.add((groupStartState, #Group {startState = groupStartState; endState = groupEndState; captureIndex}, groupEndState));
};
(Buffer.toArray(transitionBuffer), [groupEndState]);
};
case (?#NonCapturing) {
let (subTransitionTable, subAcceptStates) = buildNFA(subExpr, groupStartState + 1);
for ((fromState, transition, toState) in subTransitionTable.vals()) {
transitionBuffer.add((fromState, transition, toState));
};
for (acceptState in subAcceptStates.vals()) {
transitionBuffer.add((acceptState, #Epsilon, groupEndState));
};
(Buffer.toArray(transitionBuffer), [groupEndState]);
};
case (?#PositiveLookahead) {
let (subTransitionTable, _) = buildNFA(subExpr, groupStartState);
for ((fromState, transition, toState) in subTransitionTable.vals()) {
transitionBuffer.add((fromState, transition, toState));
};
(Buffer.toArray(transitionBuffer), [groupStartState]);
};
case (?#NegativeLookahead) {
let (subTransitionTable, _) = buildNFA(subExpr, groupStartState);
for ((fromState, transition, toState) in subTransitionTable.vals()) {
transitionBuffer.add((fromState, transition, toState));
};
(Buffer.toArray(transitionBuffer), [groupStartState]); // Group ends without advancing
};
case (?#PositiveLookbehind) {
let (subTransitionTable, _) = buildNFA(subExpr, groupStartState);
for ((fromState, transition, toState) in subTransitionTable.vals()) {
transitionBuffer.add((fromState, transition, toState));
};
(Buffer.toArray(transitionBuffer), [groupStartState]);
};
case (?#NegativeLookbehind) {
let (subTransitionTable, _) = buildNFA(subExpr, groupStartState);
for ((fromState, transition, toState) in subTransitionTable.vals()) {
transitionBuffer.add((fromState, transition, toState));
};
(Buffer.toArray(transitionBuffer), [groupStartState]);
};
};
};
case (#Metacharacter metacharType) {
let transitionBuffer = Buffer.Buffer<(State, Transition, State)>(1);
let acceptState: State = startState + 1;
switch (metacharType) {
case (#Dot) {
transitionBuffer.add((startState, #Any, acceptState));
};
case (_) {
let metaRanges = Extensions.metacharToRanges(metacharType);
for (range in metaRanges.vals()) {
transitionBuffer.add((startState, #Range(range.0, range.1), acceptState));
};
};
};
(Buffer.toArray(transitionBuffer), [acceptState]);
};
case (#CharacterClass { isNegated; classes }) {
let transitionBuffer = Buffer.Buffer<(State, Transition, State)>(classes.size());
let acceptState: State = startState + 1;
let ranges = Buffer.Buffer<(Char, Char)>(classes.size());
for (charClass in classes.vals()) {
switch (charClass) {
case (#Single(char)) {
ranges.add((char, char));
};
case (#Range(from, to)) {
ranges.add((from, to));
};
case (#Metacharacter(metaType)) {
let metaRanges = Extensions.metacharToRanges(metaType);
for (range in metaRanges.vals()) {
ranges.add(range);
};
};
case (#Quantified(charClass, quantType)) {
ignore buildNFA(#Quantifier {
subExpr = #CharacterClass({isNegated = isNegated; classes = [charClass]});
min = quantType.min;
max = quantType.max;
mode = quantType.mode;
}, startState);
};
};
};
if (isNegated) {
var lastChar: Char = Char.fromNat32(0);
let sortedRanges = Buffer.toArray(ranges);
ignore Array.sort<(Char, Char)>(sortedRanges, func(a: (Char, Char), b: (Char, Char)) : Order.Order {
Char.compare(a.0, b.0)
});
for (range in sortedRanges.vals()) {
if (Char.toNat32(lastChar) < Char.toNat32(range.0)) {
transitionBuffer.add((startState, #Range(lastChar, Char.fromNat32(Char.toNat32(range.0) - 1)), acceptState));
};
lastChar := Char.fromNat32(Char.toNat32(range.1) + 1);
};
if (Char.toNat32(lastChar) <= 255) {
transitionBuffer.add((startState, #Range(lastChar, Char.fromNat32(255)), acceptState));
};
} else {
for (range in ranges.vals()) {
transitionBuffer.add((startState, #Range(range.0, range.1), acceptState));
};
};
(Buffer.toArray(transitionBuffer), [acceptState]);
};
case (#Anchor anchorType) {
let transitionBuffer = Buffer.Buffer<(State, Transition, State)>(1);
let acceptState: State = startState + 1;
switch (anchorType) {
case (#StartOfString) {
transitionBuffer.add((startState, #Epsilon, acceptState));
};
case (#EndOfString) {
transitionBuffer.add((startState, #Epsilon, acceptState));
};
case (#WordBoundary) {
transitionBuffer.add((startState, #Epsilon, acceptState));
};
case (#NonWordBoundary) {
transitionBuffer.add((startState, #Epsilon, acceptState));
};
case (#StartOfStringOnly) {
transitionBuffer.add((startState, #Epsilon, acceptState));
};
case (#EndOfStringOnly) {
transitionBuffer.add((startState, #Epsilon, acceptState));
};
case (#PreviousMatchEnd) {
transitionBuffer.add((startState, #Epsilon, acceptState));
};
};
(Buffer.toArray(transitionBuffer), [acceptState]);
};
}
};
};*/
};
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit 786d76b

Please sign in to comment.