diff --git a/.github/workflows/ci_test.yml b/.github/workflows/linux_ci.yml similarity index 100% rename from .github/workflows/ci_test.yml rename to .github/workflows/linux_ci.yml diff --git a/.github/workflows/windows_ci.yml b/.github/workflows/windows_ci.yml new file mode 100644 index 00000000..2409e1f1 --- /dev/null +++ b/.github/workflows/windows_ci.yml @@ -0,0 +1,21 @@ +name: CI Windows + +on: [push, pull_request] + +jobs: + build: + runs-on: windows-latest + steps: + - name: Set git to use LF + run: | + git config --global core.autocrlf false + git config --global core.eol lf + - name: Checkout Koral + uses: actions/checkout@v2 + - name: Set up JDK 11 + uses: actions/setup-java@v1 + with: + java-version: 11 + + - name: Build and install Koral + run: mvn --file pom.xml install diff --git a/Changes b/Changes index 3e98baaa..c7704005 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,11 @@ +0.42 2024-01-11 + - [feature] Support #REG in C2 (bodmer) + - [bugfix] Fix comma in #BED in C2 (bodmer) + 0.41 2023-09-13 + - [feature] Finish support for CQP (irimia) - [bugfix] Disallow empty regex in PQ+ (diewald) + - [cleanup] Change of groupID. 0.40 2023-07-26 - [feature] Initial support for CQP diff --git a/README.md b/README.md index d7370080..280ea328 100644 --- a/README.md +++ b/README.md @@ -132,7 +132,7 @@ J. Bingel and N. Diewald, "KoralQuery – a General Corpus Query Protocol," in P ## Authorship Koral and KoralQuery were developed by Joachim Bingel, -Nils Diewald, Michael Hanl and Eliza Margaretha at the +Nils Diewald, Michael Hanl, Eliza Margaretha, and Franck Bodmer at the [Leibniz Institute for the German Language (IDS)](https://www.ids-mannheim.de/), member of the [Leibniz Association](https://www.leibniz-gemeinschaft.de). @@ -142,7 +142,7 @@ The ANTLR grammars for parsing ANNIS QL and COSMAS II QL were developed by Thomas Krause (HU Berlin) and Franck Bodmer (IDS Mannheim), respectively. Minor adaptations of those grammars were implemented by the Koral authors. 
-The authors wish to thank Piotr Bański, Franck Bodmer, Elena Frick and +The authors wish to thank Piotr Bański, Elena Frick and Carsten Schnober for their valuable input. ## License diff --git a/pom.xml b/pom.xml index 6bdd5574..7b89fc9a 100644 --- a/pom.xml +++ b/pom.xml @@ -2,9 +2,9 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - de.ids_mannheim.korap + de.ids-mannheim.korap.koral Koral - 0.41.0 + 0.42.0 jar Koral https://korap.ids-mannheim.de @@ -39,11 +39,11 @@ UTF-8 - 2.15.2 + 2.16.1 4.9.3 3.5.3 11 - 2.20.0 + 2.22.1 @@ -83,7 +83,7 @@ com.google.guava guava - 32.1.2-jre + 33.0.0-jre com.fasterxml.jackson.core @@ -145,7 +145,7 @@ org.slf4j slf4j-api - 2.0.9 + 2.0.11 eu.clarin.sru.fcs @@ -160,7 +160,7 @@ org.apache.maven.plugins maven-clean-plugin - 3.3.1 + 3.3.2 @@ -177,6 +177,7 @@ **/c2ps_opIN.java **/c2ps_opOV.java **/c2ps_opPROX.java + **/c2ps_opREG.java **/c2ps_opWF.java **/c2ps_optCase.java **/.gitignore @@ -190,7 +191,7 @@ org.apache.maven.plugins maven-surefire-plugin - 3.1.2 + 3.2.5 false @@ -201,7 +202,7 @@ org.apache.maven.plugins maven-compiler-plugin - 3.11.0 + 3.12.1 diff --git a/src/main/antlr/cosmas/c2ps.g b/src/main/antlr/cosmas/c2ps.g index c264ea63..269f27f5 100644 --- a/src/main/antlr/cosmas/c2ps.g +++ b/src/main/antlr/cosmas/c2ps.g @@ -1,16 +1,26 @@ - // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * -// // -// COSMAS II zeilenorientierten Suchanfragesprache (C2 plain syntax) // -// globale Grammatik (ruft lokale c2ps_x.g Grammatiken auf). // -// 17.12.12/FB // -// v-0.6 // -// TODO: // -// - se1: Einsetzen des Default-Operators in den kumulierten AST. // +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * +// +// COSMAS II zeilenorientierten Suchanfragesprache (C2 plain syntax) +// globale Grammatik (ruft lokale c2ps_x.g Grammatiken auf). 
+// 17.12.12/FB +// v-0.6 +// TODO: +// - se1: Einsetzen des Default-Operators in den kumulierten AST. +// +// v0.7 - 25.07.23/FB +// - added: #REG(x) +// v0.8 - 06.11.23/FB +// - accepts #BED(searchword, sa) : comma attached to searchword. +// - more generally: comma at end of searchword, which is not enclosed by "..." is +// excluded from searchword now. +// - a comma inside a searchword is accepted if enclosed by "...". +// // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * grammar c2ps; options { output=AST; backtrack=true; k=5;} +// tokens that will appear as node names in the resulting AST: tokens {C2PQ; OPBED; OPTS; OPBEG; OPEND; OPNHIT; OPALL; OPLEM; OPPROX; ARG1; ARG2; OPWF; OPLEM; OPANNOT; @@ -21,6 +31,7 @@ tokens {C2PQ; OPBED; OPTS; OPBEG; OPEND; OPNHIT; OPALL; OPLEM; OPPROX; OPNOT; OPEXPR1; OPMORPH; OPELEM; + OPREG; } @header {package de.ids_mannheim.korap.query.parse.cosmas;} @@ -63,19 +74,50 @@ WS : (' '|'\r'|'\n')+ {skip();}; fragment DISTVALUE : ('0' .. '9')+ (':' ('0'..'9')+)? ; - + +fragment DISTTYPE // 30.11.23/FB + : ('w'|'s'|'p'|'t'); + +fragment DISTDIR // 30.11.23/FB + : ('+'|'-'); + +/* old version (before 30.11.23/FB) fragment DIST : ('+'|'-')? (DISTVALUE ('w'|'s'|'p'|'t') | ('w'|'s'|'p'|'t') DISTVALUE); - +*/ + +// accept these 3 options in any order. +// afterwards, we will have to check if any of them is missing. +// 30.11.23/FB + +fragment DIST // 30.11.23/FB + : (DISTDIR | DISTTYPE | DISTVALUE )+; + fragment GROUP : ('min' | 'max'); -OP_PROX : ('/' | '%') DIST (',' DIST)* (',' GROUP)? 
; +// version (12.01.24/FB): +// accept correct and incorrect chars till the next blank, that way the incorrect chars +// are submitted to the sub-grammar c2ps_opPROX where they are detected and an appropriate +// error message is inserted: +OP_PROX : ('/' | '%') DIST (~' ')*; + +// old version: accepts only correctly formulated options, so the incorrect +// chars/options are hard to detect: +// OP_PROX : ('/' | '%') DIST (',' DIST)* (',' GROUP)? ; OP_IN : '#IN' | '#IN(' OP_IN_OPTS? ')' ; OP_OV : '#OV' | '#OV(' OP_OV_OPTS? ')' ; +// #REG(abc['"]) or #REG('abc\'s') or #REG("abc\"s"): + +OP_REG : '#REG(' ' '* '\'' ('\\\''|~'\'')+ '\'' (' ')* ')' + | + '#REG(' ' '* '"' ('\\"'|~'"')+ '"' (' ')* ')' + | + '#REG(' ' '* ~('\''|'"'|' ') (~(')'))* ')'; + // EAVEXP wird hier eingesetzt für eine beliebige Sequenz von Zeichen bis zu ')'. fragment OP_IN_OPTS : EAVEXPR ; @@ -111,8 +153,23 @@ SEARCHLEMMA : '&' SEARCHWORD1 ; // rewrite rules funktionieren im lexer nicht: -> ^(OPLEM $SEARCHWORD1.text); // SEARCHWORD2: schluckt Blanks. Diese müssen nachträglich als Wortdelimiter erkannt werden. + +// current syntax, drawback is: +// e.g. aber, -> SEARCHWORD1 = "aber," +// but correct should be -> SEARCHWORD1 = "aber" +//SEARCHWORD1 +// : ~('"' | ' ' | '#' | ')' | '(' )+ ; + +// new syntax (06.11.23/FB): +// accept for searchword1 either a single ',' or exclude trailing ',' from searchword1: +// E.g. Haus, -> searchword1=Haus. +// For a ',' inside a search word, see searchword2. +// exclude trailing "," from searchword1. SEARCHWORD1 - : ~('"' | ' ' | '#' | ')' | '(' )+ ; + : (',' | ~('"' | ' ' | '#' | ')' | '(' | ',')+) ; + +// searchword2 accepts a ',' inside a searchword enclosed by "...". +// E.g. "Haus,tür": OK. 
SEARCHWORD2 : '"' (~('"') | '\\"')+ '"' ; @@ -226,7 +283,7 @@ searchLabel op2 : (opPROX | opIN | opOV | opAND | opOR | opNOT) ; // AST with Options for opPROX is returned by c2ps_opPROX.check(): -opPROX : OP_PROX -> ^(OPPROX {c2ps_opPROX.check($OP_PROX.text, $OP_PROX.index)} ); +opPROX : OP_PROX -> ^(OPPROX {c2ps_opPROX.check($OP_PROX.text, $OP_PROX.pos)} ); opIN : OP_IN -> {c2ps_opIN.check($OP_IN.text, $OP_IN.index)}; @@ -241,7 +298,7 @@ opNOT : ('nicht' | 'NICHT' | 'not' | 'NOT') -> ^(OPNOT); // OP1: Suchoperatoren mit 1 Argument: // ----------------------------------- -op1 : opBEG | opEND | opNHIT | opALL | opBED; +op1 : opBEG | opEND | opNHIT | opALL | opBED | opREG; // #BED(serchExpr, B). // B muss nachträglich in einer lokalen Grammatik überprüft werden. @@ -259,3 +316,5 @@ opEND : ( '#END(' | '#RECHTS(' ) searchExpr ')' -> ^(OPEND searchExpr) ; opNHIT : ( '#NHIT(' | '#INKLUSIVE(' ) searchExpr ')' -> ^(OPNHIT searchExpr) ; opALL : ( '#ALL(' | '#EXKLUSIVE(' ) searchExpr ')' -> ^(OPALL searchExpr) ; + +opREG : OP_REG -> ^(OPREG {c2ps_opREG.encode($OP_REG.text, OPREG)}) ; diff --git a/src/main/antlr/cosmas/c2ps_opPROX.g b/src/main/antlr/cosmas/c2ps_opPROX.g index f7a42f5d..1569d1a5 100644 --- a/src/main/antlr/cosmas/c2ps_opPROX.g +++ b/src/main/antlr/cosmas/c2ps_opPROX.g @@ -1,9 +1,10 @@ // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * -// // -// lokale Grammatik der COSMAS II zeilenorientierten Suchanfragesprache (= c2ps) // -// für den Abstandsoperator /w... und %w... // -// v-1.0 - 07.12.12/FB // -// // +// +// lokale Grammatik der COSMAS II zeilenorientierten Suchanfragesprache (= c2ps) +// für den Abstandsoperator /w... und %w... +// v-1.0 - 07.12.12/FB +// v-1.1 - 30.11.23/FB opPROX accepts any order of direction, measure and value. 
+// // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * grammar c2ps_opPROX; @@ -15,10 +16,15 @@ tokens { PROX_OPTS; DIST_LIST; DIST; RANGE; VAL0; MEAS; // measure DIR; PLUS; MINUS; BOTH; - GRP; MIN; MAX; } -@header {package de.ids_mannheim.korap.query.parse.cosmas;} + GRP; MIN; MAX; + } + +@header {package de.ids_mannheim.korap.query.parse.cosmas; + import de.ids_mannheim.korap.util.C2RecognitionException;} + @lexer::header {package de.ids_mannheim.korap.query.parse.cosmas;} + // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // // PROX-Lexer @@ -28,6 +34,12 @@ tokens { PROX_OPTS; DISTVALUE : ('0' .. '9')+ ; +// trying to catch everything (at the end of the option sequence) that should not appear inside the prox. options: +// e.g. /w5umin -> remain = 'umin'. + +PROX_REMAIN + : (',')? ('b'..'h'|'j'..'l'|'n'|'o'|'q'|'r'|'u'|'v'|'y'|'z'|'B'..'H'|'J'..'L'|'N'|'O'|'Q'|'R'|'U'|'V'|'Y'|'Z') (~ ' ')* ; + // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // // PROX-Parser @@ -35,36 +47,49 @@ DISTVALUE // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * -opPROX : proxTyp proxDist (',' proxDist)* (',' proxGroup)? +opPROX[int pos] : proxTyp proxDist[$pos] (',' proxDist[$pos])* (',' proxGroup)? (proxRemain[$pos])? - -> ^(PROX_OPTS {$proxTyp.tree} ^(DIST_LIST proxDist+) {$proxGroup.tree}); + -> ^(PROX_OPTS {$proxTyp.tree} ^(DIST_LIST proxDist+) {$proxGroup.tree} {$proxRemain.tree}); -proxTyp : '/' -> ^(TYP PROX) // klassischer Abstand. - | '%' -> ^(TYP EXCL); // ausschließender Abstand. +proxRemain[int pos] : PROX_REMAIN + + -> { c2ps_opPROX.checkRemain(DIST, $PROX_REMAIN.text, $pos) }; + +proxTyp : '/' -> ^(TYP PROX) // klassischer Abstand. + | '%' -> ^(TYP EXCL); // ausschließender Abstand. // proxDist: e.g. +5w or -s0 or /w2:4 etc. // kein proxDirection? 
hier, weil der Default erst innerhalb von Regel proxDirection erzeugt werden kann. -proxDist: proxDirection (v1=proxDistValue m1=proxMeasure | m2=proxMeasure v2=proxDistValue) - -> {$v1.tree != null}? ^(DIST {$proxDirection.tree} {$v1.tree} {$m1.tree}) - -> ^(DIST {$proxDirection.tree} {$v2.tree} {$m2.tree}); +// new rule: accepts options in any order: +// count each option type and find out if any one is missing or occurs multiple times. +// 28.11.23/FB + +proxDist[int pos] +@init{ int countM=0; int countD=0; int countV=0;} + : + ((m=proxMeasure {countM++;})|(d=proxDirection {countD++;})|(v=proxDistValue {countV++;}) )+ + + -> {c2ps_opPROX.encodeDIST(DIST, DIR, $d.tree, $m.tree, $v.tree, $proxDist.text, countD, countM, countV, $pos)}; + + +// new rule accepts only '+' and '-'; default tree for direction is +// set in c2ps_opPROX.encodeDIST() now. +// 28.11.23/FB proxDirection - : (p='+'|m='-')? -> {$p != null}? ^(DIR PLUS) - -> {$m != null}? ^(DIR MINUS) - -> ^(DIR BOTH) ; -/* -proxDistValue // proxDistMin ( ':' proxDistMax)? ; - : (m1=proxDistMin -> ^(DIST_RANGE VAL0 $m1)) (':' m2=proxDistMax -> ^(DIST_RANGE $m1 $m2))? ; -*/ -proxDistValue // proxDistMin ( ':' proxDistMax)? ; - : (m1=proxDistMin ) (':' m2=proxDistMax)? + : '+' -> ^(DIR PLUS) + | '-' -> ^(DIR MINUS); + +proxDistValue : (m1=proxDistMin ) (':' m2=proxDistMax)? -> {$m2.text != null}? ^(RANGE $m1 $m2) - -> ^(RANGE VAL0 $m1); - + -> ^(RANGE VAL0 $m1); + +// mentioning >1 measures will be checked/rejected in c2ps_opPROX.encodeDIST(). 
+ proxMeasure - : (m='w'|m='s'|m='p'|m='t') -> ^(MEAS $m); + : (meas='w'|meas='s'|meas='p'|meas='t') -> ^(MEAS $meas) ; proxDistMin : DISTVALUE; @@ -73,6 +98,8 @@ proxDistMax : DISTVALUE; proxGroup - : 'min' -> ^(GRP MIN) - | 'max' -> ^(GRP MAX); + : ('min'|'MIN') -> ^(GRP MIN) + | ('max'|'MAX') -> ^(GRP MAX); + + \ No newline at end of file diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opBED.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opBED.java index fb9df4e8..35f64379 100644 --- a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opBED.java +++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opBED.java @@ -17,7 +17,8 @@ public static Tree check (String input, int index) { c2ps_opBEDParser.opBEDOpts_return c2PQReturn = null; /* - System.out.println("check opBED: " + index + ": " + input); + System.out.format("opBED: check: input='%s', index=%d.\n", input, index); + System.out.format("opBED: tokens ='%s'.\n", tokens.toString()); System.out.flush(); */ @@ -68,7 +69,7 @@ public static Tree checkTPos (String input, int index) { public static void main (String args[]) throws Exception { - String[] input = { ",sa,se,-ta,-te/pa,-pe)", ",sa)", ",/pa,-pe)" }; + String[] input = { ",sa,se,-ta,-te/pa,-pe)", ",sa)", ",/pa,-pe)"}; Tree tree; for (int i = 0; i < input.length; i++) { diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java index 2a5b1634..62297195 100644 --- a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java +++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opPROX.java @@ -3,29 +3,177 @@ import org.antlr.runtime.*; import org.antlr.runtime.tree.*; +import de.ids_mannheim.korap.query.serialize.Antlr3AbstractQueryProcessor; +import de.ids_mannheim.korap.query.serialize.util.Antlr3DescriptiveErrorListener; +import 
de.ids_mannheim.korap.query.serialize.util.StatusCodes; +import de.ids_mannheim.korap.util.*; + /* * parses Opts of PROX: /w3:4,s0,min or %w3:4,s0,min. */ -public class c2ps_opPROX +public class c2ps_opPROX { - - public static Tree check (String input, int index) { + final static boolean bDebug = false; + + // type of an Error CommonToken: + final static int typeERROR = 1; + // Prox error codes defined in StatusCodes.java. + + private static CommonTree buildErrorTree(String text, int errCode, int typeDIST, int pos) + + { + CommonTree + errorTree = new CommonTree(new CommonToken(typeDIST, "DIST")); + CommonTree + errorNode = new CommonTree(new CommonToken(typeERROR, "ERROR")); + CommonTree + errorPos = new CommonTree(new CommonToken(typeERROR, String.valueOf(pos))); + CommonTree + errorCode = new CommonTree(new CommonToken(typeERROR, String.valueOf(errCode))); + CommonTree + errorMes; + String + mess; + + switch( errCode ) + { + case StatusCodes.ERR_PROX_MEAS_NULL: + mess = String.format("Abstandsoperator an der Stelle '%s' es fehlt eine der folgenden Angaben: w,s,p!", text); + errorMes = new CommonTree(new CommonToken(typeERROR, mess)); + break; + case StatusCodes.ERR_PROX_MEAS_TOOGREAT: + mess = String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 der folgenden Angaben einsetzen: w,s,p! " + + "Falls Mehrfachangabe erwünscht, müssen diese durch Kommata getrennt werden (z.B.: /+w2,s0).", text); + errorMes = new CommonTree(new CommonToken(typeERROR, mess)); + break; + case StatusCodes.ERR_PROX_VAL_NULL: + mess = String.format("Abstandsoperator an der Stelle '%s': Bitte einen numerischen Wert einsetzen (z.B. /+w5)! ", text); + errorMes = new CommonTree(new CommonToken(typeERROR, mess)); + break; + case StatusCodes.ERR_PROX_VAL_TOOGREAT: + mess = String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 numerischen Wert einsetzen (z.B. /+w5)! 
", text); + errorMes = new CommonTree(new CommonToken(typeERROR, mess)); + break; + case StatusCodes.ERR_PROX_DIR_TOOGREAT: + mess = String.format("Abstandsoperator an der Stelle '%s': Bitte nur 1 Angabe '+' oder '-' oder keine! ", text); + errorMes = new CommonTree(new CommonToken(typeERROR, mess)); + break; + case StatusCodes.ERR_PROX_WRONG_CHARS: + mess = String.format("Abstandsoperator an der Stelle '%s': unbekannte Abstandsoption(en)!", text); + errorMes = new CommonTree(new CommonToken(typeERROR, mess)); + break; + default: + mess = String.format("Abstandsoperator an der Stelle '%s': unbekannter Fehler. Korrekte Syntax z.B.: /+w2 oder /w10,s0.", text); + + errorMes = new CommonTree(new CommonToken(typeERROR, mess)); + } + + errorTree.addChild(errorNode); + errorNode.addChild(errorPos); + errorNode.addChild(errorCode); + errorNode.addChild(errorMes); + + return errorTree; + } + + /* encodeDIST(): + * - returns a CommonTree built from the Direction/Measure/Distance value. + * - accepts options in any order. + * - creates CommonTree in that order: Direction .. Distance value .. Measure. + * - sets default direction to BOTH if not set yet. + * - unfortunately, in ANTLR3 it seems that there is no way inside the Parser Grammar to get + * the absolute token position from the beginning of the query. Something like $ProxDist.pos or + * $start.pos is not available, so we have no info in this function about the position at which + * an error occurs. + * - For multiple prox options, e.g. /w2,s2,p0, this function if called 3 times. + * Arguments: + * countD : how many occurences of distance: + or - or nothing. If 0 insert the default BOTH. + * countM : how many occurences of measure: w,s,p,t: should be 1. + * countV : how many occurences of distance value: should be 1. 
+ * 28.11.23/FB + */ + + public static Object encodeDIST(int typeDIST, int typeDIR, Object ctDir, Object ctMeas, Object ctVal, String text, + int countD, int countM, int countV, int pos) + + { + CommonTree tree1 = (CommonTree)ctDir; + CommonTree tree2 = (CommonTree)ctMeas; + CommonTree tree3 = (CommonTree)ctVal; + + if( bDebug ) + System.err.printf("Debug: encodeDIST: scanned input='%s' countM=%d countD=%d countV=%d pos=%d.\n", + text, countM, countD, countV, pos); + + if( countM == 0 ) + return buildErrorTree(text, StatusCodes.ERR_PROX_MEAS_NULL, typeDIST, pos); + if( countM > 1 ) + return buildErrorTree(text, StatusCodes.ERR_PROX_MEAS_TOOGREAT, typeDIST, pos); + if( countV == 0 ) + return buildErrorTree(text, StatusCodes.ERR_PROX_VAL_NULL, typeDIST, pos); + if( countV > 1 ) + return buildErrorTree(text, StatusCodes.ERR_PROX_VAL_TOOGREAT, typeDIST, pos); + + if( countD == 0 ) + { + // if direction is not specified (ctDir == null or countD==0), return default = BOTH: + CommonTree treeDIR = new CommonTree(new CommonToken(typeDIR, (String)"DIR")); + CommonTree treeBOTH = new CommonTree(new CommonToken(typeDIR, "BOTH")); + treeDIR.addChild(treeBOTH); + + if( bDebug ) + System.err.printf("Debug: encodeDIST: tree for DIR: '%s'.\n", treeDIR.toStringTree()); + tree1 = treeDIR; + } + else if( countD > 1 ) + return buildErrorTree(text, StatusCodes.ERR_PROX_DIR_TOOGREAT, typeDIST, pos); + + // create DIST tree: + CommonTree + tree = new CommonTree(new CommonToken(typeDIST, "DIST")); + + tree.addChild(tree1); + tree.addChild(tree3); // tree3 before tree2 expected by serialization. + tree.addChild(tree2); + + if( bDebug ) + System.err.printf("Debug: encodeDIST: returning '%s'.\n", tree.toStringTree()); + + return tree; + } // encodeDIST + + /* checkRemain: + * + * - the chars in proxRemain are not allowed in prox. options. + * - return an error tree. 
+ * 12.01.24/FB + */ + + public static Object checkRemain(int typeDIST, String proxRemain, int pos) + + { + if( bDebug ) + System.out.printf("Debug: checkRemain: '%s' at pos %d.\n", proxRemain, pos); + + return buildErrorTree(proxRemain, StatusCodes.ERR_PROX_WRONG_CHARS, typeDIST, pos); + } + + public static Tree check (String input, int pos) throws RecognitionException + { ANTLRStringStream ss = new ANTLRStringStream(input); c2ps_opPROXLexer lex = new c2ps_opPROXLexer(ss); CommonTokenStream tokens = new CommonTokenStream(lex); c2ps_opPROXParser g = new c2ps_opPROXParser(tokens); c2ps_opPROXParser.opPROX_return c2PQReturn = null; - /* - System.out.println("check opPROX:" + index + ": " + input); - System.out.flush(); - */ + if( bDebug ) + System.out.printf("check opPROX: pos=%d input='%s'.\n", pos, input); try { - c2PQReturn = g.opPROX(); - } + c2PQReturn = g.opPROX(pos); + } catch (RecognitionException e) { e.printStackTrace(); } @@ -37,7 +185,19 @@ public static Tree check (String input, int index) { return tree; } - + public static boolean checkFalse() + { + + return false; // testwise + } + + public static boolean checkMeasure( Object measure) + { + System.err.printf("Debug: checkMeasure: measure = %s.\n", + measure == null ? "null" : "not null"); + return true; + } + /* * main testprogram: */ diff --git a/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opREG.java b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opREG.java new file mode 100644 index 00000000..a798647a --- /dev/null +++ b/src/main/java/de/ids_mannheim/korap/query/parse/cosmas/c2ps_opREG.java @@ -0,0 +1,235 @@ +package de.ids_mannheim.korap.query.parse.cosmas; + +import org.antlr.runtime.*; +import org.antlr.runtime.tree.*; + +import de.ids_mannheim.korap.query.serialize.util.Antlr3DescriptiveErrorListener; +import de.ids_mannheim.korap.util.StringUtils; + +/* + * 1. transforms and encodes a regular COSMAS II like expression #REG(regexpr) + * into a AST tree -> encode(). 
+ * 2. transforms tree into the corresponding Koral:token/Koral:term, like: + * e.g. #REG(abc[']?s) -> + * { + * "@type": "koral:term", + * "match": "match:eq", + * "type" : "type:regex", + * "key" : "abc[']?s", + * "layer": "orth" + * }... + * + * - see doc: http://korap.github.io/Koral/ + * - generation of koral:term -> processOPREG(). + * 06.09.23/FB + */ + +public class c2ps_opREG + +{ + private static boolean DEBUG = false; + + /* + * encode(): + * + * input = e.g. "#REG('abc(d|e)*')" -> return AST = (OPREG abc(d|e)*): + * + * Returned String: no enclosing "..." needed, so no escaping of " nor \ needed. + * 06.09.23/FB + */ + public static Tree encode (String input, int tokenType) + + { + if( DEBUG ) + { + System.out.printf("opREG.encode: input = >>%s<<, token type=%d.\n", input, tokenType); + System.out.flush(); + } + + if( input.substring(0, 5).compareToIgnoreCase("#REG(") != 0 || input.charAt(input.length()-1) != ')' ) + { + // error: '#REG(' and ')' not found: return input unchanged. + if( DEBUG ) System.out.printf("opREG.encode: unexpected input = >>%s<<: nothing encoded!\n", input); + return new CommonTree(new CommonToken(tokenType, input)); + } + + + StringBuffer sb = new StringBuffer(input.substring(5)); + sb.deleteCharAt(sb.length()-1); + + // #REG("a"), #REG(a), #REG('a') -> >>a<<. + // enclosing ".." are appended at the end of this function. + // a. remove blanks around ".." and '..', + // e.g. a. #REG( ' abc ' ) -> #REG(' abc '). + + StringUtils.removeBlanksAtBothSides(sb); + + if( sb.charAt(0) == '\'' || sb.charAt(0) == '"') + { + // remove pairwise at both ends. + sb.deleteCharAt(0); + if( sb.charAt(sb.length()-1) == '\'' || sb.charAt(sb.length()-1) == '"' ) + sb.deleteCharAt(sb.length()-1); + } + + // b. remove blanks inside '..' or "..", + // E.g. #REG(' abc ') -> #REG('abc'): + + StringUtils.removeBlanksAtBothSides(sb); + + /* unescape >>'<<, >>"<< and >>\<<. + * e.g. #REG('that\'s') -> "that\'s" -> >>that's<<. 
+ */ + + for(int i=0; i>%s<<.\n", sb.toString()); + + return new CommonTree(new CommonToken(tokenType, sb.toString())); + + } // encode + + /* + * printTokens: + * Notes: + * - must build a separate CommonTokenStream here, because + * tokens.fill() will consume all tokens. + * - prints to stdout list of tokens from lexer. + * - mainly for debugging. + * 14.09.23/FB + * + */ + + private static void printTokens(String query, Antlr3DescriptiveErrorListener errorListener) + + { + ANTLRStringStream + ss = new ANTLRStringStream(query); + c2psLexer + lex = new c2psLexer(ss); + org.antlr.runtime.CommonTokenStream + tokens = new org.antlr.runtime.CommonTokenStream(lex); // v3 + + lex.setErrorReporter(errorListener); + + // get all tokens from lexer: + tokens.fill(); + + System.out.printf("opREG.check: no. of tokens = %d.\n", tokens.size()); + for(int i=0; i>#REG(\" a"s\")<<. + lex.setErrorReporter(errorListener); + ((c2psParser) g).setErrorReporter(errorListener); + + if( DEBUG ) + { + //System.out.format("opREG.check: input='%s', index=%d.\n", query, index); + printTokens(query, errorListener); + System.out.flush(); + } + + + try { + c2psParser.c2ps_query_return + c2Return = ((c2psParser) g).c2ps_query(); // statt t(). + + // AST Tree anzeigen: + tree = (Tree) c2Return.getTree(); + //if (DEBUG) + // System.out.printf("opREG.check: tree = '%s'.\n", tree.toStringTree()); + } + catch (RecognitionException e) { + System.err.printf("c2po_opREG.check: Recognition Exception!\n"); + } + + return tree; + } // check + + + /** + * main + */ + + public static void main (String args[]) throws Exception + + { + String input[] = { "#REG(abc)", + "#REG(def's)", + "#REG( def's )", // all blanks should be removed. + "#REG( ' def\\'s ' )", // same + "#REG( \" def's \" )", // same + "#REG(abc[\"]ef)", + "#REG('abc')", // ' fehlt: generates Syntax Error . + "#REG('abc\')", // User input = #REG('abc\') : OK, nothing escaped. + "#REG('abc\'')", // User input = #REG('abc\') : OK, nothing escaped. 
+ "#REG('abc\\')", // User input = #REG('abc\') : OK, same behavior: \\ == \. + "#REG((a|b))", // broken input, should use ".." or '..'. + "#REG('(a|b)')", // OK. + "#REG(\"(a|b)\")", // OK. + "#REG(^[A-Z]+abc[\']*ung$)", + "#REG('ab(cd|ef)*')", + "#REG('abc(def|g)*[)(]')", + "#REG(\"abc(def|g)*[)(]\")", + "#REG('abc[\"]')", // User input = #REG('abc["]') : OK, needs escape => #REG("...\"...") + "#REG(\"abc[\\\"]\")", // User input = #REG("abc["]") : broken because of 2nd " -> syntax error. + "#REG(\"abc[\\\"]\")", // User input = #REG("abc[\"]"): OK, already escaped by user => #REG("...\"...") + "#REG(\"abc[\\\\\"]\")" // User input = #REG("abc[\\"]") : broken. with escaped " => #REG("...\"...") + }; + Tree tree; + + for (int i = 0; i < input.length; i++) + { + System.out.printf("c2ps_opREG: Parsing input %02d: >>%s<<\n", i, input[i]); + tree = check(input[i], 0); + System.out.printf("c2ps_opREG: tree %02d: >>%s<<.\n\n", i, tree.toStringTree()); + } + + + } // main + +} diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java b/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java index 69a6293a..285a3e71 100644 --- a/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java +++ b/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java @@ -1,5 +1,6 @@ package de.ids_mannheim.korap.query.serialize; +import de.ids_mannheim.korap.query.parse.cosmas.c2ps_opPROX; // error codes. 
import de.ids_mannheim.korap.query.object.ClassRefCheck; import de.ids_mannheim.korap.query.object.ClassRefOp; import de.ids_mannheim.korap.query.object.CosmasPosition; @@ -15,14 +16,19 @@ import de.ids_mannheim.korap.query.serialize.util.KoralObjectGenerator; import de.ids_mannheim.korap.query.serialize.util.ResourceMapper; import de.ids_mannheim.korap.query.serialize.util.StatusCodes; +import de.ids_mannheim.korap.util.StringUtils; import org.antlr.runtime.ANTLRStringStream; +import org.antlr.runtime.FailedPredicateException; import org.antlr.runtime.RecognitionException; +import org.antlr.runtime.Token; import org.antlr.runtime.tree.Tree; import org.antlr.v4.runtime.tree.ParseTree; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.HashBasedTable; import com.google.common.collect.Table; @@ -127,6 +133,82 @@ public class Cosmas2QueryProcessor extends Antlr3AbstractQueryProcessor { public static Pattern wildcardPlusPattern = Pattern.compile("([+])"); public static Pattern wildcardQuestionPattern = Pattern.compile("([?])"); + /** + * reportErrorsinTree: + * - traverse the AST tree and search for nodes of type ERROR, they contain + * the errCode, the error message and the error char position. + * - returns true if an error node is found in the tree referenced by 'node'. + * - adds error code, error position and error message to the error list. + * Arguments: + * node : might be null if it has been reset previously by another error handler. + * @param node + * @return true if an error node was found, + * false if no error node was found. 
+ * 19.12.23/FB + */ + + private boolean reportErrorsinTree(Tree node) + + { + final String func = "reportErrorsinTree"; + + //System.err.printf("Debug: %s: '%s' has %d children.\n", + // func, node.getText(), node.getChildCount()); + if( node == null ) + { + // System.err.printf("Warning: %s: node == null: no action requested.\n", func); + return false; + } + + if( node.getType() == 1 && node.getText().compareTo("ERROR") == 0 ) + { + // error node found: + // child[0] : error pos. + // child[1] : error code. + // child[2] : error message, containing offending string. + /* + System.err.printf("Debug: %s: child[0]='%s' child[1]='%s' child[2]='%s'.\n", func, + node.getChild(0) != null ? node.getChild(0).getText() : "???", + node.getChild(1) != null ? node.getChild(1).getText() : "???", + node.getChild(2) != null ? node.getChild(2).getText() : "???"); + */ + + int + errPos = node.getChild(0) != null ? Integer.parseInt(node.getChild(0).getText()) : 0; + int + errCode = node.getChild(1) != null ? Integer.parseInt(node.getChild(1).getText()) : StatusCodes.ERR_PROX_UNKNOWN; + String + errMess = node.getChild(2) != null ? node.getChild(2).getText() : "Genaue Fehlermeldung nicht auffindbar."; + + ArrayList + errorSpecs = new ArrayList(); + + errorSpecs.add(errCode); + errorSpecs.add(errMess); + errorSpecs.add(errPos); + addError(errorSpecs); + return true; + } + + for(int i=0; i>> " + requestMap.get("query") + " <<<"); - } - } + if (verbose) + { + //log.debug(">>> " + requestMap.get("query") + " <<<"); + try { + // query from requestMap is unformatted JSON. 
Make it pretty before displaying: + ObjectMapper mapper = new ObjectMapper(); + String jsonQuery = mapper.writerWithDefaultPrettyPrinter().writeValueAsString(requestMap.get("query")); + System.out.printf("Cosmas2QueryProcessor: JSON output:\n%s\n\n", jsonQuery); + } + catch (JsonProcessingException e) + { + System.out.printf("Cosmas2QueryProcessor: >>%s<<.\n", requestMap.get("query")); + //e.printStackTraObjectMapper mapper = new ObjectMapper();ce(); + } + } + } @Override public void process (String query) { Tree tree = null; tree = parseCosmasQuery(query); - if (DEBUG) { + if (DEBUG) + { + System.out.printf("\nProcessing COSMAS II query: %s.\n\n", query); log.debug("Processing CosmasII query: " + query); - } - if (tree != null) { - if (DEBUG) { - log.debug("ANTLR parse tree: " + tree.toStringTree()); - } + } + + if (tree != null) + { + if (verbose) { + log.debug("ANTLR parse tree: " + tree.toStringTree()); + System.out.printf("\nANTLR parse tree: %s.\n\n", tree.toStringTree()); + } + processNode(tree); - } + } } @@ -175,11 +275,13 @@ private void processNode (Tree node) { stackedObjects = 0; stackedToWrap = 0; - if (verbose) { + /* + if (verbose) { System.err.println(" " + objectStack); System.out.println(openNodeCats); } - + */ + /* *************************************** * Processing individual node categories * * *************************************** @@ -278,6 +380,11 @@ private void processNode (Tree node) { if (nodeCat.equals("OPBED")) { processOPBED(node); } + + if (nodeCat.equals("OPREG")) { + processOPREG(node); + } + objectsToPop.push(stackedObjects); toWrapsToPop.push(stackedToWrap); @@ -444,6 +551,88 @@ else if (conditionCount < conditionGroups.size()) { } } + /* processOPREG: + * + * - input Node structure is: (OPREG "regexpr"). + * - transforms tree into the corresponding Koral:token/Koral:term, like: + * e.g. 
#REG(abc[']?s) -> + * { + * "@type": "koral:term", + * "match": "match:eq", // optional + * "type" : "type:regex", + * "key" : "abc[']?s", + * "layer": "orth" + * }. + * + * - see doc: http://korap.github.io/Koral/ + * + * 06.09.23/FB + */ + + private void processOPREG (Tree node) + + { + int + nChild = node.getChildCount() - 1; + Tree + nodeChild = node.getChild(0); + boolean + bDebug = false; + + if( bDebug ) + { + //System.out.printf("Debug: processOPREG: node='%s' nChilds=%d.\n", node.toStringTree(), nChild+1); + System.out.printf("Debug: processOPREG: child: >>%s<< cat=%s type=%d.\n", + nodeChild.getText(), getNodeCat(node), nodeChild.getType()); + } + + // empty case (is that possible?): + if( nChild < 0 ) + return; + + // see processOPWF_OPWF_OPLEM + // for how to insert regexpr into Koral JSON-LD + + Map + token = KoralObjectGenerator.makeToken(); + + objectStack.push(token); + stackedObjects++; + + Map + fieldMap = KoralObjectGenerator.makeTerm(); + + token.put("wrap", fieldMap); + + // make category-specific fieldMap entry: + /* + System.out.printf("Debug: processOPREG: before replaceALL: >>%s<<.\n", nodeChild.toStringTree()); + String + value = nodeChild.toStringTree().replaceAll("\"", ""); + System.out.printf("Debug: processOPREG: after replaceALL: >>%s<<.\n", value); + */ + + /* replace replaceALL() by replaceIfNotEscaped() to delete every occurence of >>"<< + * which is not escaped by >>\<<, as it is important to keep the escaped sequence for + * the argument of #REG(). + * This is not possible with replaceALL(). 
+ */ + String + value = nodeChild.toStringTree(); // old version: replaceDoubleQuotes(nodeChild.toStringTree()); + + if( bDebug ) + System.out.printf("Debug: processOPREG: key: >>%s<<.\n", value); + + fieldMap.put("key", value); + fieldMap.put("layer", "orth"); + fieldMap.put("type", "type:regex"); + fieldMap.put("match", "match:eq"); + + // decide where to put (objPos=1, not clear why, but it works only like that - 20.09.23/FB): + putIntoSuperObject(token,1); + + } // processOPREG + private void processOPNHIT (Tree node) { Integer[] classRef = new Integer[] { classCounter + 128 + 1, @@ -553,7 +742,8 @@ private void processOPIN_OPOV (Tree node) { // Map posgroup = // makePosition(null); boolean isExclusion = isExclusion(node); - + boolean bDebug = false; + int focusClassCounter = classCounter; Map posGroup; @@ -567,7 +757,7 @@ private void processOPIN_OPOV (Tree node) { } else { posGroup = KoralObjectGenerator.makeGroup(KoralOperation.POSITION); - if (DEBUG) log.debug(posGroup.toString()); + if (bDebug) log.debug(posGroup.toString()); } Map positionOptions; @@ -622,11 +812,13 @@ private void processOPIN_OPOV (Tree node) { @SuppressWarnings("unchecked") private void processOPPROX (Tree node) { + // collect info Tree prox_opts = node.getChild(0); Tree typ = prox_opts.getChild(0); Tree dist_list = prox_opts.getChild(1); - // Step I: create group + + // Step I: create group Map group = KoralObjectGenerator.makeGroup(KoralOperation.SEQUENCE); @@ -1511,19 +1703,42 @@ private Map wrap (Map[] wrapCascade) { @SuppressWarnings("unchecked") - private void putIntoSuperObject (Map object, - int objStackPosition) { - if (objectStack.size() > objStackPosition) { + private void putIntoSuperObject (Map object, int objStackPosition) + + { + boolean bDebug = false; + + if( bDebug ) + { + System.out.printf("Debug: putIntosuperObject(<>,int): objectStack.size=%d objStackPos=%d object=%s.\n", + objectStack.size(), objStackPosition, object == null ? 
"null" : "not null"); + + if( objectStack != null && objectStack.size() > 0 ) + System.out.printf("Debug: putIntosuperObject: objectStack = %s.\n", objectStack.toString()); + + if( invertedOperandsLists != null ) + System.out.printf("Debug: putIntosuperObject: invertedOperandsLists: [%s].\n", invertedOperandsLists.toString()); + } + + + if (objectStack.size() > objStackPosition) + { ArrayList topObjectOperands = - (ArrayList) objectStack.get(objStackPosition) - .get("operands"); - if (!invertedOperandsLists.contains(topObjectOperands)) { + (ArrayList) objectStack.get(objStackPosition).get("operands"); + + if( bDebug ) + System.out.printf("Debug: putIntosuperObject: topObjectOperands = [%s].\n", topObjectOperands == null ? "null" : "not null"); + + objectStack.get(objStackPosition); + + if (!invertedOperandsLists.contains(topObjectOperands)) + { topObjectOperands.add(object); - } + } else { topObjectOperands.add(0, object); - } - } + } + } else { requestMap.put("query", object); } @@ -1618,7 +1833,8 @@ private Map termToFieldMap (String term) { private Tree parseCosmasQuery (String query) { - query = rewritePositionQuery(query); + + query = rewritePositionQuery(query); Tree tree = null; Antlr3DescriptiveErrorListener errorListener = new Antlr3DescriptiveErrorListener(query); @@ -1627,24 +1843,42 @@ private Tree parseCosmasQuery (String query) { c2psLexer lex = new c2psLexer(ss); org.antlr.runtime.CommonTokenStream tokens = new org.antlr.runtime.CommonTokenStream(lex); // v3 + parser = new c2psParser(tokens); + // Use custom error reporters lex.setErrorReporter(errorListener); ((c2psParser) parser).setErrorReporter(errorListener); + c2psParser.c2ps_query_return c2Return = ((c2psParser) parser).c2ps_query(); // statt t(). 
+ // AST Tree anzeigen: tree = (Tree) c2Return.getTree(); - if (DEBUG) log.debug(tree.toStringTree()); - } + + if (DEBUG) + { + System.out.printf("Debug: parseCosmasQuery: tree = '%s'.\n", tree.toStringTree()); + log.debug(tree.toStringTree()); + } + } + catch (FailedPredicateException fe) + { // unused so far - 11.01.24/FB + System.out.printf("parseCosmasQuery: FailedPredicateException!\n"); + addError(StatusCodes.MALFORMED_QUERY, + "failed predicate on prox something."); + } catch (RecognitionException e) { + // unused so far - 11.01.24/FB + System.out.printf("Debug: out: parseCosmasQuery: RecognitionException!\n"); log.error( "Could not parse query. Please make sure it is well-formed."); addError(StatusCodes.MALFORMED_QUERY, "Could not parse query. Please make sure it is well-formed."); } - String treestring = tree.toStringTree(); + String treestring = tree.toStringTree(); + boolean erroneous = false; if (parser.failed() || parser.getNumberOfSyntaxErrors() > 0) { erroneous = true; @@ -1653,10 +1887,28 @@ private Tree parseCosmasQuery (String query) { if (erroneous || treestring.contains(" errors; private List warnings; private List messages; - + + private boolean DEBUG = false; + public QuerySerializer () { this.errors = new ArrayList<>(); this.warnings = new ArrayList<>(); @@ -102,20 +105,31 @@ public static void main (String[] args) { int i = 0; String[] queries = null; String ql = "poliqarpplus"; + boolean + bDebug = false; + if (args.length < 2) { - System.err - .println("Usage: QuerySerializer \"query\" queryLanguage"); + System.err.println("\nUsage: QuerySerializer \"query\" queryLanguage [-show]"); System.exit(1); } else { queries = new String[] { args[0] }; ql = args[1]; } + if( args.length >= 3 ) + { + if( args[2].compareToIgnoreCase("-show") == 0 ) + bDebug = true; + } + for (String q : queries) { i++; try { - jg.run(q, ql); - System.out.println(); + if( bDebug ) + System.out.printf("QuerySerialize: query = >>%s<< lang = %s.\n", q, ql); + + jg.run(q, 
ql, bDebug); + System.out.println(); } catch (NullPointerException npe) { npe.printStackTrace(); @@ -139,7 +153,10 @@ public static void main (String[] args) { * 'poliqarpplus', 'cqp', 'cosmas2', 'annis' or 'cql'. * @throws IOException */ - public void run (String query, String queryLanguage) throws IOException { + public void run (String query, String queryLanguage, boolean bDebug) throws IOException { + + ast.verbose = bDebug; // debugging: 01.09.23/FB + if (queryLanguage.equalsIgnoreCase("poliqarp")) { ast = new PoliqarpPlusQueryProcessor(query); } @@ -165,7 +182,9 @@ else if (queryLanguage.equalsIgnoreCase("annis")) { throw new IllegalArgumentException( queryLanguage + " is not a supported query language!"); } - System.out.println(this.toJSON()); + + if( bDebug ) + System.out.println(this.toJSON()); } public QuerySerializer setQuery (String query, String ql, String version) { @@ -221,7 +240,7 @@ public void setVerbose (boolean verbose) { public final String toJSON () { String ser; try { - ser = mapper.writeValueAsString(raw()); + ser = mapper.writeValueAsString(raw()); // System.out.println(ser); } catch (JsonProcessingException e) { diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/TreeTemplate.java b/src/main/java/de/ids_mannheim/korap/query/serialize/TreeTemplate.java index 087ae32d..2618d577 100644 --- a/src/main/java/de/ids_mannheim/korap/query/serialize/TreeTemplate.java +++ b/src/main/java/de/ids_mannheim/korap/query/serialize/TreeTemplate.java @@ -167,7 +167,7 @@ private ParserRuleContext parseQuery (String q) { // Some things went wrong ... 
catch (Exception e) { - System.err.println(e.getMessage()); + System.err.println("parseQuery: " + e.getMessage()); } // Return the generated tree diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/util/Antlr3DescriptiveErrorListener.java b/src/main/java/de/ids_mannheim/korap/query/serialize/util/Antlr3DescriptiveErrorListener.java index 6e574fd4..e5f5d71d 100644 --- a/src/main/java/de/ids_mannheim/korap/query/serialize/util/Antlr3DescriptiveErrorListener.java +++ b/src/main/java/de/ids_mannheim/korap/query/serialize/util/Antlr3DescriptiveErrorListener.java @@ -31,7 +31,8 @@ public Antlr3DescriptiveErrorListener (String query) { @Override public void reportError (String error) { - String charPositionStr = null; + + String charPositionStr = null; String offendingSymbol = null; String expected = null; Pattern p = Pattern @@ -52,7 +53,8 @@ public void reportError (String error) { public ArrayList generateFullErrorMsg () { - ArrayList errorSpecs = new ArrayList(); + + ArrayList errorSpecs = new ArrayList(); String msg = getDetailedErrorMessage(); errorSpecs.add(StatusCodes.MALFORMED_QUERY); errorSpecs.add(msg); @@ -62,7 +64,14 @@ public ArrayList generateFullErrorMsg () { private String getDetailedErrorMessage () { - // default message, in case no detailed info is available; + + /* + System.err.printf("Debug: getDetailedErrorMessage: pos=%d expected='%s' offend='%s' query='%s'.\n", + charPosition, expected != null ? expected : "null", offendingSymbol != null ? offendingSymbol : "null", + query != null ? query : "null"); + */ + + // default message, in case no detailed info is available; String msg = "Malformed query. 
Could not parse."; char offendingSymbol = query.charAt(0); if (query.length() > charPosition) diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/util/StatusCodes.java b/src/main/java/de/ids_mannheim/korap/query/serialize/util/StatusCodes.java index 656228d6..b8c07650 100644 --- a/src/main/java/de/ids_mannheim/korap/query/serialize/util/StatusCodes.java +++ b/src/main/java/de/ids_mannheim/korap/query/serialize/util/StatusCodes.java @@ -14,4 +14,13 @@ public class StatusCodes { public final static int QUERY_TOO_COMPLEX = 311; public final static int UNKNOWN_QUERY_ERROR = 399; public final static int SERIALIZATION_FAILED = 300; + + // error codes for PROX syntax errors: + final public static int ERR_PROX_UNKNOWN = 320; + public final static int ERR_PROX_MEAS_NULL = 321; + public final static int ERR_PROX_MEAS_TOOGREAT = 322; + public final static int ERR_PROX_VAL_NULL = 323; + public final static int ERR_PROX_VAL_TOOGREAT = 324; + public final static int ERR_PROX_DIR_TOOGREAT = 325; + public final static int ERR_PROX_WRONG_CHARS = 326; } \ No newline at end of file diff --git a/src/main/java/de/ids_mannheim/korap/util/C2RecognitionException.java b/src/main/java/de/ids_mannheim/korap/util/C2RecognitionException.java new file mode 100644 index 00000000..92ba9ef6 --- /dev/null +++ b/src/main/java/de/ids_mannheim/korap/util/C2RecognitionException.java @@ -0,0 +1,27 @@ +package de.ids_mannheim.korap.util; + +import org.antlr.runtime.*; + +/* general String manipulation functions moved + * from de.ids_mannheim.de.korap.query.parse.cosmas.c2ps_opREG.java and Cosmas2QueryProcessor.java. 
+ * 24.10.23/FB + */ + +public final class C2RecognitionException extends RecognitionException { + + private static final boolean DEBUG = false; + public String mismatchedToken; + + public C2RecognitionException(String mismatchedToken) + + { + this.mismatchedToken = mismatchedToken; + + } // constructor C2RecognitionException + + public String getMismatchedToken() + { + return this.mismatchedToken; + } + +} diff --git a/src/main/java/de/ids_mannheim/korap/util/StringUtils.java b/src/main/java/de/ids_mannheim/korap/util/StringUtils.java new file mode 100644 index 00000000..29410d18 --- /dev/null +++ b/src/main/java/de/ids_mannheim/korap/util/StringUtils.java @@ -0,0 +1,157 @@ +package de.ids_mannheim.korap.util; + +/* general String manipulation functions moved + * from de.ids_mannheim.de.korap.query.parse.cosmas.c2ps_opREG.java and Cosmas2QueryProcessor.java. + * 24.10.23/FB + */ + +public final class StringUtils { + + private static final boolean DEBUG = false; + + /** + * replaceIfNotEscaped: + * - kind of adhoc alternative to String.replaceAll(). + * - replaces every occurence of >>"<< in buf IF it isn't escaped by >>\<<. + * Notes: + * - first intention: replace String.replaceALL() in processOPREG() because + * replaceALL() cannot be used in that special case. + * Returns the replaced string. + * 25.09.23/FB + */ + + public static String replaceIfNotEscaped(String buf) + + { + StringBuffer + sb = new StringBuffer(buf); + + for(int i=0; i>"<< for #REG(expr) + * instead of String.replaceAll(). + * - replaces every occurence of >>"<< in buf that is not escaped by >>\<<. + * - If the >>"<< is escaped, the escape char is removed: >>\"<< -> >>"<<. + * Notes: + * - the converted string is intented to be greped. + * E.g.: + * - >>"\"Abend\"-Ticket"<< -> >>"Abend"-Ticket<<. + * Returns the replaced string. 
+ * 26.09.23/FB + */ + + public static String replaceDoubleQuotes(String buf) + + { + StringBuffer + sb = new StringBuffer(buf); + + if( DEBUG ) System.out.printf("replaceDoubleQuotes: input: >>%s<<.\n", buf); + + for(int i=0; i>\"<< -> >>"<<. + sb.deleteCharAt(i); + else if( sb.codePointAt(i+1) == '\\' ) // >>\\<< unchanged. + i++; // keep >>\\<< unchanged. + } + } + else if( sb.codePointAt(i) == '"' ) + { + sb.deleteCharAt(i); // unescaped >>"<< is removed. + i--; + } + } + + if( DEBUG ) System.out.printf("replaceDoubleQuotes: output: >>%s<<.\n", sb.toString()); + + return sb.toString(); + + } // replaceDoubleQuotes + + /* encode2DoubleQuoted: + * transforms an unquoted string into an double quoted string + * and escapes >>"<< and >>/<<. + * E.g. >>.."..<< -> >>"..\".."<<. + * E.g. >>..\..<< -> >>"..\\.."<<. + * E.g. >>..\"..<< -> >>"..\\\".."<<, etc. + * + * escaping >>"<< and >>\<<, because they will be + * enclosed in >>"..."<<. + * >>"<< -> >>\"<< + * >>\<< -> >>\\<< + * + * 28.09.23/FB + * + * E.g. 
from previous, olddated version: + * \\" -> \\\" + * \\\" -> \\\" + */ + + public static void encode2DoubleQuoted(StringBuffer sb) + + { + if( DEBUG ) System.out.printf("encode2DoubleQuoted: input = >>%s<<.\n", sb.toString()); + + for(int i=0; i>%s<<.\n", sb.toString()); + } // encode2DoubleQuoted + + /* + * removeBlanksAtBothSides + * 28.09.23/FB + */ + + public static void removeBlanksAtBothSides(StringBuffer sb) + + { + int len; + + // remove leading blanks: >> abc << -> >>abc <<: + while( sb.length() > 0 && sb.charAt(0) == ' ') + sb.deleteCharAt(0); + + // remove trailing blanks: >>abc << -> >>abc<<: + while( (len=sb.length()) > 0 && sb.charAt(len-1) == ' ' ) + sb.deleteCharAt(len-1); + + } // removeBlanksAtBothSides + +} diff --git a/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java b/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java index 0722c9b8..bb4319c7 100644 --- a/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java +++ b/src/test/java/de/ids_mannheim/korap/test/cosmas2/Cosmas2QueryProcessorTest.java @@ -5,6 +5,7 @@ import org.junit.Test; +import com.fasterxml.jackson.core.JsonPointer; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; @@ -14,12 +15,15 @@ import static org.junit.Assert.*; +import static de.ids_mannheim.korap.query.parse.cosmas.c2ps_opREG.*; +import de.ids_mannheim.korap.util.StringUtils; /** * Tests for JSON-LD serialization of Cosmas II queries. 
* * @author Joachim Bingel (bingel@ids-mannheim.de) * @author Nils Diewald - * @version 1.1 + * @author Franck Bodmer + * @version 1.2 - 21.09.23 */ public class Cosmas2QueryProcessorTest { @@ -625,6 +629,32 @@ public void testOPPROX () throws JsonProcessingException, IOException { assertEquals("Mond", res.at("/query/operands/1/operands/0/wrap/key") .asText()); assertFalse(res.at("/query/inOrder").asBoolean()); + + // 15.01.24/FB: checking syntax error detectiong: + + query = "Sonne /+w Mond"; // distance value missing. + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + assertTrue(res.get("errors") != null); + + query = "Sonne /+2sw Mond"; // 2 distance types instead of 1. + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + assertTrue(res.get("errors") != null); + + query = "Sonne /+2s- Mond"; // 2 distance directions instead of 1. + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + assertTrue(res.get("errors") != null); + + query = "Sonne /+2s7 Mond"; // 2 distance values instead of 1. 
+ qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + assertTrue(res.get("errors") != null); } @@ -1194,7 +1224,9 @@ public void testOPNHIT () throws JsonProcessingException, IOException { } - + /* some tests added - 08.11.23/FB + */ + @Test public void testOPBED () throws JsonProcessingException, IOException { query = "#BED(der , sa)"; @@ -1224,6 +1256,95 @@ public void testOPBED () throws JsonProcessingException, IOException { assertEquals("s", res.at("/query/operands/0/operands/0/wrap/key") .asText()); + // 08.11.23/FB + // treats now "der," as "der" + ",": + query = "#BED(der, sa)"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + assertEquals("koral:reference", res.at("/query/@type").asText()); + assertEquals("operation:focus", res.at("/query/operation").asText()); + assertEquals(129, res.at("/query/classRef/0").asInt()); + assertEquals("koral:group", res.at("/query/operands/0/@type").asText()); + assertEquals("operation:position", res + .at("/query/operands/0/operation").asText()); + assertEquals("frames:startsWith", res.at("/query/operands/0/frames/0") + .asText()); + assertEquals("koral:group", res.at("/query/operands/0/@type").asText()); + assertEquals("operation:class", + res.at("/query/operands/0/operands/1/operation").asText()); + assertEquals(129, res.at("/query/operands/0/operands/1/classOut") + .asInt()); + assertEquals("koral:token", + res.at("/query/operands/0/operands/1/operands/0/@type") + .asText()); + assertEquals("der", + res.at("/query/operands/0/operands/1/operands/0/wrap/key") + .asText()); + assertEquals("koral:span", res.at("/query/operands/0/operands/0/@type") + .asText()); + assertEquals("s", res.at("/query/operands/0/operands/0/wrap/key") + .asText()); + + + // 08.11.23/FB + // treats now "der,sa" as "der" + "," + "sa": + query = "#BED(der,sa)"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + assertEquals("koral:reference", res.at("/query/@type").asText()); + 
assertEquals("operation:focus", res.at("/query/operation").asText()); + assertEquals(129, res.at("/query/classRef/0").asInt()); + assertEquals("koral:group", res.at("/query/operands/0/@type").asText()); + assertEquals("operation:position", res + .at("/query/operands/0/operation").asText()); + assertEquals("frames:startsWith", res.at("/query/operands/0/frames/0") + .asText()); + assertEquals("koral:group", res.at("/query/operands/0/@type").asText()); + assertEquals("operation:class", + res.at("/query/operands/0/operands/1/operation").asText()); + assertEquals(129, res.at("/query/operands/0/operands/1/classOut") + .asInt()); + assertEquals("koral:token", + res.at("/query/operands/0/operands/1/operands/0/@type") + .asText()); + assertEquals("der", + res.at("/query/operands/0/operands/1/operands/0/wrap/key") + .asText()); + assertEquals("koral:span", res.at("/query/operands/0/operands/0/@type") + .asText()); + assertEquals("s", res.at("/query/operands/0/operands/0/wrap/key") + .asText()); + + // 08.11.23/FB + // treats now "der,s0," as "der,s0" unchanged while written inside "...": + query = "#BED(\"der,so\", sa)"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + assertEquals("koral:reference", res.at("/query/@type").asText()); + assertEquals("operation:focus", res.at("/query/operation").asText()); + assertEquals(129, res.at("/query/classRef/0").asInt()); + assertEquals("koral:group", res.at("/query/operands/0/@type").asText()); + assertEquals("operation:position", res + .at("/query/operands/0/operation").asText()); + assertEquals("frames:startsWith", res.at("/query/operands/0/frames/0") + .asText()); + assertEquals("koral:group", res.at("/query/operands/0/@type").asText()); + assertEquals("operation:class", + res.at("/query/operands/0/operands/1/operation").asText()); + assertEquals(129, res.at("/query/operands/0/operands/1/classOut") + .asInt()); + assertEquals("koral:token", + res.at("/query/operands/0/operands/1/operands/0/@type") + 
.asText()); + assertEquals("der,so", + res.at("/query/operands/0/operands/1/operands/0/wrap/key") + .asText()); + assertEquals("koral:span", res.at("/query/operands/0/operands/0/@type") + .asText()); + assertEquals("s", res.at("/query/operands/0/operands/0/wrap/key") + .asText()); + query = "#COND(der , sa)"; qs.setQuery(query, "cosmas2"); res = mapper.readTree(qs.toJSON()); @@ -1702,4 +1823,227 @@ public void testMultipleParenthesis () throws JsonProcessingException, IOExcepti assertEquals("s", res.at("/query/distances/0/key").asText()); assertEquals("operation:sequence", res.at("/query/operation").asText()); } + + /* Testing #REG(expr), #REG('expr') and #REG("expr"). + * 21.09.23/FB + */ + + @Test + public void testREG () throws JsonProcessingException, IOException { + + boolean debug = false; + + query = "#REG(^aber$)"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText()); + assertEquals("koral:token", res.at("/query/@type").asText()); + assertEquals("koral:term", res.at("/query/wrap/@type").asText()); + assertEquals("^aber$", res.at("/query/wrap/key").asText()); + assertEquals("orth", res.at("/query/wrap/layer").asText()); + assertEquals("type:regex", res.at("/query/wrap/type").asText()); + assertEquals("match:eq", res.at("/query/wrap/match").asText()); + + query = "#REG('été\\'')"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText()); + assertEquals("été'" , res.at("/query/wrap/key").asText()); + assertEquals("type:regex", res.at("/query/wrap/type").asText()); + assertEquals("orth", res.at("/query/wrap/layer").asText()); + + query = "#REG('été\' )"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + if( debug ) System.out.printf("testREG: query: >>%s<< -> key: 
>>%s<<.\n", query, res.at("/query/wrap/key").asText()); + assertEquals("été" , res.at("/query/wrap/key").asText()); + assertEquals("type:regex", res.at("/query/wrap/type").asText()); + assertEquals("orth", res.at("/query/wrap/layer").asText()); + + query = "#REG('été\\')"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText()); + assertEquals("été\\", res.at("/query/wrap/key").asText()); + assertEquals("type:regex", res.at("/query/wrap/type").asText()); + assertEquals("orth", res.at("/query/wrap/layer").asText()); + + query = "#REG(l'été)"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText()); + assertEquals("l'été", res.at("/query/wrap/key").asText()); + assertEquals("type:regex", res.at("/query/wrap/type").asText()); + assertEquals("orth", res.at("/query/wrap/layer").asText()); + + query = "#REG(l\\'été)"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText()); + assertEquals("l'été", res.at("/query/wrap/key").asText()); + assertEquals("type:regex", res.at("/query/wrap/type").asText()); + assertEquals("orth", res.at("/query/wrap/layer").asText()); + + query = "#REG(\"l'été\")"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText()); + assertEquals("l'été", res.at("/query/wrap/key").asText()); + assertEquals("type:regex", res.at("/query/wrap/type").asText()); + assertEquals("orth", res.at("/query/wrap/layer").asText()); + + query = "#REG(\"l\\'été\")"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + 
+ if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText()); + assertEquals("l'été", res.at("/query/wrap/key").asText()); + assertEquals("type:regex", res.at("/query/wrap/type").asText()); + assertEquals("orth", res.at("/query/wrap/layer").asText()); + + query = "#REG('l\\'été.*')"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText()); + assertEquals("l'été.*", res.at("/query/wrap/key").asText()); + assertEquals("type:regex", res.at("/query/wrap/type").asText()); + assertEquals("orth", res.at("/query/wrap/layer").asText()); + + query = "#REG('\\\"été\\\"$')"; // means user input is #REG('\"été\"'). + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText()); + assertEquals("\"été\"$", res.at("/query/wrap/key").asText()); + assertEquals("type:regex", res.at("/query/wrap/type").asText()); + assertEquals("orth", res.at("/query/wrap/layer").asText()); + + // checks the >>"<<: + query = "#REG(\\\"Abend\\\"-Ticket)"; // means user input = #REG(\"Abend\"-Ticket). + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText()); + assertEquals("\"Abend\"-Ticket",res.at("/query/wrap/key").asText()); + assertEquals("type:regex", res.at("/query/wrap/type").asText()); + assertEquals("orth", res.at("/query/wrap/layer").asText()); + + query = "#REG('\\\"Abend\\\"-Ticket')"; // means user input = #REG(\"Abend\"-Ticket). 
+ qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText()); + assertEquals("\"Abend\"-Ticket",res.at("/query/wrap/key").asText()); + assertEquals("type:regex", res.at("/query/wrap/type").asText()); + assertEquals("orth", res.at("/query/wrap/layer").asText()); + + query = "#REG('\"Abend\"-Ticket')"; // means user input = #REG('"Abend"-Ticket'). + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText()); + assertEquals("\"Abend\"-Ticket",res.at("/query/wrap/key").asText()); // key must be escaped, because converted to in "...". + assertEquals("type:regex", res.at("/query/wrap/type").asText()); + assertEquals("orth", res.at("/query/wrap/layer").asText()); + + query = "#REG(\"\\\"Abend\\\"-Ticket\")"; // means user input = #REG("\"Abend\"-Ticket") -> key: >>"Abend"-Ticket<<. 
+ qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + if( debug ) System.out.printf("testREG: query: >>%s<< -> key: >>%s<<.\n", query, res.at("/query/wrap/key").asText()); + assertEquals("\"Abend\"-Ticket",res.at("/query/wrap/key").asText()); + assertEquals("type:regex", res.at("/query/wrap/type").asText()); + assertEquals("orth", res.at("/query/wrap/layer").asText()); + // + + query = "#REG('^(a|b)?+*$')"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + assertEquals("^(a|b)?+*$", res.at("/query/wrap/key").asText()); + assertEquals("type:regex", res.at("/query/wrap/type").asText()); + assertEquals("orth", res.at("/query/wrap/layer").asText()); + + query = "#REG(\"[A-Z()]\")"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + assertEquals("[A-Z()]", res.at("/query/wrap/key").asText()); + assertEquals("orth", res.at("/query/wrap/layer").asText()); + assertEquals("type:regex", res.at("/query/wrap/type").asText()); + + query = "#REG(^klein.*) /s0 #REG(A.*ung)"; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + //System.out.printf("Debug: res: pretty: %s.\n", res.toPrettyString()); + + assertEquals("^klein.*", res.at("/query/operands/0/operands/0/wrap/key").asText()); + assertEquals("orth", res.at("/query/operands/0/operands/0/wrap/layer").asText()); + assertEquals("type:regex", res.at("/query/operands/0/operands/0/wrap/type").asText()); + + assertEquals("A.*ung", res.at("/query/operands/1/operands/0/wrap/key").asText()); + assertEquals("orth", res.at("/query/operands/1/operands/0/wrap/layer").asText()); + assertEquals("type:regex", res.at("/query/operands/1/operands/0/wrap/type").asText()); + + query = "#REG( ) "; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + assertTrue(res.toString().contains("Failing to parse")); + + query = "#REG('' ) "; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + 
assertTrue(res.toString().contains("Failing to parse")); + + query = "#REG(\"\") "; + qs.setQuery(query, "cosmas2"); + res = mapper.readTree(qs.toJSON()); + + assertTrue(res.toString().contains("Failing to parse")); + + } + + @Test + public void testREGencode2DoubleQuoted () { + StringBuffer sb = new StringBuffer("..\".."); + StringUtils.encode2DoubleQuoted(sb); + assertEquals("\"..\\\"..\"",sb.toString()); + + sb = new StringBuffer("..\\.."); + StringUtils.encode2DoubleQuoted(sb); + assertEquals("\"..\\\\..\"", sb.toString()); + + sb = new StringBuffer("..\".."); + StringUtils.encode2DoubleQuoted(sb); + assertEquals("\"..\\\"..\"", sb.toString()); + } + + @Test + public void testREGremoveBlanksAtBothSides () { + StringBuffer sb = new StringBuffer(" aabc cjs ss "); + StringUtils.removeBlanksAtBothSides(sb); + assertEquals("aabc cjs ss",sb.toString()); + + sb = new StringBuffer("abc "); + StringUtils.removeBlanksAtBothSides(sb); + assertEquals("abc",sb.toString()); + + sb = new StringBuffer(" abc"); + StringUtils.removeBlanksAtBothSides(sb); + assertEquals("abc",sb.toString()); + } + + + }