diff --git a/src/main/java/liqp/LValue.java b/src/main/java/liqp/LValue.java index 8e78eb8f..aaa49350 100644 --- a/src/main/java/liqp/LValue.java +++ b/src/main/java/liqp/LValue.java @@ -494,7 +494,7 @@ public Map asMap(Object value) { } public static boolean isBlank(final String string) { - if (string == null || string.length() == 0) + if (string == null || string.isEmpty()) return true; int l = string.length(); diff --git a/src/main/java/liqp/filters/date/fuzzy/LookupResult.java b/src/main/java/liqp/filters/date/fuzzy/LookupResult.java index 59ae1b59..58114514 100644 --- a/src/main/java/liqp/filters/date/fuzzy/LookupResult.java +++ b/src/main/java/liqp/filters/date/fuzzy/LookupResult.java @@ -2,13 +2,13 @@ import java.util.List; -class LookupResult { +public class LookupResult { private final String name; final List parts; - final boolean found; + final public boolean found; - LookupResult(String name, List parts, boolean found) { + public LookupResult(String name, List parts, boolean found) { this.name = name; this.parts = parts; this.found = found; diff --git a/src/main/java/liqp/filters/date/fuzzy/Part.java b/src/main/java/liqp/filters/date/fuzzy/Part.java index 64e3c83a..1d8d80f2 100644 --- a/src/main/java/liqp/filters/date/fuzzy/Part.java +++ b/src/main/java/liqp/filters/date/fuzzy/Part.java @@ -21,7 +21,7 @@ class NewPart implements Part { final int end; protected final String source; - NewPart(int start, int end, String source) { + public NewPart(int start, int end, String source) { this.start = start; this.end = end; this.source = source; @@ -106,7 +106,7 @@ class RecognizedPart implements Part { protected final List patterns; public final String source; - RecognizedPart(int start, int end, List patterns, String source) { + public RecognizedPart(int start, int end, List patterns, String source) { this.start = start; this.end = end; this.patterns = patterns; @@ -148,7 +148,7 @@ public String toString() { } class RecognizedMonthNamePart extends RecognizedPart { - RecognizedMonthNamePart(int start, int end, List patterns, String source) { + public RecognizedMonthNamePart(int start, int end, List patterns, String source) { super(start, end, patterns, source); } diff --git a/src/main/java/liqp/filters/date/fuzzy/PartExtractor.java b/src/main/java/liqp/filters/date/fuzzy/PartExtractor.java index 38417564..b2a4859e 100644 --- a/src/main/java/liqp/filters/date/fuzzy/PartExtractor.java +++ b/src/main/java/liqp/filters/date/fuzzy/PartExtractor.java @@ -4,17 +4,23 @@ import java.util.List; import java.util.function.Supplier; import java.util.stream.Collectors; +import liqp.filters.date.fuzzy.Part.NewPart; +import liqp.filters.date.fuzzy.Part.RecognizedMonthNamePart; +import liqp.filters.date.fuzzy.Part.RecognizedPart; import liqp.filters.date.fuzzy.extractors.PartExtractorResult; -public interface PartExtractor { +public abstract class PartExtractor { + + public PartExtractorResult extract(String source, List parts, int i) { + throw new UnsupportedOperationException("Not supported yet."); + } - PartExtractorResult extract(String source, List parts, int i); - default List newList(String... el) { + protected List newList(String... el) { return Arrays.asList(el); } - default List appendToExisting(List start, Supplier> supplier) { + protected List appendToExisting(List start, Supplier> supplier) { if (start.isEmpty()) { return supplier.get(); } @@ -22,4 +28,51 @@ default List appendToExisting(List start, Supplier> .flatMap(prefix -> supplier.get().stream().map(suffix -> prefix + suffix)) .collect(Collectors.toList()); } + + public LookupResult extract(List parts) { + for (int i = 0; i < parts.size(); i++) { + Part part = parts.get(i); + + if (part.state() == Part.PartState.NEW) { + String source = part.source(); + PartExtractorResult per = extract(source, parts, i); + if (per.found) { + return getLookupResult(parts, i, per); + } + } + } + return new LookupResult("", parts, false); + } + + protected LookupResult getLookupResult(List parts, int i, PartExtractorResult per) { + + Part part = parts.get(i); + String source = part.source(); + + parts.remove(i); + + if (per.end != source.length()) { + NewPart after = new NewPart(part.start() + per.end, part.end(), source.substring(per.end)); + parts.add(i, after); + } + + RecognizedPart recognized; + if (per.isMonthName) { + recognized = new RecognizedMonthNamePart(part.start() + per.start, part.start() + per.end, per.formatterPatterns, source.substring( + per.start, per.end)); + } else { + recognized = new RecognizedPart(part.start() + per.start, part.start() + per.end, per.formatterPatterns, source.substring( + per.start, per.end)); + } + parts.add(i, recognized); + + if (per.start != 0) { + NewPart before = new NewPart( + part.start(), part.start() + per.start, source.substring(0, per.start)); + parts.add(i, before); + } + + return new LookupResult(per.extractorName, parts, true); + } + } diff --git a/src/main/java/liqp/filters/date/fuzzy/PartRecognizer.java b/src/main/java/liqp/filters/date/fuzzy/PartRecognizer.java index 7b1599bc..ba8e38fe 100644 --- a/src/main/java/liqp/filters/date/fuzzy/PartRecognizer.java +++ b/src/main/java/liqp/filters/date/fuzzy/PartRecognizer.java @@ -102,38 +102,7 @@ private boolean notSet(Boolean val) { return val == null; } private LookupResult lookup(List parts, PartExtractor partExtractor) { - for (int i = 0; i < parts.size(); i++) { - Part part = parts.get(i); - - if (part.state() == Part.PartState.NEW) { - String source = part.source(); - PartExtractorResult per = partExtractor.extract(source, parts, i); - if (per.found) { - parts.remove(i); - - if (per.end != source.length()) { - NewPart after = new NewPart(part.start() + per.end, part.end(), source.substring(per.end)); - parts.add(i, after); - } - - RecognizedPart recognized; - if (per.isMonthName) { - recognized = new RecognizedMonthNamePart(part.start() + per.start, part.start() + per.end, per.formatterPatterns, source.substring(per.start, per.end)); - } else { - recognized = new RecognizedPart(part.start() + per.start, part.start() + per.end, per.formatterPatterns, source.substring(per.start, per.end)); - } - parts.add(i, recognized); - - if (per.start != 0) { - NewPart before = new NewPart(part.start(), part.start() + per.start, source.substring(0, per.start)); - parts.add(i, before); - } - - return new LookupResult(per.extractorName, parts, true); - } - } - } - return new LookupResult("", parts, false); + return partExtractor.extract(parts); } private List markAsUnrecognized(List parts) { diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/AllYMDPatternExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/AllYMDPatternExtractor.java index 436e4657..14fa5eab 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/AllYMDPatternExtractor.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/AllYMDPatternExtractor.java @@ -11,7 +11,7 @@ import liqp.filters.date.fuzzy.Part; import liqp.filters.date.fuzzy.PartExtractor; -public class AllYMDPatternExtractor implements PartExtractor { +public class AllYMDPatternExtractor extends PartExtractor { private final List extractors = new ArrayList<>(); diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/MonthDateExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/MonthDateExtractor.java index 1c845c8f..f0a5f71c 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/MonthDateExtractor.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/MonthDateExtractor.java @@ -1,59 +1,154 @@ package liqp.filters.date.fuzzy.extractors; +import static liqp.LValue.isBlank; + import java.util.List; import java.util.regex.Matcher; +import liqp.filters.date.fuzzy.LookupResult; import liqp.filters.date.fuzzy.Part; +import liqp.filters.date.fuzzy.Part.NewPart; import liqp.filters.date.fuzzy.Part.RecognizedMonthNamePart; import liqp.filters.date.fuzzy.Part.RecognizedPart; import liqp.filters.date.fuzzy.PartExtractor; -public class MonthDateExtractor implements PartExtractor { +public class MonthDateExtractor extends PartExtractor { + enum Mode { + SPACES_ONLY, + /** + * not ",;" so far + */ + NON_SEPARATORS + } + enum Direction { + LEFT, + RIGHT + } @Override - public PartExtractorResult extract(String source, List parts, int i) { - // closest right or closest left should be a month - if (rightIsMonth(parts, i)) { - return leftDateExtractor.extract(source, parts, i); + public LookupResult extract(List parts) { + // 1. find named month + // if not - return empty result + // then look both left and right for a day + // comparing them (left vs right) by priority + // so the situation like ' 11 december, 11 ' vs '11, december 11' will be resolved + int monthIndex = lookForNamedMonth(parts); + if (monthIndex == -1) { + return new LookupResult("MonthDateExtractor", parts, false); + } + + LookupResult rightResult = rightDate(monthIndex, parts, Mode.SPACES_ONLY); + LookupResult leftResult = leftDate(monthIndex, parts, Mode.SPACES_ONLY); + + if (rightResult.found && leftResult.found) { + throw new IllegalArgumentException("Month have date candidates on both sides"); + } + if (leftResult.found) { + return leftResult; + } + if (rightResult.found) { + return rightResult; + } + + + rightResult = rightDate(monthIndex, parts, Mode.NON_SEPARATORS); + leftResult = leftDate(monthIndex, parts, Mode.NON_SEPARATORS); + if (rightResult.found && leftResult.found) { + throw new IllegalArgumentException("Month have date candidates on both sides"); + } + if (rightResult.found) { + return rightResult; } - if (leftIsMonth(parts, i)) { - return rightDateExtractor.extract(source, parts, i); + if (leftResult.found) { + return leftResult; } - return new PartExtractorResult("MonthDateExtractor"); + + + return new LookupResult("MonthDateExtractor", parts, false); } - private boolean leftIsMonth(List parts, int i) { - int left = i - 1; - while (left >= 0) { - Part part = parts.get(left); - if (part instanceof RecognizedMonthNamePart) { - return true; + private LookupResult leftDate(int monthIndex, List parts, Mode mode) { + RegexPartExtractor extractor = getExtractorByModeAndDirection(mode, Direction.LEFT); + + int index = monthIndex - 1; + while (index >= 0) { + LookupResult result = locateDate(parts, extractor, index); + if (result != null) { + return result; } - if (part instanceof RecognizedPart) { - return false; + index--; + } + + return new LookupResult("MonthDateExtractor", parts, false); + } + + private LookupResult rightDate(int monthIndex, List parts, Mode mode) { + RegexPartExtractor extractor = getExtractorByModeAndDirection(mode, Direction.RIGHT); + + int index = monthIndex + 1; + while (index < parts.size()) { + LookupResult result = locateDate(parts, extractor, index); + if (result != null) { + return result; } - left--; + index++; } - return false; + + return new LookupResult("MonthDateExtractor", parts, false); } - private boolean rightIsMonth(List parts, int i) { - int right = i + 1; - while (right < parts.size()) { - Part part = parts.get(right); - if (part instanceof RecognizedMonthNamePart) { - return true; + private LookupResult locateDate(List parts, RegexPartExtractor extractor, int index) { + Part part = parts.get(index); + if (part instanceof RecognizedPart) { + return new LookupResult("MonthDateExtractor", parts, false); + } + if (part instanceof NewPart) { + NewPart newPart = (NewPart) part; + String source = newPart.source(); + if (!isBlank(source) && extractor != null) { + PartExtractorResult leftResult = extractor.extract(source, parts, index); + if (leftResult.found) { + return getLookupResult(parts, index, leftResult); + } } - if (part instanceof RecognizedPart) { - return false; + } + return null; + } + + private RegexPartExtractor getExtractorByModeAndDirection(Mode mode, Direction direction) { + if (direction == Direction.LEFT) { + if (mode == Mode.SPACES_ONLY) { + return leftDateSpacesOnlyExtractor; + } else if (mode == Mode.NON_SEPARATORS) { + return leftDateExtractor; + } + } else { + if (mode == Mode.SPACES_ONLY) { + return rightDateSpacesOnlyExtractor; + } else if (mode == Mode.NON_SEPARATORS) { + return rightDateExtractor; } - right++; } - return false; + return null; } + private int lookForNamedMonth(List parts) { + for (int i = 0; i < parts.size(); i++) { + Part part = parts.get(i); + if (part instanceof RecognizedMonthNamePart) { + return i; + } + } + return -1; + } - private static final RegexPartExtractor leftDateExtractor = new MonthDatePartExtractor("MonthDayExtractor.left", "(?:^|.*?\\D)(?0?[1-9]|[12][0-9]|3[01])\\D+?$"); - private static final RegexPartExtractor rightDateExtractor = new MonthDatePartExtractor("MonthDayExtractor.right", "^\\D+?(?0?[1-9]|[12][0-9]|3[01])(?:$|\\D.*?)"); + private static final RegexPartExtractor leftDateExtractor = new MonthDatePartExtractor("MonthDayExtractor.left", + "(?:^|.*?\\D)(?0?[1-9]|[12][0-9]|3[01])[^,\\d;]+?$"); + private static final RegexPartExtractor leftDateSpacesOnlyExtractor = new MonthDatePartExtractor("MonthDayExtractor.left", + "(?:^|.*?\\D)(?0?[1-9]|[12][0-9]|3[01])\\s+?$"); + private static final RegexPartExtractor rightDateExtractor = new MonthDatePartExtractor("MonthDayExtractor.right", + "^[^,\\d;]+?(?0?[1-9]|[12][0-9]|3[01])(?:$|\\D.*?)"); + private static final RegexPartExtractor rightDateSpacesOnlyExtractor = new MonthDatePartExtractor("MonthDayExtractor.right", + "^\\s+?(?0?[1-9]|[12][0-9]|3[01])(?:$|\\D.*?)"); private static class MonthDatePartExtractor extends RegexPartExtractor { public MonthDatePartExtractor(String name, String regex) { diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/MonthExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/MonthExtractor.java index 3b0783c9..07f65daf 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/MonthExtractor.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/MonthExtractor.java @@ -7,7 +7,7 @@ import liqp.filters.date.fuzzy.Part; import liqp.filters.date.fuzzy.PartExtractor; -public class MonthExtractor implements PartExtractor { +public class MonthExtractor extends PartExtractor { private final List monthExtractors; public MonthExtractor(Locale locale) { diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorDelegate.java b/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorDelegate.java index 91a438f5..73b00192 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorDelegate.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorDelegate.java @@ -4,7 +4,7 @@ import liqp.filters.date.fuzzy.Part; import liqp.filters.date.fuzzy.PartExtractor; -public class PartExtractorDelegate implements PartExtractor { +public class PartExtractorDelegate extends PartExtractor { protected PartExtractor delegate; diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/RegexPartExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/RegexPartExtractor.java index a18a90ad..a7083137 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/RegexPartExtractor.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/RegexPartExtractor.java @@ -6,7 +6,7 @@ import liqp.filters.date.fuzzy.Part; import liqp.filters.date.fuzzy.PartExtractor; -class RegexPartExtractor implements PartExtractor { +class RegexPartExtractor extends PartExtractor { protected final String name; protected final Pattern pattern; diff --git a/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserParametrizedErrorsTest.java b/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserParametrizedErrorsTest.java new file mode 100644 index 00000000..0c307a58 --- /dev/null +++ b/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserParametrizedErrorsTest.java @@ -0,0 +1,50 @@ +package liqp.filters.date.fuzzy; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +import java.time.ZonedDateTime; +import java.time.format.DateTimeFormatter; +import java.util.Arrays; +import java.util.Collection; +import java.util.Locale; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class FuzzyDateParserParametrizedErrorsTest { + + private final String input; + private final Class exceptionClass; + private final Locale locale; + + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(new Object[][]{ + {null, " 11 december 11", RuntimeException.class}, + {null, " 11 -december- 11", RuntimeException.class}, + }); + } + + public FuzzyDateParserParametrizedErrorsTest(Locale locale, String input, Class exceptionClass) { + this.locale = locale == null ? Locale.ENGLISH : locale; + this.input = FuzzyDateParser.removeSequentialSuffixes(input); + this.exceptionClass = exceptionClass; + } + + @Test + public void shouldParse() { + try { + final FuzzyDateParser parser = new FuzzyDateParser(); + parser.guessPattern(input, locale, null); + fail(String.format("input is: [%s] and should be wrong", input)); + } catch (Exception e) { + if (!exceptionClass.isInstance(e)) { + fail(String.format("for input %s exception class should be %s, but it was %s instead", input, exceptionClass, e.getClass())); + } + } + } + + +} diff --git a/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserParametrizedTest.java b/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserParametrizedTest.java index b7a59c11..990e04b3 100644 --- a/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserParametrizedTest.java +++ b/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserParametrizedTest.java @@ -111,9 +111,8 @@ public static Collection data() { {null, "01/01/23 12:34 ", "MM/dd/yy HH:mm "}, {null, "11 31st of december 1996 ", "'11' dd 'of' MMMM yyyy "}, {null, "december.31st", "MMMM.dd"}, - {null, " 11 december 11", " dd MMMM '11'"}, // incorrect - {null, " 11 december, 11 ", " dd MMMM, '11' "}, // incorrect, comma should have less priority - {null, " 11, december 11 ", " dd, MMMM '11' "}, // incorrect, comma should have less priority + {null, " 11 december, 11 ", " dd MMMM, '11' "}, + {null, " 11, december 11 ", " '11', MMMM dd "}, }); }