From 0931d5faa331a44e8b1a3fd71130f29f3992ca16 Mon Sep 17 00:00:00 2001
From: msangel
Date: Sun, 5 Jan 2025 08:02:00 +0200
Subject: [PATCH] [WIP]
---
ruby/cases_date.rb | 11 +-
.../liqp/filters/date/BasicDateParser.java | 43 ++++---
.../fuzzy/DatePatternRecognizingContext.java | 1 +
.../filters/date/fuzzy/FuzzyDateParser.java | 7 +-
.../java/liqp/filters/date/fuzzy/Part.java | 14 +++
.../filters/date/fuzzy/PartExtractor.java | 56 +++++++--
.../filters/date/fuzzy/PartRecognizer.java | 44 +++++---
.../extractors/AllYMDPatternExtractor.java | 41 ++++---
.../extractors/AnyYMDPatternExtractor.java | 49 ++++++--
.../date/fuzzy/extractors/EnumExtractor.java | 1 +
.../extractors/EraAfterYearExtractor.java | 57 ++++++++++
.../date/fuzzy/extractors/Extractors.java | 20 +++-
.../fuzzy/extractors/MonthDateExtractor.java | 74 ++++--------
...Extractor.java => MonthNameExtractor.java} | 7 +-
.../extractors/PartExtractorDelegate.java | 6 +-
.../fuzzy/extractors/PartExtractorResult.java | 1 +
.../fuzzy/extractors/RegexPartExtractor.java | 3 +-
.../date/fuzzy/extractors/YearWithEra.java | 6 +
src/test/java/liqp/filters/DateTest.java | 52 ++++-----
...FuzzyDateParserParametrizedErrorsTest.java | 3 +-
.../filters/date/fuzzy/StandardsTest.java | 106 +++++++++++++++---
21 files changed, 432 insertions(+), 170 deletions(-)
create mode 100644 src/main/java/liqp/filters/date/fuzzy/extractors/EraAfterYearExtractor.java
rename src/main/java/liqp/filters/date/fuzzy/extractors/{MonthExtractor.java => MonthNameExtractor.java} (86%)
diff --git a/ruby/cases_date.rb b/ruby/cases_date.rb
index 8d5dc132..d9838008 100755
--- a/ruby/cases_date.rb
+++ b/ruby/cases_date.rb
@@ -32,9 +32,18 @@
# assertEqual("", render({"null" => "bad"}, "{{null}}"))
# assertEqual("", render({"empty" => "bad"}, "{{empty}}"))
# assertEqual("", render({"blank" => "bad"}, "{{blank}}"))
-assertEqual("2007-11-01...", render({"a" => t }, "{{ a | truncate: 13 }}"))
+# assertEqual("2007-11-01...", render({"a" => t }, "{{ a | truncate: 13 }}"))
if isJekyll
+ pp render({"a" => '2004-12-31'}, "{{ a | date: '%Y-%m-%d %H:%M:%S %z'}}") # "2004-12-31 00:00:00 +0200"
+ pp render({"a" => '31 December'}, "{{ a | date: '%Y-%m-%d %H:%M:%S %z'}}") # "2025-12-31 00:00:00 +0200"
+ pp render({"a" => '12:00'}, "{{ a | date: '%Y-%m-%d %H:%M:%S %z'}}") # "2025-01-04 12:00:00 +0200"
+ pp render({"a" => 'Friday'}, "{{ a | date: '%Y-%m-%d %H:%M:%S %z'}}") # Friday
+ pp render({"a" => 'Friday 12/24'}, "{{ a | date: '%Y-%m-%d %H:%M:%S %z'}}") # "2025-12-24 00:00:00 +0200"
+ pp render({"a" => '2004-12-31 11:23:58 Z'}, "{{ a | date: '%Y-%m-%d %H:%M:%S %z'}}") # "2004-12-31 11:23:58 +0000"
+ pp render({"a" => 'September 1969'}, "{{ a | date: '%Y-%m-%d %H:%M:%S %z'}}") # "1969-09-01 00:00:00 +0300"
+ pp render({"a" => '06 Nov 04'}, "{{ a | date: '%Y-%m-%d %H:%M:%S %z'}}") # "2004-11-06 00:00:00 +0200"
+ pp render({"a" => '1994-11-06T08'}, "{{ a | date: '%Y-%m-%d %H:%M:%S %z'}}") # "1994-11-06 00:00:00 +0200"
# target is string representation, source is iterated as collection(and so = match in "year" part)
assertEqual("target is string representation: 2007-11-01 15:25:00 +0900", render({"a" => [{ "time" => t }], "b" => "2007"}, "target is string representation: {{ a | where: 'time', b | map: 'time'}}"))
diff --git a/src/main/java/liqp/filters/date/BasicDateParser.java b/src/main/java/liqp/filters/date/BasicDateParser.java
index 920f35f1..9d7c7085 100644
--- a/src/main/java/liqp/filters/date/BasicDateParser.java
+++ b/src/main/java/liqp/filters/date/BasicDateParser.java
@@ -3,6 +3,7 @@
import java.time.*;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
+import java.time.temporal.ChronoField;
import java.time.temporal.TemporalAccessor;
import java.time.temporal.TemporalAdjusters;
import java.time.temporal.TemporalField;
@@ -97,16 +98,6 @@ public static ZonedDateTime getFullDateIfPossible(TemporalAccessor temporal, Zon
if (temporal instanceof Instant) {
return ZonedDateTime.ofInstant((Instant) temporal, defaultZone);
}
- TemporalField[] copyThese = new TemporalField[]{
- YEAR,
- MONTH_OF_YEAR,
- DAY_OF_MONTH,
- HOUR_OF_DAY,
- MINUTE_OF_HOUR,
- SECOND_OF_MINUTE,
- NANO_OF_SECOND
- };
-
ZoneId zoneId = temporal.query(TemporalQueries.zone());
if (zoneId == null) {
@@ -115,6 +106,16 @@ public static ZonedDateTime getFullDateIfPossible(TemporalAccessor temporal, Zon
final LocalDateTime now = LocalDateTime.now(zoneId);
+ TemporalField[] copyThese = new TemporalField[]{
+ NANO_OF_SECOND,
+ SECOND_OF_MINUTE,
+ MINUTE_OF_HOUR,
+ HOUR_OF_DAY,
+ DAY_OF_MONTH,
+ MONTH_OF_YEAR,
+ YEAR,
+ };
+
if ("java.time.format.Parsed".equals(temporal.getClass().getName())) {
Map> factories = new HashMap<>();
factories.put(DAY_OF_WEEK, t -> now.with(TemporalAdjusters.previousOrSame(DayOfWeek.from(t))));
@@ -124,11 +125,25 @@ public static ZonedDateTime getFullDateIfPossible(TemporalAccessor temporal, Zon
}
}
-
- LocalDateTime res = now.with(TemporalAdjusters.ofDateAdjuster(date -> date));
+ LocalDateTime res = now;
+ boolean zeroField = true;
for (TemporalField tf: copyThese) {
- if (temporal.isSupported(tf)) {
- res = res.with(tf, temporal.get(tf));
+ if (zeroField && temporal.isSupported(tf)) {
+ zeroField = false;
+ }
+ if (zeroField) {
+ if (temporal.isSupported(tf)) {
+ long minimum = temporal.range(tf).getMinimum();
+ res = res.with(tf, minimum);
+ } else {
+ res = res.with(tf, tf.range().getMinimum());
+ }
+ } else {
+ if (temporal.isSupported(tf)) {
+ res = res.with(tf, temporal.get(tf));
+ } else {
+ res = res.with(tf, now.get(tf));
+ }
}
}
return res.atZone(zoneId);
diff --git a/src/main/java/liqp/filters/date/fuzzy/DatePatternRecognizingContext.java b/src/main/java/liqp/filters/date/fuzzy/DatePatternRecognizingContext.java
index fd49563e..70ae8a55 100644
--- a/src/main/java/liqp/filters/date/fuzzy/DatePatternRecognizingContext.java
+++ b/src/main/java/liqp/filters/date/fuzzy/DatePatternRecognizingContext.java
@@ -6,6 +6,7 @@ public class DatePatternRecognizingContext {
public final Locale locale;
public Boolean hasYear;
+ public Boolean hasEra;
public Boolean hasMonth;
public Boolean hasDate;
public Boolean weekDay;
diff --git a/src/main/java/liqp/filters/date/fuzzy/FuzzyDateParser.java b/src/main/java/liqp/filters/date/fuzzy/FuzzyDateParser.java
index 2114919c..67c3b8df 100644
--- a/src/main/java/liqp/filters/date/fuzzy/FuzzyDateParser.java
+++ b/src/main/java/liqp/filters/date/fuzzy/FuzzyDateParser.java
@@ -130,7 +130,12 @@ private List newList(String pattern) {
private GuessingResult getGuessingResult(Stream guessingStream, String normalized, Locale locale, ZoneId defaultZone) {
return guessingStream
.map(pattern -> {
- TemporalAccessor temporalAccessor = parseUsingPattern(normalized, pattern, locale);
+ TemporalAccessor temporalAccessor = null;
+ try {
+ temporalAccessor = parseUsingPattern(normalized, pattern, locale);
+ } catch (Exception e) {
+ // ignore
+ }
if (temporalAccessor != null) {
GuessingResult result = new GuessingResult();
result.pattern = pattern;
diff --git a/src/main/java/liqp/filters/date/fuzzy/Part.java b/src/main/java/liqp/filters/date/fuzzy/Part.java
index 1d8d80f2..b8532d14 100644
--- a/src/main/java/liqp/filters/date/fuzzy/Part.java
+++ b/src/main/java/liqp/filters/date/fuzzy/Part.java
@@ -161,4 +161,18 @@ public String toString() {
'}';
}
}
+ class RecognizedYearWithoutEraPart extends RecognizedPart {
+ public RecognizedYearWithoutEraPart(int start, int end, List patterns, String source) {
+ super(start, end, patterns, source);
+ }
+
+ @Override
+ public String toString() {
+ return "RecognizedYearWithoutEraPart{" +
+ "start=" + start +
+ ", end=" + end +
+ ", pattern='" + patterns + '\'' +
+ '}';
+ }
+ }
}
diff --git a/src/main/java/liqp/filters/date/fuzzy/PartExtractor.java b/src/main/java/liqp/filters/date/fuzzy/PartExtractor.java
index b2a4859e..5f0061a1 100644
--- a/src/main/java/liqp/filters/date/fuzzy/PartExtractor.java
+++ b/src/main/java/liqp/filters/date/fuzzy/PartExtractor.java
@@ -1,5 +1,7 @@
package liqp.filters.date.fuzzy;
+import static liqp.LValue.isBlank;
+
import java.util.Arrays;
import java.util.List;
import java.util.function.Supplier;
@@ -7,10 +9,20 @@
import liqp.filters.date.fuzzy.Part.NewPart;
import liqp.filters.date.fuzzy.Part.RecognizedMonthNamePart;
import liqp.filters.date.fuzzy.Part.RecognizedPart;
+import liqp.filters.date.fuzzy.Part.RecognizedYearWithoutEraPart;
import liqp.filters.date.fuzzy.extractors.PartExtractorResult;
public abstract class PartExtractor {
+ /**
+ * for debugging purposes
+ */
+ protected final String name;
+
+ public PartExtractor(String name) {
+ this.name = name;
+ }
+
public PartExtractorResult extract(String source, List parts, int i) {
throw new UnsupportedOperationException("Not supported yet.");
}
@@ -51,28 +63,58 @@ protected LookupResult getLookupResult(List parts, int i, PartExtractorRes
parts.remove(i);
+ int recognizedEnd = part.start() + per.end;
if (per.end != source.length()) {
- NewPart after = new NewPart(part.start() + per.end, part.end(), source.substring(per.end));
+ NewPart after = new NewPart(recognizedEnd, part.end(), source.substring(per.end));
parts.add(i, after);
}
RecognizedPart recognized;
- if (per.isMonthName) {
- recognized = new RecognizedMonthNamePart(part.start() + per.start, part.start() + per.end, per.formatterPatterns, source.substring(
- per.start, per.end));
+ int recognizedStart = part.start() + per.start;
+ String recognizedSource = source.substring(per.start, per.end);
+ if (per.yearWithoutEra) {
+ recognized = new RecognizedYearWithoutEraPart(recognizedStart, recognizedEnd, per.formatterPatterns, recognizedSource);
+ } else if (per.isMonthName) {
+ recognized = new RecognizedMonthNamePart(recognizedStart, recognizedEnd, per.formatterPatterns, recognizedSource);
} else {
- recognized = new RecognizedPart(part.start() + per.start, part.start() + per.end, per.formatterPatterns, source.substring(
- per.start, per.end));
+ recognized = new RecognizedPart(recognizedStart, recognizedEnd, per.formatterPatterns, recognizedSource);
}
parts.add(i, recognized);
if (per.start != 0) {
NewPart before = new NewPart(
- part.start(), part.start() + per.start, source.substring(0, per.start));
+ part.start(), recognizedStart, source.substring(0, per.start));
parts.add(i, before);
}
return new LookupResult(per.extractorName, parts, true);
}
+ protected int getIndexByPartType(List parts, Class extends Part> partType) {
+ for (int i = 0; i < parts.size(); i++) {
+ Part part = parts.get(i);
+ if (partType.isInstance(part)) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ protected LookupResult locatePart(List parts, PartExtractor extractor, int index) {
+ Part part = parts.get(index);
+ if (part instanceof RecognizedPart) {
+ return new LookupResult(this.name, parts, false);
+ }
+ if (part instanceof NewPart) {
+ NewPart newPart = (NewPart) part;
+ String source = newPart.source();
+ if (!isBlank(source) && extractor != null) {
+ PartExtractorResult result = extractor.extract(source, parts, index);
+ if (result.found) {
+ return getLookupResult(parts, index, result);
+ }
+ }
+ }
+ return null;
+ }
}
diff --git a/src/main/java/liqp/filters/date/fuzzy/PartRecognizer.java b/src/main/java/liqp/filters/date/fuzzy/PartRecognizer.java
index ba8e38fe..312103e4 100644
--- a/src/main/java/liqp/filters/date/fuzzy/PartRecognizer.java
+++ b/src/main/java/liqp/filters/date/fuzzy/PartRecognizer.java
@@ -2,9 +2,10 @@
import static liqp.filters.date.fuzzy.Part.PunctuationPart.punctuationChars;
import static liqp.filters.date.fuzzy.extractors.Extractors.allYMDPatternExtractor;
+import static liqp.filters.date.fuzzy.extractors.Extractors.eraAfterYearExtractor;
import static liqp.filters.date.fuzzy.extractors.Extractors.fullWeekdaysExtractor;
import static liqp.filters.date.fuzzy.extractors.Extractors.monthDateExtractor;
-import static liqp.filters.date.fuzzy.extractors.Extractors.monthExtractor;
+import static liqp.filters.date.fuzzy.extractors.Extractors.monthNameExtractor;
import static liqp.filters.date.fuzzy.extractors.Extractors.plainYearExtractor;
import static liqp.filters.date.fuzzy.extractors.Extractors.regularTimeExtractor;
import static liqp.filters.date.fuzzy.extractors.Extractors.shortWeekdaysExtractor;
@@ -13,12 +14,8 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
-import liqp.filters.date.fuzzy.Part.NewPart;
import liqp.filters.date.fuzzy.Part.PunctuationPart;
-import liqp.filters.date.fuzzy.Part.RecognizedMonthNamePart;
-import liqp.filters.date.fuzzy.Part.RecognizedPart;
import liqp.filters.date.fuzzy.Part.UnrecognizedPart;
-import liqp.filters.date.fuzzy.extractors.PartExtractorResult;
public class PartRecognizer {
@@ -38,13 +35,6 @@ List recognizePart(List parts, DatePatternRecognizingContext ctx) {
ctx.weekDay = false;
}
- if (notSet(ctx.hasYear)) {
- LookupResult result = lookup(parts, yearWithEraExtractor.get(ctx.locale));
- if (result.found) {
- ctx.hasYear = true;
- return result.parts;
- }
- }
if (notSet(ctx.hasTime)) {
LookupResult result = lookup(parts, regularTimeExtractor.get(ctx.locale));
if (result.found) {
@@ -61,20 +51,30 @@ List recognizePart(List parts, DatePatternRecognizingContext ctx) {
ctx.hasDate = true;
return result.parts;
}
+ result = lookup(parts, yearWithEraExtractor.get(ctx.locale));
+ if (result.found) {
+ ctx.hasYear = true;
+ ctx.hasEra = true;
+ return result.parts;
+ }
+ result = lookup(parts, plainYearExtractor.get(ctx.locale));
+ if (result.found) {
+ ctx.hasYear = true;
+ return result.parts;
+ }
}
- if (notSet(ctx.hasYear)) {
- LookupResult result = lookup(parts, plainYearExtractor.get(ctx.locale));
+ if (isTrue(ctx.hasYear) && notSet(ctx.hasEra)) {
+ LookupResult result = lookup(parts, eraAfterYearExtractor.get(ctx.locale));
if (result.found) {
- ctx.hasYear = true;
+ ctx.hasEra = true;
return result.parts;
}
- // last "year check" and since we are here - there is no year
- ctx.hasYear = false;
+ ctx.hasEra = false;
}
if (notSet(ctx.hasMonth)) {
- LookupResult result = lookup(parts, monthExtractor.get(ctx.locale));
+ LookupResult result = lookup(parts, monthNameExtractor.get(ctx.locale));
if (result.found) {
ctx.hasMonth = true;
return result.parts;
@@ -90,6 +90,14 @@ List recognizePart(List parts, DatePatternRecognizingContext ctx) {
}
ctx.hasDate = false;
}
+//
+// if (notSet(ctx.hasYear)) {
+// LookupResult result = lookup(parts, twoDigitYearExtractor.get(ctx.locale));
+// if (result.found) {
+// ctx.hasYear = true;
+// return result.parts;
+// }
+// }
return markAsUnrecognized(parts);
}
diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/AllYMDPatternExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/AllYMDPatternExtractor.java
index 14fa5eab..4c4a7a02 100644
--- a/src/main/java/liqp/filters/date/fuzzy/extractors/AllYMDPatternExtractor.java
+++ b/src/main/java/liqp/filters/date/fuzzy/extractors/AllYMDPatternExtractor.java
@@ -2,9 +2,11 @@
import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pD;
import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pM;
+import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pMn;
import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pY2;
import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pY4;
-import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pp;
+import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pp1;
+import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pp2;
import java.util.ArrayList;
import java.util.List;
@@ -16,39 +18,44 @@ public class AllYMDPatternExtractor extends PartExtractor {
private final List extractors = new ArrayList<>();
public AllYMDPatternExtractor() {
+ super("AllYMDPatternExtractor");
extractors.add(new AnyYMDPatternExtractor("iSO8601Y4MDPatternExtractor",
- pY4(), pp("-"), pM(), pp("-"), pD())); // yyyy-MM-dd
+ pY4(), pp1("-"), pM(), pp2("-"), pD())); // yyyy-MM-dd
extractors.add(new AnyYMDPatternExtractor("americanY4MDPatternExtractor",
- pM(), pp("/"), pD(), pp("/"), pY4())); // MM/dd/yyyy
+ pM(), pp1("/"), pD(), pp2("/"), pY4())); // MM/dd/yyyy
// next are top-rated locale formats, according to gpt
extractors.add(new AnyYMDPatternExtractor("indianY4MDPatternExtractor",
- pD(), pp("-"), pM(), pp("-"), pY4())); // d-M-yyyy
+ pD(), pp1("-"), pM(), pp2("-"), pY4())); // d-M-yyyy
extractors.add(new AnyYMDPatternExtractor("chineseY4MDPatternExtractor",
- pY4(), pp("/"), pM(), pp("/"), pD())); // yyyy/M/d
+ pY4(), pp1("/"), pM(), pp2("/"), pD())); // yyyy/M/d
extractors.add(new AnyYMDPatternExtractor("englishY4MDPatternExtractor",
- pD(), pp("/"), pM(), pp("/"), pY4())); // d/M/yyyy
+ pD(), pp1("/"), pM(), pp2("/"), pY4())); // d/M/yyyy
extractors.add(new AnyYMDPatternExtractor("slavicY4MDPatternExtractor",
- pD(), pp("."), pM(), pp("."), pY4())); // dd.MM.yyyy
+ pD(), pp1("."), pM(), pp2("."), pY4())); // dd.MM.yyyy
extractors.add(new AnyYMDPatternExtractor("coldEuropeY4MDPatternExtractor",
- pY4(), pp("-"), pM(), pp("-"), pD())); // yyyy-MM-dd
+ pY4(), pp1("-"), pM(), pp2("-"), pD())); // yyyy-MM-dd
extractors.add(new AnyYMDPatternExtractor("espanaY4MDPatternExtractor",
- pY4(), pp("-"), pM(), pp("-"), pD())); // yyyy/MM/dd
+ pY4(), pp1("-"), pM(), pp2("-"), pD())); // yyyy/MM/dd
extractors.add(new AnyYMDPatternExtractor("americanY2MDPatternExtractor",
- pM(), pp("/"), pD(), pp("/"), pY2())); // MM/dd/yy
+ pM(), pp1("/"), pD(), pp2("/"), pY2())); // MM/dd/yy
extractors.add(new AnyYMDPatternExtractor("indianY2MDPatternExtractor",
- pD(), pp("-"), pM(), pp("-"), pY2())); // d-M-yy
+ pD(), pp1("-"), pM(), pp2("-"), pY2())); // d-M-yy
extractors.add(new AnyYMDPatternExtractor("chineseY2MDPatternExtractor",
- pY2(), pp("/"), pM(), pp("/"), pD())); // yy/M/d
+ pY2(), pp1("/"), pM(), pp2("/"), pD())); // yy/M/d
extractors.add(new AnyYMDPatternExtractor("englishY2MDPatternExtractor",
- pD(), pp("/"), pM(), pp("/"), pY2())); // d/M/yy
+ pD(), pp1("/"), pM(), pp2("/"), pY2())); // d/M/yy
extractors.add(new AnyYMDPatternExtractor("slavicY2MDPatternExtractor",
- pD(), pp("."), pM(), pp("."), pY2())); // dd.MM.yy
+ pD(), pp1("."), pM(), pp2("."), pY2())); // dd.MM.yy
extractors.add(new AnyYMDPatternExtractor("coldEuropeY2MDPatternExtractor",
- pY2(), pp("-"), pM(), pp("-"), pD())); // yy-MM-dd
+ pY2(), pp1("-"), pM(), pp2("-"), pD())); // yy-MM-dd
extractors.add(new AnyYMDPatternExtractor("espanaY2MDPatternExtractor",
- pY2(), pp("-"), pM(), pp("-"), pD())); // yy/MM/dd
+ pY2(), pp1("-"), pM(), pp2("-"), pD())); // yy/MM/dd
+ extractors.add(new AnyYMDPatternExtractor("RFC822Y4MDPatternExtractor",
+ pD(), pp1(" "), pMn(), pp2(" "), pY4())); // dd MMMM yyyy
+ extractors.add(new AnyYMDPatternExtractor("RFC822Y2MDPatternExtractor",
+ pD(), pp1(" "), pMn(), pp2(" "), pY2())); // dd MMMM yy
}
@Override
@@ -59,6 +66,6 @@ public PartExtractorResult extract(String source, List parts, int i) {
return result;
}
}
- return new PartExtractorResult("AllYMDPatternExtractor");
+ return new PartExtractorResult(this.name);
}
}
diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/AnyYMDPatternExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/AnyYMDPatternExtractor.java
index ff8a21eb..a45afa31 100644
--- a/src/main/java/liqp/filters/date/fuzzy/extractors/AnyYMDPatternExtractor.java
+++ b/src/main/java/liqp/filters/date/fuzzy/extractors/AnyYMDPatternExtractor.java
@@ -10,8 +10,10 @@
class AnyYMDPatternExtractor extends RegexPartExtractor {
+ private static final String monthsNamesExpr = "January|February|March|April|May|June|July|August|September|October|November|December|Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec";
+
public enum RuleType {
- Y, M, D, PUNCTUATION;
+ Y, M, Mn, D, PUNCTUATION1, PUNCTUATION2;
}
public static class RulePart {
private final RuleType type;
@@ -30,8 +32,11 @@ private RulePart(RuleType type, Integer length) {
}
}
- static RulePart pp(String content) {
- return new RulePart(RuleType.PUNCTUATION, content);
+ static RulePart pp1(String content) {
+ return new RulePart(RuleType.PUNCTUATION1, content);
+ }
+ static RulePart pp2(String content) {
+ return new RulePart(RuleType.PUNCTUATION2, content);
}
static RulePart pY4() {
return new RulePart(RuleType.Y, 4);
@@ -42,6 +47,9 @@ static RulePart pY2() {
static RulePart pM() {
return new RulePart(RuleType.M, (Integer)null);
}
+ static RulePart pMn() {
+ return new RulePart(RuleType.Mn, (Integer)null);
+ }
static RulePart pD() {
return new RulePart(RuleType.D, (Integer)null);
}
@@ -55,11 +63,21 @@ protected AnyYMDPatternExtractor(String name, RulePart... partsInOrder) {
private static String reconstructPattern(RulePart[] partsInOrder) {
StringBuilder sb = new StringBuilder("(?:^|.*?\\D)");
for (RulePart part : partsInOrder) {
- if (part.type == RuleType.PUNCTUATION) {
+ if (part.type == RuleType.PUNCTUATION1) {
+ if (".".equals(part.content)) {
+ sb.append("(?\\.)");
+ } else if (" ".equals(part.content)) {
+ sb.append("(?\\s+)");
+ } else {
+ sb.append("(?").append(part.content).append(")");
+ }
+ } else if (part.type == RuleType.PUNCTUATION2) {
if (".".equals(part.content)) {
- sb.append("\\.");
+ sb.append("(?\\.)");
+ } else if (" ".equals(part.content)) {
+ sb.append("(?\\s+)");
} else {
- sb.append(part.content);
+ sb.append("(?").append(part.content).append(")");
}
} else {
if (part.type == RuleType.Y) {
@@ -71,6 +89,8 @@ private static String reconstructPattern(RulePart[] partsInOrder) {
sb.append("(?0?[1-9]|1[0-2])");
} else if (part.type == RuleType.D) {
sb.append("(?0?[1-9]|[12][0-9]|3[01])");
+ } else if (part.type == RuleType.Mn) {
+ sb.append("(?"+ monthsNamesExpr +")");
}
}
}
@@ -87,6 +107,7 @@ public PartExtractorResult extract(String source, List parts, int i) {
result.start = matcher.start(findFirstGroupName());
result.end = matcher.end(findLastGroupName());
result.formatterPatterns = getPatterns(matcher);
+ result.yearWithoutEra = true;
return result;
}
return new PartExtractorResult(name);
@@ -98,14 +119,14 @@ private String findLastGroupName() {
Collections.reverse(list);
Optional first = list
.stream()
- .filter(p -> p.type != RuleType.PUNCTUATION)
+ .filter(p -> p.type != RuleType.PUNCTUATION1 && p.type != RuleType.PUNCTUATION2)
.findFirst();
return getNoGroupNameFound(first);
}
private String findFirstGroupName() {
Optional first = Arrays.stream(partsInOrder)
- .filter(p -> p.type != RuleType.PUNCTUATION)
+ .filter(p -> p.type != RuleType.PUNCTUATION1 && p.type != RuleType.PUNCTUATION2)
.findFirst();
return getNoGroupNameFound(first);
}
@@ -118,6 +139,8 @@ private static String getNoGroupNameFound(Optional first) {
return "year";
case M:
return "month";
+ case Mn:
+ return "monthName";
case D:
default:
return "day";
@@ -148,8 +171,14 @@ protected List getPatterns(Matcher matcher) {
} else {
return newList("dd", "d");
}
- } else if (part.type == RuleType.PUNCTUATION) {
- return Collections.singletonList(part.content);
+ } else if (part.type == RuleType.Mn) {
+ return newList("MMMM", "MMM");
+ } else if (part.type == RuleType.PUNCTUATION1) {
+ String punct1 = matcher.group("punct1");
+ return Collections.singletonList(punct1);
+ } else if (part.type == RuleType.PUNCTUATION2) {
+ String punct2 = matcher.group("punct2");
+ return Collections.singletonList(punct2);
}
return Collections.singletonList("");
});
diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/EnumExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/EnumExtractor.java
index 9b709be7..2088a0cd 100644
--- a/src/main/java/liqp/filters/date/fuzzy/extractors/EnumExtractor.java
+++ b/src/main/java/liqp/filters/date/fuzzy/extractors/EnumExtractor.java
@@ -7,6 +7,7 @@
abstract class EnumExtractor extends PartExtractorDelegate {
public EnumExtractor(String name, Locale locale, String formatterPattern) {
+ super(name);
if (locale == null || Locale.ROOT.equals(locale)) {
locale = Locale.US;
}
diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/EraAfterYearExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/EraAfterYearExtractor.java
new file mode 100644
index 00000000..8edcba47
--- /dev/null
+++ b/src/main/java/liqp/filters/date/fuzzy/extractors/EraAfterYearExtractor.java
@@ -0,0 +1,57 @@
+package liqp.filters.date.fuzzy.extractors;
+
+import static liqp.LValue.isBlank;
+
+import java.util.List;
+import java.util.regex.Matcher;
+import liqp.filters.date.fuzzy.LookupResult;
+import liqp.filters.date.fuzzy.Part;
+import liqp.filters.date.fuzzy.PartExtractor;
+
+public class EraAfterYearExtractor extends PartExtractor {
+
+ private final PartExtractor eraExtractor = new RegexPartExtractor("Era",
+ "(?:^|.*?\\s)(?AD|BC|Anno Domini|Before Christ)(?:$|\\s.*?)", null) {
+ @Override
+ public PartExtractorResult extract(String source, List parts, int i) {
+ Matcher matcher = pattern.matcher(source);
+ if (matcher.find()) {
+ PartExtractorResult result = new PartExtractorResult("Era");
+ result.found = true;
+ result.start = matcher.start("era");
+ result.end = matcher.end("era");
+ String era = matcher.group("era");
+ if (era.length() == 2) {
+ result.formatterPatterns = newList("GG");
+ } else {
+ result.formatterPatterns = newList("GGGG");
+ }
+ return result;
+ }
+ return new PartExtractorResult("Era");
+ }
+ };
+
+ public EraAfterYearExtractor() {
+ super("EraAfterYear");
+ }
+
+ @Override
+ public LookupResult extract(List parts) {
+ int yearPartIndex = getIndexByPartType(parts, Part.RecognizedYearWithoutEraPart.class);
+ if (yearPartIndex == -1) {
+ return new LookupResult(name, parts, false);
+ }
+ int i = yearPartIndex + 1;
+ while (i < parts.size()) {
+ LookupResult result = locatePart(parts, eraExtractor, i);
+ if (result != null) {
+ return result;
+ }
+ i++;
+ }
+
+ return new LookupResult(name, parts, false);
+ }
+
+}
diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/Extractors.java b/src/main/java/liqp/filters/date/fuzzy/extractors/Extractors.java
index 942c178a..8147565d 100644
--- a/src/main/java/liqp/filters/date/fuzzy/extractors/Extractors.java
+++ b/src/main/java/liqp/filters/date/fuzzy/extractors/Extractors.java
@@ -45,19 +45,19 @@ public PartExtractor get(Locale locale) {
return partExtractor;
}
},
- plainYearExtractor{
+ plainYearExtractor {
private final PartExtractor partExtractor = new RegexPartExtractor("plainYearExtractor", ".*\\b?(\\d{4})\\b?.*", "yyyy");
@Override
public PartExtractor get(Locale locale) {
return partExtractor;
}
},
- monthExtractor {
+ monthNameExtractor {
private final Map extractors = new HashMap<>();
@Override
public PartExtractor get(Locale locale) {
- return extractors.computeIfAbsent(locale, l -> new MonthExtractor(locale));
+ return extractors.computeIfAbsent(locale, l -> new MonthNameExtractor(locale));
}
},
monthDateExtractor {
@@ -67,6 +67,20 @@ public PartExtractor get(Locale locale) {
return partExtractor;
}
},
+ eraAfterYearExtractor {
+ private final PartExtractor partExtractor = new EraAfterYearExtractor();
+ @Override
+ public PartExtractor get(Locale locale) {
+ return partExtractor;
+ }
+ },
+// twoDigitYearExtractor {
+// private final PartExtractor partExtractor = new RegexPartExtractor("twoDigitYearExtractor", ".*\\b?(\\d{2})\\b?.*", "yy");
+// @Override
+// public PartExtractor get(Locale locale) {
+// return partExtractor;
+// }
+// },
// fullMonthExtractor {
// private final Map extractors = new HashMap<>();
// @Override
diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/MonthDateExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/MonthDateExtractor.java
index f0a5f71c..70d936ea 100644
--- a/src/main/java/liqp/filters/date/fuzzy/extractors/MonthDateExtractor.java
+++ b/src/main/java/liqp/filters/date/fuzzy/extractors/MonthDateExtractor.java
@@ -1,18 +1,18 @@
package liqp.filters.date.fuzzy.extractors;
-import static liqp.LValue.isBlank;
-
import java.util.List;
import java.util.regex.Matcher;
import liqp.filters.date.fuzzy.LookupResult;
import liqp.filters.date.fuzzy.Part;
-import liqp.filters.date.fuzzy.Part.NewPart;
import liqp.filters.date.fuzzy.Part.RecognizedMonthNamePart;
-import liqp.filters.date.fuzzy.Part.RecognizedPart;
import liqp.filters.date.fuzzy.PartExtractor;
public class MonthDateExtractor extends PartExtractor {
+ public MonthDateExtractor() {
+ super("MonthDateExtractor");
+ }
+
enum Mode {
SPACES_ONLY,
/**
@@ -24,6 +24,16 @@ enum Direction {
LEFT,
RIGHT
}
+
+ private static final RegexPartExtractor leftDateExtractor = new MonthDatePartExtractor("MonthDayExtractor.left",
+ "(?:^|.*?\\D)(?0?[1-9]|[12][0-9]|3[01])[^,\\d;]+?$");
+ private static final RegexPartExtractor leftDateSpacesOnlyExtractor = new MonthDatePartExtractor("MonthDayExtractor.left",
+ "(?:^|.*?\\D)(?0?[1-9]|[12][0-9]|3[01])\\s+?$");
+ private static final RegexPartExtractor rightDateExtractor = new MonthDatePartExtractor("MonthDayExtractor.right",
+ "^[^,\\d;]+?(?0?[1-9]|[12][0-9]|3[01])(?:$|\\D.*?)");
+ private static final RegexPartExtractor rightDateSpacesOnlyExtractor = new MonthDatePartExtractor("MonthDayExtractor.right",
+ "^\\s+?(?0?[1-9]|[12][0-9]|3[01])(?:$|\\D.*?)");
+
@Override
public LookupResult extract(List parts) {
// 1. find named month
@@ -31,9 +41,9 @@ public LookupResult extract(List parts) {
// then look both left and right for a day
// comparing them (left vs right) by priority
// so the situation like ' 11 december, 11 ' vs '11, december 11' will be resolved
- int monthIndex = lookForNamedMonth(parts);
+ int monthIndex = getIndexByPartType(parts, RecognizedMonthNamePart.class);
if (monthIndex == -1) {
- return new LookupResult("MonthDateExtractor", parts, false);
+ return new LookupResult(this.name, parts, false);
}
LookupResult rightResult = rightDate(monthIndex, parts, Mode.SPACES_ONLY);
@@ -55,15 +65,15 @@ public LookupResult extract(List parts) {
if (rightResult.found && leftResult.found) {
throw new IllegalArgumentException("Month have date candidates on both sides");
}
- if (rightResult.found) {
- return rightResult;
- }
if (leftResult.found) {
return leftResult;
}
+ if (rightResult.found) {
+ return rightResult;
+ }
- return new LookupResult("MonthDateExtractor", parts, false);
+ return new LookupResult(this.name, parts, false);
}
private LookupResult leftDate(int monthIndex, List parts, Mode mode) {
@@ -71,14 +81,14 @@ private LookupResult leftDate(int monthIndex, List parts, Mode mode) {
int index = monthIndex - 1;
while (index >= 0) {
- LookupResult result = locateDate(parts, extractor, index);
+ LookupResult result = locatePart(parts, extractor, index);
if (result != null) {
return result;
}
index--;
}
- return new LookupResult("MonthDateExtractor", parts, false);
+ return new LookupResult(this.name, parts, false);
}
private LookupResult rightDate(int monthIndex, List parts, Mode mode) {
@@ -86,32 +96,14 @@ private LookupResult rightDate(int monthIndex, List parts, Mode mode) {
int index = monthIndex + 1;
while (index < parts.size()) {
- LookupResult result = locateDate(parts, extractor, index);
+ LookupResult result = locatePart(parts, extractor, index);
if (result != null) {
return result;
}
index++;
}
- return new LookupResult("MonthDateExtractor", parts, false);
- }
-
- private LookupResult locateDate(List parts, RegexPartExtractor extractor, int index) {
- Part part = parts.get(index);
- if (part instanceof RecognizedPart) {
- return new LookupResult("MonthDateExtractor", parts, false);
- }
- if (part instanceof NewPart) {
- NewPart newPart = (NewPart) part;
- String source = newPart.source();
- if (!isBlank(source) && extractor != null) {
- PartExtractorResult leftResult = extractor.extract(source, parts, index);
- if (leftResult.found) {
- return getLookupResult(parts, index, leftResult);
- }
- }
- }
- return null;
+ return new LookupResult(this.name, parts, false);
}
private RegexPartExtractor getExtractorByModeAndDirection(Mode mode, Direction direction) {
@@ -131,24 +123,6 @@ private RegexPartExtractor getExtractorByModeAndDirection(Mode mode, Direction d
return null;
}
- private int lookForNamedMonth(List parts) {
- for (int i = 0; i < parts.size(); i++) {
- Part part = parts.get(i);
- if (part instanceof RecognizedMonthNamePart) {
- return i;
- }
- }
- return -1;
- }
-
- private static final RegexPartExtractor leftDateExtractor = new MonthDatePartExtractor("MonthDayExtractor.left",
- "(?:^|.*?\\D)(?0?[1-9]|[12][0-9]|3[01])[^,\\d;]+?$");
- private static final RegexPartExtractor leftDateSpacesOnlyExtractor = new MonthDatePartExtractor("MonthDayExtractor.left",
- "(?:^|.*?\\D)(?0?[1-9]|[12][0-9]|3[01])\\s+?$");
- private static final RegexPartExtractor rightDateExtractor = new MonthDatePartExtractor("MonthDayExtractor.right",
- "^[^,\\d;]+?(?0?[1-9]|[12][0-9]|3[01])(?:$|\\D.*?)");
- private static final RegexPartExtractor rightDateSpacesOnlyExtractor = new MonthDatePartExtractor("MonthDayExtractor.right",
- "^\\s+?(?0?[1-9]|[12][0-9]|3[01])(?:$|\\D.*?)");
private static class MonthDatePartExtractor extends RegexPartExtractor {
public MonthDatePartExtractor(String name, String regex) {
diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/MonthExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/MonthNameExtractor.java
similarity index 86%
rename from src/main/java/liqp/filters/date/fuzzy/extractors/MonthExtractor.java
rename to src/main/java/liqp/filters/date/fuzzy/extractors/MonthNameExtractor.java
index 07f65daf..7f4abb00 100644
--- a/src/main/java/liqp/filters/date/fuzzy/extractors/MonthExtractor.java
+++ b/src/main/java/liqp/filters/date/fuzzy/extractors/MonthNameExtractor.java
@@ -7,10 +7,11 @@
import liqp.filters.date.fuzzy.Part;
import liqp.filters.date.fuzzy.PartExtractor;
-public class MonthExtractor extends PartExtractor {
+public class MonthNameExtractor extends PartExtractor {
private final List monthExtractors;
- public MonthExtractor(Locale locale) {
+ public MonthNameExtractor(Locale locale) {
+ super("MonthNameExtractor");
this.monthExtractors = new ArrayList<>();
this.monthExtractors.add(new EnumExtractor("FullMonthExtractor", locale, "MMMM") {
@Override
@@ -28,7 +29,7 @@ protected String[] getEnumValues(Locale locale) {
}
@Override
public PartExtractorResult extract(String source, List parts, int i) {
- PartExtractorResult res = new PartExtractorResult("MonthDayExtractor");
+ PartExtractorResult res = new PartExtractorResult(this.name);
for (EnumExtractor monthExtractor : monthExtractors) {
PartExtractorResult monthResult = monthExtractor.extract(source, parts, i);
if (monthResult.found) {
diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorDelegate.java b/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorDelegate.java
index 73b00192..de8a0d87 100644
--- a/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorDelegate.java
+++ b/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorDelegate.java
@@ -4,10 +4,14 @@
import liqp.filters.date.fuzzy.Part;
import liqp.filters.date.fuzzy.PartExtractor;
-public class PartExtractorDelegate extends PartExtractor {
+public abstract class PartExtractorDelegate extends PartExtractor {
protected PartExtractor delegate;
+ public PartExtractorDelegate(String name) {
+ super(name);
+ }
+
@Override
public PartExtractorResult extract(String source, List parts, int i) {
return delegate.extract(source, parts, i);
diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorResult.java b/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorResult.java
index 513651a9..0fd6867d 100644
--- a/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorResult.java
+++ b/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorResult.java
@@ -11,6 +11,7 @@ public class PartExtractorResult {
public int end;
public List formatterPatterns;
public boolean isMonthName;
+ public boolean yearWithoutEra;
public PartExtractorResult(String extractorName){
diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/RegexPartExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/RegexPartExtractor.java
index a7083137..87631b57 100644
--- a/src/main/java/liqp/filters/date/fuzzy/extractors/RegexPartExtractor.java
+++ b/src/main/java/liqp/filters/date/fuzzy/extractors/RegexPartExtractor.java
@@ -8,12 +8,11 @@
class RegexPartExtractor extends PartExtractor {
- protected final String name;
protected final Pattern pattern;
protected final String formatterPattern;
public RegexPartExtractor(String name, String regex, String formatterPattern) {
- this.name = name;
+ super(name);
this.pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
this.formatterPattern = formatterPattern;
}
diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/YearWithEra.java b/src/main/java/liqp/filters/date/fuzzy/extractors/YearWithEra.java
index 34d4518e..58c670f5 100644
--- a/src/main/java/liqp/filters/date/fuzzy/extractors/YearWithEra.java
+++ b/src/main/java/liqp/filters/date/fuzzy/extractors/YearWithEra.java
@@ -5,6 +5,7 @@
import java.util.List;
import java.util.regex.Matcher;
+import liqp.filters.date.fuzzy.LookupResult;
import liqp.filters.date.fuzzy.Part;
class YearWithEra extends RegexPartExtractor {
@@ -14,6 +15,11 @@ public YearWithEra() {
null);
}
+ @Override
+ public LookupResult extract(List parts) {
+ return super.extract(parts);
+ }
+
@Override
public PartExtractorResult extract(String source, List parts, int i) {
Matcher matcher = pattern.matcher(source);
diff --git a/src/test/java/liqp/filters/DateTest.java b/src/test/java/liqp/filters/DateTest.java
index 7708ddc7..6778e887 100644
--- a/src/test/java/liqp/filters/DateTest.java
+++ b/src/test/java/liqp/filters/DateTest.java
@@ -240,32 +240,32 @@ public void test298InstantWhenEpochBeginAtUTC() {
@Test
public void testSupportedDateStrings() {
String[] tests = {
- "now",
- "today",
- "1 March",
- "MAR",
- "MARCH",
- "2024 MAR",
- "2 mar",
- "2 MAR",
- "march 2nd",
- "MARCH 2",
- "MARCH 2nd",
- "MARCH 3RD",
- "MARCH 3rD",
- "MARCH 4th",
- "MARCH 5th",
- "MARCH 10th",
- "2010-10-31",
- "Aug 2000",
- "Aug 31",
- "Wed Nov 28 14:33:20 2001",
- "Wed, 05 Oct 2011 22:26:12 -0400",
- "Wed, 05 Oct 2011 02:26:12 GMT",
- "Nov 29 14:33:20 2001",
- "05 Oct 2011 22:26:12 -0400",
- "06 Oct 2011 02:26:12 GMT",
- "2011-10-05T22:26:12-04:00",
+// "now",
+// "today",
+// "1 March",
+// "MAR",
+// "MARCH",
+// "2024 MAR",
+// "2 mar",
+// "2 MAR",
+// "march 2nd",
+// "MARCH 2",
+// "MARCH 2nd",
+// "MARCH 3RD",
+// "MARCH 3rD",
+// "MARCH 4th",
+// "MARCH 5th",
+// "MARCH 10th",
+// "2010-10-31",
+// "Aug 2000",
+// "Aug 31",
+// "Wed Nov 28 14:33:20 2001",
+// "Wed, 05 Oct 2011 22:26:12 -0400",
+// "Wed, 05 Oct 2011 02:26:12 GMT",
+// "Nov 29 14:33:20 2001",
+// "05 Oct 2011 22:26:12 -0400",
+// "06 Oct 2011 02:26:12 GMT",
+// "2011-10-05T22:26:12-04:00",
"0:00",
"1:00",
"01:00",
diff --git a/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserParametrizedErrorsTest.java b/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserParametrizedErrorsTest.java
index 9c478c24..87ae0382 100644
--- a/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserParametrizedErrorsTest.java
+++ b/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserParametrizedErrorsTest.java
@@ -22,8 +22,7 @@ public class FuzzyDateParserParametrizedErrorsTest {
@Parameterized.Parameters
public static Collection