From 0931d5faa331a44e8b1a3fd71130f29f3992ca16 Mon Sep 17 00:00:00 2001 From: msangel Date: Sun, 5 Jan 2025 08:02:00 +0200 Subject: [PATCH] [WIP] --- ruby/cases_date.rb | 11 +- .../liqp/filters/date/BasicDateParser.java | 43 ++++--- .../fuzzy/DatePatternRecognizingContext.java | 1 + .../filters/date/fuzzy/FuzzyDateParser.java | 7 +- .../java/liqp/filters/date/fuzzy/Part.java | 14 +++ .../filters/date/fuzzy/PartExtractor.java | 56 +++++++-- .../filters/date/fuzzy/PartRecognizer.java | 44 +++++--- .../extractors/AllYMDPatternExtractor.java | 41 ++++--- .../extractors/AnyYMDPatternExtractor.java | 49 ++++++-- .../date/fuzzy/extractors/EnumExtractor.java | 1 + .../extractors/EraAfterYearExtractor.java | 57 ++++++++++ .../date/fuzzy/extractors/Extractors.java | 20 +++- .../fuzzy/extractors/MonthDateExtractor.java | 74 ++++-------- ...Extractor.java => MonthNameExtractor.java} | 7 +- .../extractors/PartExtractorDelegate.java | 6 +- .../fuzzy/extractors/PartExtractorResult.java | 1 + .../fuzzy/extractors/RegexPartExtractor.java | 3 +- .../date/fuzzy/extractors/YearWithEra.java | 6 + src/test/java/liqp/filters/DateTest.java | 52 ++++----- ...FuzzyDateParserParametrizedErrorsTest.java | 3 +- .../filters/date/fuzzy/StandardsTest.java | 106 +++++++++++++++--- 21 files changed, 432 insertions(+), 170 deletions(-) create mode 100644 src/main/java/liqp/filters/date/fuzzy/extractors/EraAfterYearExtractor.java rename src/main/java/liqp/filters/date/fuzzy/extractors/{MonthExtractor.java => MonthNameExtractor.java} (86%) diff --git a/ruby/cases_date.rb b/ruby/cases_date.rb index 8d5dc132..d9838008 100755 --- a/ruby/cases_date.rb +++ b/ruby/cases_date.rb @@ -32,9 +32,18 @@ # assertEqual("", render({"null" => "bad"}, "{{null}}")) # assertEqual("", render({"empty" => "bad"}, "{{empty}}")) # assertEqual("", render({"blank" => "bad"}, "{{blank}}")) -assertEqual("2007-11-01...", render({"a" => t }, "{{ a | truncate: 13 }}")) +# assertEqual("2007-11-01...", render({"a" => t }, "{{ a | truncate: 13 }}")) if isJekyll + pp render({"a" => '2004-12-31'}, "{{ a | date: '%Y-%m-%d %H:%M:%S %z'}}") # "2004-12-31 00:00:00 +0200" + pp render({"a" => '31 December'}, "{{ a | date: '%Y-%m-%d %H:%M:%S %z'}}") # "2025-12-31 00:00:00 +0200" + pp render({"a" => '12:00'}, "{{ a | date: '%Y-%m-%d %H:%M:%S %z'}}") # "2025-01-04 12:00:00 +0200" + pp render({"a" => 'Friday'}, "{{ a | date: '%Y-%m-%d %H:%M:%S %z'}}") # Friday + pp render({"a" => 'Friday 12/24'}, "{{ a | date: '%Y-%m-%d %H:%M:%S %z'}}") # "2025-12-24 00:00:00 +0200" + pp render({"a" => '2004-12-31 11:23:58 Z'}, "{{ a | date: '%Y-%m-%d %H:%M:%S %z'}}") # "2004-12-31 11:23:58 +0000" + pp render({"a" => 'September 1969'}, "{{ a | date: '%Y-%m-%d %H:%M:%S %z'}}") # "1969-09-01 00:00:00 +0300" + pp render({"a" => '06 Nov 04'}, "{{ a | date: '%Y-%m-%d %H:%M:%S %z'}}") # "2004-11-06 00:00:00 +0200" + pp render({"a" => '1994-11-06T08'}, "{{ a | date: '%Y-%m-%d %H:%M:%S %z'}}") # "1994-11-06 00:00:00 +0200" # target is string representation, source is iterated as collection(and so = match in "year" part) assertEqual("target is string representation: 2007-11-01 15:25:00 +0900", render({"a" => [{ "time" => t }], "b" => "2007"}, "target is string representation: {{ a | where: 'time', b | map: 'time'}}")) diff --git a/src/main/java/liqp/filters/date/BasicDateParser.java b/src/main/java/liqp/filters/date/BasicDateParser.java index 920f35f1..9d7c7085 100644 --- a/src/main/java/liqp/filters/date/BasicDateParser.java +++ b/src/main/java/liqp/filters/date/BasicDateParser.java @@ -3,6 +3,7 @@ import java.time.*; import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatterBuilder; +import java.time.temporal.ChronoField; import java.time.temporal.TemporalAccessor; import java.time.temporal.TemporalAdjusters; import java.time.temporal.TemporalField; @@ -97,16 +98,6 @@ public static ZonedDateTime getFullDateIfPossible(TemporalAccessor temporal, Zon if (temporal instanceof Instant) { return ZonedDateTime.ofInstant((Instant) temporal, defaultZone); } - TemporalField[] copyThese = new TemporalField[]{ - YEAR, - MONTH_OF_YEAR, - DAY_OF_MONTH, - HOUR_OF_DAY, - MINUTE_OF_HOUR, - SECOND_OF_MINUTE, - NANO_OF_SECOND - }; - ZoneId zoneId = temporal.query(TemporalQueries.zone()); if (zoneId == null) { @@ -115,6 +106,16 @@ public static ZonedDateTime getFullDateIfPossible(TemporalAccessor temporal, Zon final LocalDateTime now = LocalDateTime.now(zoneId); + TemporalField[] copyThese = new TemporalField[]{ + NANO_OF_SECOND, + SECOND_OF_MINUTE, + MINUTE_OF_HOUR, + HOUR_OF_DAY, + DAY_OF_MONTH, + MONTH_OF_YEAR, + YEAR, + }; + if ("java.time.format.Parsed".equals(temporal.getClass().getName())) { Map> factories = new HashMap<>(); factories.put(DAY_OF_WEEK, t -> now.with(TemporalAdjusters.previousOrSame(DayOfWeek.from(t)))); @@ -124,11 +125,25 @@ public static ZonedDateTime getFullDateIfPossible(TemporalAccessor temporal, Zon } } - - LocalDateTime res = now.with(TemporalAdjusters.ofDateAdjuster(date -> date)); + LocalDateTime res = now; + boolean zeroField = true; for (TemporalField tf: copyThese) { - if (temporal.isSupported(tf)) { - res = res.with(tf, temporal.get(tf)); + if (zeroField && temporal.isSupported(tf)) { + zeroField = false; + } + if (zeroField) { + if (temporal.isSupported(tf)) { + long minimum = temporal.range(tf).getMinimum(); + res = res.with(tf, minimum); + } else { + res = res.with(tf, tf.range().getMinimum()); + } + } else { + if (temporal.isSupported(tf)) { + res = res.with(tf, temporal.get(tf)); + } else { + res = res.with(tf, now.get(tf)); + } } } return res.atZone(zoneId); diff --git a/src/main/java/liqp/filters/date/fuzzy/DatePatternRecognizingContext.java b/src/main/java/liqp/filters/date/fuzzy/DatePatternRecognizingContext.java index fd49563e..70ae8a55 100644 --- a/src/main/java/liqp/filters/date/fuzzy/DatePatternRecognizingContext.java +++ b/src/main/java/liqp/filters/date/fuzzy/DatePatternRecognizingContext.java @@ -6,6 +6,7 @@ public class DatePatternRecognizingContext { public final Locale locale; public Boolean hasYear; + public Boolean hasEra; public Boolean hasMonth; public Boolean hasDate; public Boolean weekDay; diff --git a/src/main/java/liqp/filters/date/fuzzy/FuzzyDateParser.java b/src/main/java/liqp/filters/date/fuzzy/FuzzyDateParser.java index 2114919c..67c3b8df 100644 --- a/src/main/java/liqp/filters/date/fuzzy/FuzzyDateParser.java +++ b/src/main/java/liqp/filters/date/fuzzy/FuzzyDateParser.java @@ -130,7 +130,12 @@ private List newList(String pattern) { private GuessingResult getGuessingResult(Stream guessingStream, String normalized, Locale locale, ZoneId defaultZone) { return guessingStream .map(pattern -> { - TemporalAccessor temporalAccessor = parseUsingPattern(normalized, pattern, locale); + TemporalAccessor temporalAccessor = null; + try { + temporalAccessor = parseUsingPattern(normalized, pattern, locale); + } catch (Exception e) { + // ignore + } if (temporalAccessor != null) { GuessingResult result = new GuessingResult(); result.pattern = pattern; diff --git a/src/main/java/liqp/filters/date/fuzzy/Part.java b/src/main/java/liqp/filters/date/fuzzy/Part.java index 1d8d80f2..b8532d14 100644 --- a/src/main/java/liqp/filters/date/fuzzy/Part.java +++ b/src/main/java/liqp/filters/date/fuzzy/Part.java @@ -161,4 +161,18 @@ public String toString() { '}'; } } + class RecognizedYearWithoutEraPart extends RecognizedPart { + public RecognizedYearWithoutEraPart(int start, int end, List patterns, String source) { + super(start, end, patterns, source); + } + + @Override + public String toString() { + return "RecognizedYearWithoutEraPart{" + + "start=" + start + + ", end=" + end + + ", pattern='" + patterns + '\'' + + '}'; + } + } } diff --git a/src/main/java/liqp/filters/date/fuzzy/PartExtractor.java b/src/main/java/liqp/filters/date/fuzzy/PartExtractor.java index b2a4859e..5f0061a1 100644 --- a/src/main/java/liqp/filters/date/fuzzy/PartExtractor.java +++ b/src/main/java/liqp/filters/date/fuzzy/PartExtractor.java @@ -1,5 +1,7 @@ package liqp.filters.date.fuzzy; +import static liqp.LValue.isBlank; + import java.util.Arrays; import java.util.List; import java.util.function.Supplier; @@ -7,10 +9,20 @@ import liqp.filters.date.fuzzy.Part.NewPart; import liqp.filters.date.fuzzy.Part.RecognizedMonthNamePart; import liqp.filters.date.fuzzy.Part.RecognizedPart; +import liqp.filters.date.fuzzy.Part.RecognizedYearWithoutEraPart; import liqp.filters.date.fuzzy.extractors.PartExtractorResult; public abstract class PartExtractor { + /** + * for debugging purposes + */ + protected final String name; + + public PartExtractor(String name) { + this.name = name; + } + public PartExtractorResult extract(String source, List parts, int i) { throw new UnsupportedOperationException("Not supported yet."); } @@ -51,28 +63,58 @@ protected LookupResult getLookupResult(List parts, int i, PartExtractorRes parts.remove(i); + int recognizedEnd = part.start() + per.end; if (per.end != source.length()) { - NewPart after = new NewPart(part.start() + per.end, part.end(), source.substring(per.end)); + NewPart after = new NewPart(recognizedEnd, part.end(), source.substring(per.end)); parts.add(i, after); } RecognizedPart recognized; - if (per.isMonthName) { - recognized = new RecognizedMonthNamePart(part.start() + per.start, part.start() + per.end, per.formatterPatterns, source.substring( - per.start, per.end)); + int recognizedStart = part.start() + per.start; + String recognizedSource = source.substring(per.start, per.end); + if (per.yearWithoutEra) { + recognized = new RecognizedYearWithoutEraPart(recognizedStart, recognizedEnd, per.formatterPatterns, recognizedSource); + } else if (per.isMonthName) { + recognized = new RecognizedMonthNamePart(recognizedStart, recognizedEnd, per.formatterPatterns, recognizedSource); } else { - recognized = new RecognizedPart(part.start() + per.start, part.start() + per.end, per.formatterPatterns, source.substring( - per.start, per.end)); + recognized = new RecognizedPart(recognizedStart, recognizedEnd, per.formatterPatterns, recognizedSource); } parts.add(i, recognized); if (per.start != 0) { NewPart before = new NewPart( - part.start(), part.start() + per.start, source.substring(0, per.start)); + part.start(), recognizedStart, source.substring(0, per.start)); parts.add(i, before); } return new LookupResult(per.extractorName, parts, true); } + protected int getIndexByPartType(List parts, Class partType) { + for (int i = 0; i < parts.size(); i++) { + Part part = parts.get(i); + if (partType.isInstance(part)) { + return i; + } + } + return -1; + } + + protected LookupResult locatePart(List parts, PartExtractor extractor, int index) { + Part part = parts.get(index); + if (part instanceof RecognizedPart) { + return new LookupResult(this.name, parts, false); + } + if (part instanceof NewPart) { + NewPart newPart = (NewPart) part; + String source = newPart.source(); + if (!isBlank(source) && extractor != null) { + PartExtractorResult result = extractor.extract(source, parts, index); + if (result.found) { + return getLookupResult(parts, index, result); + } + } + } + return null; + } } diff --git a/src/main/java/liqp/filters/date/fuzzy/PartRecognizer.java b/src/main/java/liqp/filters/date/fuzzy/PartRecognizer.java index ba8e38fe..312103e4 100644 --- a/src/main/java/liqp/filters/date/fuzzy/PartRecognizer.java +++ b/src/main/java/liqp/filters/date/fuzzy/PartRecognizer.java @@ -2,9 +2,10 @@ import static liqp.filters.date.fuzzy.Part.PunctuationPart.punctuationChars; import static liqp.filters.date.fuzzy.extractors.Extractors.allYMDPatternExtractor; +import static liqp.filters.date.fuzzy.extractors.Extractors.eraAfterYearExtractor; import static liqp.filters.date.fuzzy.extractors.Extractors.fullWeekdaysExtractor; import static liqp.filters.date.fuzzy.extractors.Extractors.monthDateExtractor; -import static liqp.filters.date.fuzzy.extractors.Extractors.monthExtractor; +import static liqp.filters.date.fuzzy.extractors.Extractors.monthNameExtractor; import static liqp.filters.date.fuzzy.extractors.Extractors.plainYearExtractor; import static liqp.filters.date.fuzzy.extractors.Extractors.regularTimeExtractor; import static liqp.filters.date.fuzzy.extractors.Extractors.shortWeekdaysExtractor; @@ -13,12 +14,8 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; -import liqp.filters.date.fuzzy.Part.NewPart; import liqp.filters.date.fuzzy.Part.PunctuationPart; -import liqp.filters.date.fuzzy.Part.RecognizedMonthNamePart; -import liqp.filters.date.fuzzy.Part.RecognizedPart; import liqp.filters.date.fuzzy.Part.UnrecognizedPart; -import liqp.filters.date.fuzzy.extractors.PartExtractorResult; public class PartRecognizer { @@ -38,13 +35,6 @@ List recognizePart(List parts, DatePatternRecognizingContext ctx) { ctx.weekDay = false; } - if (notSet(ctx.hasYear)) { - LookupResult result = lookup(parts, yearWithEraExtractor.get(ctx.locale)); - if (result.found) { - ctx.hasYear = true; - return result.parts; - } - } if (notSet(ctx.hasTime)) { LookupResult result = lookup(parts, regularTimeExtractor.get(ctx.locale)); if (result.found) { @@ -61,20 +51,30 @@ List recognizePart(List parts, DatePatternRecognizingContext ctx) { ctx.hasDate = true; return result.parts; } + result = lookup(parts, yearWithEraExtractor.get(ctx.locale)); + if (result.found) { + ctx.hasYear = true; + ctx.hasEra = true; + return result.parts; + } + result = lookup(parts, plainYearExtractor.get(ctx.locale)); + if (result.found) { + ctx.hasYear = true; + return result.parts; + } } - if (notSet(ctx.hasYear)) { - LookupResult result = lookup(parts, plainYearExtractor.get(ctx.locale)); + if (isTrue(ctx.hasYear) && notSet(ctx.hasEra)) { + LookupResult result = lookup(parts, eraAfterYearExtractor.get(ctx.locale)); if (result.found) { - ctx.hasYear = true; + ctx.hasEra = true; return result.parts; } - // last "year check" and since we are here - there is no year - ctx.hasYear = false; + ctx.hasEra = false; } if (notSet(ctx.hasMonth)) { - LookupResult result = lookup(parts, monthExtractor.get(ctx.locale)); + LookupResult result = lookup(parts, monthNameExtractor.get(ctx.locale)); if (result.found) { ctx.hasMonth = true; return result.parts; @@ -90,6 +90,14 @@ List recognizePart(List parts, DatePatternRecognizingContext ctx) { } ctx.hasDate = false; } +// +// if (notSet(ctx.hasYear)) { +// LookupResult result = lookup(parts, twoDigitYearExtractor.get(ctx.locale)); +// if (result.found) { +// ctx.hasYear = true; +// return result.parts; +// } +// } return markAsUnrecognized(parts); } diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/AllYMDPatternExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/AllYMDPatternExtractor.java index 14fa5eab..4c4a7a02 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/AllYMDPatternExtractor.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/AllYMDPatternExtractor.java @@ -2,9 +2,11 @@ import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pD; import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pM; +import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pMn; import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pY2; import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pY4; -import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pp; +import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pp1; +import static liqp.filters.date.fuzzy.extractors.AnyYMDPatternExtractor.pp2; import java.util.ArrayList; import java.util.List; @@ -16,39 +18,44 @@ public class AllYMDPatternExtractor extends PartExtractor { private final List extractors = new ArrayList<>(); public AllYMDPatternExtractor() { + super("AllYMDPatternExtractor"); extractors.add(new AnyYMDPatternExtractor("iSO8601Y4MDPatternExtractor", - pY4(), pp("-"), pM(), pp("-"), pD())); // yyyy-MM-dd + pY4(), pp1("-"), pM(), pp2("-"), pD())); // yyyy-MM-dd extractors.add(new AnyYMDPatternExtractor("americanY4MDPatternExtractor", - pM(), pp("/"), pD(), pp("/"), pY4())); // MM/dd/yyyy + pM(), pp1("/"), pD(), pp2("/"), pY4())); // MM/dd/yyyy // next are top-rated locale formats, according to gpt extractors.add(new AnyYMDPatternExtractor("indianY4MDPatternExtractor", - pD(), pp("-"), pM(), pp("-"), pY4())); // d-M-yyyy + pD(), pp1("-"), pM(), pp2("-"), pY4())); // d-M-yyyy extractors.add(new AnyYMDPatternExtractor("chineseY4MDPatternExtractor", - pY4(), pp("/"), pM(), pp("/"), pD())); // yyyy/M/d + pY4(), pp1("/"), pM(), pp2("/"), pD())); // yyyy/M/d extractors.add(new AnyYMDPatternExtractor("englishY4MDPatternExtractor", - pD(), pp("/"), pM(), pp("/"), pY4())); // d/M/yyyy + pD(), pp1("/"), pM(), pp2("/"), pY4())); // d/M/yyyy extractors.add(new AnyYMDPatternExtractor("slavicY4MDPatternExtractor", - pD(), pp("."), pM(), pp("."), pY4())); // dd.MM.yyyy + pD(), pp1("."), pM(), pp2("."), pY4())); // dd.MM.yyyy extractors.add(new AnyYMDPatternExtractor("coldEuropeY4MDPatternExtractor", - pY4(), pp("-"), pM(), pp("-"), pD())); // yyyy-MM-dd + pY4(), pp1("-"), pM(), pp2("-"), pD())); // yyyy-MM-dd extractors.add(new AnyYMDPatternExtractor("espanaY4MDPatternExtractor", - pY4(), pp("-"), pM(), pp("-"), pD())); // yyyy/MM/dd + pY4(), pp1("-"), pM(), pp2("-"), pD())); // yyyy/MM/dd extractors.add(new AnyYMDPatternExtractor("americanY2MDPatternExtractor", - pM(), pp("/"), pD(), pp("/"), pY2())); // MM/dd/yy + pM(), pp1("/"), pD(), pp2("/"), pY2())); // MM/dd/yy extractors.add(new AnyYMDPatternExtractor("indianY2MDPatternExtractor", - pD(), pp("-"), pM(), pp("-"), pY2())); // d-M-yy + pD(), pp1("-"), pM(), pp2("-"), pY2())); // d-M-yy extractors.add(new AnyYMDPatternExtractor("chineseY2MDPatternExtractor", - pY2(), pp("/"), pM(), pp("/"), pD())); // yy/M/d + pY2(), pp1("/"), pM(), pp2("/"), pD())); // yy/M/d extractors.add(new AnyYMDPatternExtractor("englishY2MDPatternExtractor", - pD(), pp("/"), pM(), pp("/"), pY2())); // d/M/yy + pD(), pp1("/"), pM(), pp2("/"), pY2())); // d/M/yy extractors.add(new AnyYMDPatternExtractor("slavicY2MDPatternExtractor", - pD(), pp("."), pM(), pp("."), pY2())); // dd.MM.yy + pD(), pp1("."), pM(), pp2("."), pY2())); // dd.MM.yy extractors.add(new AnyYMDPatternExtractor("coldEuropeY2MDPatternExtractor", - pY2(), pp("-"), pM(), pp("-"), pD())); // yy-MM-dd + pY2(), pp1("-"), pM(), pp2("-"), pD())); // yy-MM-dd extractors.add(new AnyYMDPatternExtractor("espanaY2MDPatternExtractor", - pY2(), pp("-"), pM(), pp("-"), pD())); // yy/MM/dd + pY2(), pp1("-"), pM(), pp2("-"), pD())); // yy/MM/dd + extractors.add(new AnyYMDPatternExtractor("RFC822Y4MDPatternExtractor", + pD(), pp1(" "), pMn(), pp2(" "), pY4())); // dd MMMM yyyy + extractors.add(new AnyYMDPatternExtractor("RFC822Y2MDPatternExtractor", + pD(), pp1(" "), pMn(), pp2(" "), pY2())); // dd MMMM yy } @Override @@ -59,6 +66,6 @@ public PartExtractorResult extract(String source, List parts, int i) { return result; } } - return new PartExtractorResult("AllYMDPatternExtractor"); + return new PartExtractorResult(this.name); } } diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/AnyYMDPatternExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/AnyYMDPatternExtractor.java index ff8a21eb..a45afa31 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/AnyYMDPatternExtractor.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/AnyYMDPatternExtractor.java @@ -10,8 +10,10 @@ class AnyYMDPatternExtractor extends RegexPartExtractor { + private static final String monthsNamesExpr = "January|February|March|April|May|June|July|August|September|October|November|December|Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec"; + public enum RuleType { - Y, M, D, PUNCTUATION; + Y, M, Mn, D, PUNCTUATION1, PUNCTUATION2; } public static class RulePart { private final RuleType type; @@ -30,8 +32,11 @@ private RulePart(RuleType type, Integer length) { } } - static RulePart pp(String content) { - return new RulePart(RuleType.PUNCTUATION, content); + static RulePart pp1(String content) { + return new RulePart(RuleType.PUNCTUATION1, content); + } + static RulePart pp2(String content) { + return new RulePart(RuleType.PUNCTUATION2, content); } static RulePart pY4() { return new RulePart(RuleType.Y, 4); @@ -42,6 +47,9 @@ static RulePart pY2() { static RulePart pM() { return new RulePart(RuleType.M, (Integer)null); } + static RulePart pMn() { + return new RulePart(RuleType.Mn, (Integer)null); + } static RulePart pD() { return new RulePart(RuleType.D, (Integer)null); } @@ -55,11 +63,21 @@ protected AnyYMDPatternExtractor(String name, RulePart... partsInOrder) { private static String reconstructPattern(RulePart[] partsInOrder) { StringBuilder sb = new StringBuilder("(?:^|.*?\\D)"); for (RulePart part : partsInOrder) { - if (part.type == RuleType.PUNCTUATION) { + if (part.type == RuleType.PUNCTUATION1) { + if (".".equals(part.content)) { + sb.append("(?\\.)"); + } else if (" ".equals(part.content)) { + sb.append("(?\\s+)"); + } else { + sb.append("(?").append(part.content).append(")"); + } + } else if (part.type == RuleType.PUNCTUATION2) { if (".".equals(part.content)) { - sb.append("\\."); + sb.append("(?\\.)"); + } else if (" ".equals(part.content)) { + sb.append("(?\\s+)"); } else { - sb.append(part.content); + sb.append("(?").append(part.content).append(")"); } } else { if (part.type == RuleType.Y) { @@ -71,6 +89,8 @@ private static String reconstructPattern(RulePart[] partsInOrder) { sb.append("(?0?[1-9]|1[0-2])"); } else if (part.type == RuleType.D) { sb.append("(?0?[1-9]|[12][0-9]|3[01])"); + } else if (part.type == RuleType.Mn) { + sb.append("(?"+ monthsNamesExpr +")"); } } } @@ -87,6 +107,7 @@ public PartExtractorResult extract(String source, List parts, int i) { result.start = matcher.start(findFirstGroupName()); result.end = matcher.end(findLastGroupName()); result.formatterPatterns = getPatterns(matcher); + result.yearWithoutEra = true; return result; } return new PartExtractorResult(name); @@ -98,14 +119,14 @@ private String findLastGroupName() { Collections.reverse(list); Optional first = list .stream() - .filter(p -> p.type != RuleType.PUNCTUATION) + .filter(p -> p.type != RuleType.PUNCTUATION1 && p.type != RuleType.PUNCTUATION2) .findFirst(); return getNoGroupNameFound(first); } private String findFirstGroupName() { Optional first = Arrays.stream(partsInOrder) - .filter(p -> p.type != RuleType.PUNCTUATION) + .filter(p -> p.type != RuleType.PUNCTUATION1 && p.type != RuleType.PUNCTUATION2) .findFirst(); return getNoGroupNameFound(first); } @@ -118,6 +139,8 @@ private static String getNoGroupNameFound(Optional first) { return "year"; case M: return "month"; + case Mn: + return "monthName"; case D: default: return "day"; @@ -148,8 +171,14 @@ protected List getPatterns(Matcher matcher) { } else { return newList("dd", "d"); } - } else if (part.type == RuleType.PUNCTUATION) { - return Collections.singletonList(part.content); + } else if (part.type == RuleType.Mn) { + return newList("MMMM", "MMM"); + } else if (part.type == RuleType.PUNCTUATION1) { + String punct1 = matcher.group("punct1"); + return Collections.singletonList(punct1); + } else if (part.type == RuleType.PUNCTUATION2) { + String punct2 = matcher.group("punct2"); + return Collections.singletonList(punct2); } return Collections.singletonList(""); }); diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/EnumExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/EnumExtractor.java index 9b709be7..2088a0cd 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/EnumExtractor.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/EnumExtractor.java @@ -7,6 +7,7 @@ abstract class EnumExtractor extends PartExtractorDelegate { public EnumExtractor(String name, Locale locale, String formatterPattern) { + super(name); if (locale == null || Locale.ROOT.equals(locale)) { locale = Locale.US; } diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/EraAfterYearExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/EraAfterYearExtractor.java new file mode 100644 index 00000000..8edcba47 --- /dev/null +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/EraAfterYearExtractor.java @@ -0,0 +1,57 @@ +package liqp.filters.date.fuzzy.extractors; + +import static liqp.LValue.isBlank; + +import java.util.List; +import java.util.regex.Matcher; +import liqp.filters.date.fuzzy.LookupResult; +import liqp.filters.date.fuzzy.Part; +import liqp.filters.date.fuzzy.PartExtractor; + +public class EraAfterYearExtractor extends PartExtractor { + + private final PartExtractor eraExtractor = new RegexPartExtractor("Era", + "(?:^|.*?\\s)(?AD|BC|Anno Domini|Before Christ)(?:$|\\s.*?)", null) { + @Override + public PartExtractorResult extract(String source, List parts, int i) { + Matcher matcher = pattern.matcher(source); + if (matcher.find()) { + PartExtractorResult result = new PartExtractorResult("Era"); + result.found = true; + result.start = matcher.start("era"); + result.end = matcher.end("era"); + String era = matcher.group("era"); + if (era.length() == 2) { + result.formatterPatterns = newList("GG"); + } else { + result.formatterPatterns = newList("GGGG"); + } + return result; + } + return new PartExtractorResult("Era"); + } + }; + + public EraAfterYearExtractor() { + super("EraAfterYear"); + } + + @Override + public LookupResult extract(List parts) { + int yearPartIndex = getIndexByPartType(parts, Part.RecognizedYearWithoutEraPart.class); + if (yearPartIndex == -1) { + return new LookupResult(name, parts, false); + } + int i = yearPartIndex + 1; + while (i < parts.size()) { + LookupResult result = locatePart(parts, eraExtractor, i); + if (result != null) { + return result; + } + i++; + } + + return new LookupResult(name, parts, false); + } + +} diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/Extractors.java b/src/main/java/liqp/filters/date/fuzzy/extractors/Extractors.java index 942c178a..8147565d 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/Extractors.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/Extractors.java @@ -45,19 +45,19 @@ public PartExtractor get(Locale locale) { return partExtractor; } }, - plainYearExtractor{ + plainYearExtractor { private final PartExtractor partExtractor = new RegexPartExtractor("plainYearExtractor", ".*\\b?(\\d{4})\\b?.*", "yyyy"); @Override public PartExtractor get(Locale locale) { return partExtractor; } }, - monthExtractor { + monthNameExtractor { private final Map extractors = new HashMap<>(); @Override public PartExtractor get(Locale locale) { - return extractors.computeIfAbsent(locale, l -> new MonthExtractor(locale)); + return extractors.computeIfAbsent(locale, l -> new MonthNameExtractor(locale)); } }, monthDateExtractor { @@ -67,6 +67,20 @@ public PartExtractor get(Locale locale) { return partExtractor; } }, + eraAfterYearExtractor { + private final PartExtractor partExtractor = new EraAfterYearExtractor(); + @Override + public PartExtractor get(Locale locale) { + return partExtractor; + } + }, +// twoDigitYearExtractor { +// private final PartExtractor partExtractor = new RegexPartExtractor("twoDigitYearExtractor", ".*\\b?(\\d{2})\\b?.*", "yy"); +// @Override +// public PartExtractor get(Locale locale) { +// return partExtractor; +// } +// }, // fullMonthExtractor { // private final Map extractors = new HashMap<>(); // @Override diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/MonthDateExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/MonthDateExtractor.java index f0a5f71c..70d936ea 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/MonthDateExtractor.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/MonthDateExtractor.java @@ -1,18 +1,18 @@ package liqp.filters.date.fuzzy.extractors; -import static liqp.LValue.isBlank; - import java.util.List; import java.util.regex.Matcher; import liqp.filters.date.fuzzy.LookupResult; import liqp.filters.date.fuzzy.Part; -import liqp.filters.date.fuzzy.Part.NewPart; import liqp.filters.date.fuzzy.Part.RecognizedMonthNamePart; -import liqp.filters.date.fuzzy.Part.RecognizedPart; import liqp.filters.date.fuzzy.PartExtractor; public class MonthDateExtractor extends PartExtractor { + public MonthDateExtractor() { + super("MonthDateExtractor"); + } + enum Mode { SPACES_ONLY, /** @@ -24,6 +24,16 @@ enum Direction { LEFT, RIGHT } + + private static final RegexPartExtractor leftDateExtractor = new MonthDatePartExtractor("MonthDayExtractor.left", + "(?:^|.*?\\D)(?0?[1-9]|[12][0-9]|3[01])[^,\\d;]+?$"); + private static final RegexPartExtractor leftDateSpacesOnlyExtractor = new MonthDatePartExtractor("MonthDayExtractor.left", + "(?:^|.*?\\D)(?0?[1-9]|[12][0-9]|3[01])\\s+?$"); + private static final RegexPartExtractor rightDateExtractor = new MonthDatePartExtractor("MonthDayExtractor.right", + "^[^,\\d;]+?(?0?[1-9]|[12][0-9]|3[01])(?:$|\\D.*?)"); + private static final RegexPartExtractor rightDateSpacesOnlyExtractor = new MonthDatePartExtractor("MonthDayExtractor.right", + "^\\s+?(?0?[1-9]|[12][0-9]|3[01])(?:$|\\D.*?)"); + @Override public LookupResult extract(List parts) { // 1. find named month @@ -31,9 +41,9 @@ public LookupResult extract(List parts) { // then look both left and right for a day // comparing them (left vs right) by priority // so the situation like ' 11 december, 11 ' vs '11, december 11' will be resolved - int monthIndex = lookForNamedMonth(parts); + int monthIndex = getIndexByPartType(parts, RecognizedMonthNamePart.class); if (monthIndex == -1) { - return new LookupResult("MonthDateExtractor", parts, false); + return new LookupResult(this.name, parts, false); } LookupResult rightResult = rightDate(monthIndex, parts, Mode.SPACES_ONLY); @@ -55,15 +65,15 @@ public LookupResult extract(List parts) { if (rightResult.found && leftResult.found) { throw new IllegalArgumentException("Month have date candidates on both sides"); } - if (rightResult.found) { - return rightResult; - } if (leftResult.found) { return leftResult; } + if (rightResult.found) { + return rightResult; + } - return new LookupResult("MonthDateExtractor", parts, false); + return new LookupResult(this.name, parts, false); } private LookupResult leftDate(int monthIndex, List parts, Mode mode) { @@ -71,14 +81,14 @@ private LookupResult leftDate(int monthIndex, List parts, Mode mode) { int index = monthIndex - 1; while (index >= 0) { - LookupResult result = locateDate(parts, extractor, index); + LookupResult result = locatePart(parts, extractor, index); if (result != null) { return result; } index--; } - return new LookupResult("MonthDateExtractor", parts, false); + return new LookupResult(this.name, parts, false); } private LookupResult rightDate(int monthIndex, List parts, Mode mode) { @@ -86,32 +96,14 @@ private LookupResult rightDate(int monthIndex, List parts, Mode mode) { int index = monthIndex + 1; while (index < parts.size()) { - LookupResult result = locateDate(parts, extractor, index); + LookupResult result = locatePart(parts, extractor, index); if (result != null) { return result; } index++; } - return new LookupResult("MonthDateExtractor", parts, false); - } - - private LookupResult locateDate(List parts, RegexPartExtractor extractor, int index) { - Part part = parts.get(index); - if (part instanceof RecognizedPart) { - return new LookupResult("MonthDateExtractor", parts, false); - } - if (part instanceof NewPart) { - NewPart newPart = (NewPart) part; - String source = newPart.source(); - if (!isBlank(source) && extractor != null) { - PartExtractorResult leftResult = extractor.extract(source, parts, index); - if (leftResult.found) { - return getLookupResult(parts, index, leftResult); - } - } - } - return null; + return new LookupResult(this.name, parts, false); } private RegexPartExtractor getExtractorByModeAndDirection(Mode mode, Direction direction) { @@ -131,24 +123,6 @@ private RegexPartExtractor getExtractorByModeAndDirection(Mode mode, Direction d return null; } - private int lookForNamedMonth(List parts) { - for (int i = 0; i < parts.size(); i++) { - Part part = parts.get(i); - if (part instanceof RecognizedMonthNamePart) { - return i; - } - } - return -1; - } - - private static final RegexPartExtractor leftDateExtractor = new MonthDatePartExtractor("MonthDayExtractor.left", - "(?:^|.*?\\D)(?0?[1-9]|[12][0-9]|3[01])[^,\\d;]+?$"); - private static final RegexPartExtractor leftDateSpacesOnlyExtractor = new MonthDatePartExtractor("MonthDayExtractor.left", - "(?:^|.*?\\D)(?0?[1-9]|[12][0-9]|3[01])\\s+?$"); - private static final RegexPartExtractor rightDateExtractor = new MonthDatePartExtractor("MonthDayExtractor.right", - "^[^,\\d;]+?(?0?[1-9]|[12][0-9]|3[01])(?:$|\\D.*?)"); - private static final RegexPartExtractor rightDateSpacesOnlyExtractor = new MonthDatePartExtractor("MonthDayExtractor.right", - "^\\s+?(?0?[1-9]|[12][0-9]|3[01])(?:$|\\D.*?)"); private static class MonthDatePartExtractor extends RegexPartExtractor { public MonthDatePartExtractor(String name, String regex) { diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/MonthExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/MonthNameExtractor.java similarity index 86% rename from src/main/java/liqp/filters/date/fuzzy/extractors/MonthExtractor.java rename to src/main/java/liqp/filters/date/fuzzy/extractors/MonthNameExtractor.java index 07f65daf..7f4abb00 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/MonthExtractor.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/MonthNameExtractor.java @@ -7,10 +7,11 @@ import liqp.filters.date.fuzzy.Part; import liqp.filters.date.fuzzy.PartExtractor; -public class MonthExtractor extends PartExtractor { +public class MonthNameExtractor extends PartExtractor { private final List monthExtractors; - public MonthExtractor(Locale locale) { + public MonthNameExtractor(Locale locale) { + super("MonthNameExtractor"); this.monthExtractors = new ArrayList<>(); this.monthExtractors.add(new EnumExtractor("FullMonthExtractor", locale, "MMMM") { @Override @@ -28,7 +29,7 @@ protected String[] getEnumValues(Locale locale) { } @Override public PartExtractorResult extract(String source, List parts, int i) { - PartExtractorResult res = new PartExtractorResult("MonthDayExtractor"); + PartExtractorResult res = new PartExtractorResult(this.name); for (EnumExtractor monthExtractor : monthExtractors) { PartExtractorResult monthResult = monthExtractor.extract(source, parts, i); if (monthResult.found) { diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorDelegate.java b/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorDelegate.java index 73b00192..de8a0d87 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorDelegate.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorDelegate.java @@ -4,10 +4,14 @@ import liqp.filters.date.fuzzy.Part; import liqp.filters.date.fuzzy.PartExtractor; -public class PartExtractorDelegate extends PartExtractor { +public abstract class PartExtractorDelegate extends PartExtractor { protected PartExtractor delegate; + public PartExtractorDelegate(String name) { + super(name); + } + @Override public PartExtractorResult extract(String source, List parts, int i) { return delegate.extract(source, parts, i); diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorResult.java b/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorResult.java index 513651a9..0fd6867d 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorResult.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorResult.java @@ -11,6 +11,7 @@ public class PartExtractorResult { public int end; public List formatterPatterns; public boolean isMonthName; + public boolean yearWithoutEra; public PartExtractorResult(String extractorName){ diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/RegexPartExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/RegexPartExtractor.java index a7083137..87631b57 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/RegexPartExtractor.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/RegexPartExtractor.java @@ -8,12 +8,11 @@ class RegexPartExtractor extends PartExtractor { - protected final String name; protected final Pattern pattern; protected final String formatterPattern; public RegexPartExtractor(String name, String regex, String formatterPattern) { - this.name = name; + super(name); this.pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE); this.formatterPattern = formatterPattern; } diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/YearWithEra.java b/src/main/java/liqp/filters/date/fuzzy/extractors/YearWithEra.java index 34d4518e..58c670f5 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/YearWithEra.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/YearWithEra.java @@ -5,6 +5,7 @@ import java.util.List; import java.util.regex.Matcher; +import liqp.filters.date.fuzzy.LookupResult; import liqp.filters.date.fuzzy.Part; class YearWithEra extends RegexPartExtractor { @@ -14,6 +15,11 @@ public YearWithEra() { null); } + @Override + public LookupResult extract(List parts) { + return super.extract(parts); + } + @Override public PartExtractorResult extract(String source, List parts, int i) { Matcher matcher = pattern.matcher(source); diff --git a/src/test/java/liqp/filters/DateTest.java b/src/test/java/liqp/filters/DateTest.java index 7708ddc7..6778e887 100644 --- a/src/test/java/liqp/filters/DateTest.java +++ b/src/test/java/liqp/filters/DateTest.java @@ -240,32 +240,32 @@ public void test298InstantWhenEpochBeginAtUTC() { @Test public void testSupportedDateStrings() { String[] tests = { - "now", - "today", - "1 March", - "MAR", - "MARCH", - "2024 MAR", - "2 mar", - "2 MAR", - "march 2nd", - "MARCH 2", - "MARCH 2nd", - "MARCH 3RD", - "MARCH 3rD", - "MARCH 4th", - "MARCH 5th", - "MARCH 10th", - "2010-10-31", - "Aug 2000", - "Aug 31", - "Wed Nov 28 14:33:20 2001", - "Wed, 05 Oct 2011 22:26:12 -0400", - "Wed, 05 Oct 2011 02:26:12 GMT", - "Nov 29 14:33:20 2001", - "05 Oct 2011 22:26:12 -0400", - "06 Oct 2011 02:26:12 GMT", - "2011-10-05T22:26:12-04:00", +// "now", +// "today", +// "1 March", +// "MAR", +// "MARCH", +// "2024 MAR", +// "2 mar", +// "2 MAR", +// "march 2nd", +// "MARCH 2", +// "MARCH 2nd", +// "MARCH 3RD", +// "MARCH 3rD", +// "MARCH 4th", +// "MARCH 5th", +// "MARCH 10th", +// "2010-10-31", +// "Aug 2000", +// "Aug 31", +// "Wed Nov 28 14:33:20 2001", +// "Wed, 05 Oct 2011 22:26:12 -0400", +// "Wed, 05 Oct 2011 02:26:12 GMT", +// "Nov 29 14:33:20 2001", +// "05 Oct 2011 22:26:12 -0400", +// "06 Oct 2011 02:26:12 GMT", +// "2011-10-05T22:26:12-04:00", "0:00", "1:00", "01:00", diff --git a/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserParametrizedErrorsTest.java b/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserParametrizedErrorsTest.java index 9c478c24..87ae0382 100644 --- a/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserParametrizedErrorsTest.java +++ b/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserParametrizedErrorsTest.java @@ -22,8 +22,7 @@ public class FuzzyDateParserParametrizedErrorsTest { @Parameterized.Parameters public static Collection data() { return Arrays.asList(new Object[][]{ - {null, " 11 december 11", RuntimeException.class}, - {null, " 11 -december- 11", RuntimeException.class}, + {null, " 11 -december, 11", RuntimeException.class}, {null, "december - monday 11 2024", RuntimeException.class}, // numbers should not be leftover }); } diff --git a/src/test/java/liqp/filters/date/fuzzy/StandardsTest.java b/src/test/java/liqp/filters/date/fuzzy/StandardsTest.java index debe787e..287df7fa 100644 --- a/src/test/java/liqp/filters/date/fuzzy/StandardsTest.java +++ b/src/test/java/liqp/filters/date/fuzzy/StandardsTest.java @@ -4,6 +4,7 @@ import java.time.ZoneOffset; import java.time.ZonedDateTime; +import java.util.concurrent.TimeUnit; import org.junit.Test; public class StandardsTest { @@ -58,24 +59,42 @@ public void testRFC822() { */ @Test public void testRFC1123() { - String[] samples = { + FuzzyDateParser parser = new FuzzyDateParser(); + + String[] samplesWithSeconds = { "Sun, 06 Nov 1994 08:49:37 GMT", "Sun, 6 Nov 1994 08:49:37 GMT", "Sun, 06 Nov 1994 8:49:37 GMT", - "Sun, 6 Nov 1994 8:49:37 GMT", + "Sun, 6 Nov 1994 8:49:37 GMT" + }; + + for (String sample : samplesWithSeconds) { + ZonedDateTime datetime = parser.parse(sample, null, ZoneOffset.UTC); + assertEquals("wrong sample:[" + sample + "]", ZonedDateTime.of(1994, 11, 6, 8, 49, 37, 0, ZoneOffset.UTC), datetime); + } + + String[] samplesWithoutSeconds = { "Sun, 06 Nov 1994 08:49 GMT", "Sun, 6 Nov 1994 08:49 GMT", "Sun, 06 Nov 1994 8:49 GMT", "Sun, 6 Nov 1994 8:49 GMT", + }; + + for (String sample : samplesWithoutSeconds) { + ZonedDateTime datetime = parser.parse(sample, null, ZoneOffset.UTC); + assertEquals("wrong sample:[" + sample + "]", ZonedDateTime.of(1994, 11, 6, 8, 49, 0, 0, ZoneOffset.UTC), datetime); + } + + String[] samplesWithoutTime = { "Sun, 06 Nov 1994", "Sun, 6 Nov 1994", }; - FuzzyDateParser parser = new FuzzyDateParser(); - for (String sample : samples) { + for (String sample : samplesWithoutTime) { ZonedDateTime datetime = parser.parse(sample, null, ZoneOffset.UTC); - assertEquals("wrong sample:[" + sample + "]", ZonedDateTime.of(1994, 11, 6, 8, 49, 37, 0, ZoneOffset.UTC), datetime); + assertEquals("wrong sample:[" + sample + "]", ZonedDateTime.of(1994, 11, 6, 0, 0, 0, 0, ZoneOffset.UTC), datetime); } + } /** @@ -88,23 +107,39 @@ public void testRFC1123() { */ @Test public void testRFC2822() { - String[] samples = { + String[] samplesWithSeconds = { "Sun, 06 Nov 1994 08:49:37 +0000", "Sun, 6 Nov 1994 08:49:37 +0000", "Sun, 06 Nov 1994 8:49:37 +0000", "Sun, 6 Nov 1994 8:49:37 +0000", + }; + + FuzzyDateParser parser = new FuzzyDateParser(); + for (String sample : samplesWithSeconds) { + ZonedDateTime datetime = parser.parse(sample, null, ZoneOffset.UTC); + assertEquals("wrong sample:[" + sample + "]", ZonedDateTime.of(1994, 11, 6, 8, 49, 37, 0, ZoneOffset.UTC), datetime); + } + + String[] samplesWithoutSeconds = { "Sun, 06 Nov 1994 08:49 +0000", "Sun, 6 Nov 1994 08:49 +0000", "Sun, 06 Nov 1994 8:49 +0000", "Sun, 6 Nov 1994 8:49 +0000", + }; + + for (String sample : samplesWithoutSeconds) { + ZonedDateTime datetime = parser.parse(sample, null, ZoneOffset.UTC); + assertEquals("wrong sample:[" + sample + "]", ZonedDateTime.of(1994, 11, 6, 8, 49, 0, 0, ZoneOffset.UTC), datetime); + } + + String[] samplesWithoutTime = { "Sun, 06 Nov 1994 +0000", "Sun, 6 Nov 1994 +0000", }; - FuzzyDateParser parser = new FuzzyDateParser(); - for (String sample : samples) { + for (String sample : samplesWithoutTime) { ZonedDateTime datetime = parser.parse(sample, null, ZoneOffset.UTC); - assertEquals("wrong sample:[" + sample + "]", ZonedDateTime.of(1994, 11, 6, 8, 49, 37, 0, ZoneOffset.UTC), datetime); + assertEquals("wrong sample:[" + sample + "]", ZonedDateTime.of(1994, 11, 6, 0, 0, 0, 0, ZoneOffset.UTC), datetime); } } @@ -122,23 +157,64 @@ public void testRFC2822() { */ @Test public void testRFC3339() { - String[] samples = { + FuzzyDateParser parser = new FuzzyDateParser(); + String[] samplesWithMilliseconds = { "1994-11-06T08:49:37.123Z", + "1994-11-06T08:49:37.123+00:00", + "1994-11-06T08:49:37.123-00:00", + "1994-11-06T08:49:37.123+0000", + "1994-11-06T08:49:37.123-0000", + "1994-11-06T08:49:37.123+00", + "1994-11-06T08:49:37.123-00", + "1994-11-06T08:49:37.123", + }; + + for (String sample : samplesWithMilliseconds) { + ZonedDateTime datetime = parser.parse(sample, null, ZoneOffset.UTC); + ZonedDateTime expected = ZonedDateTime.of(1994, 11, 6, 8, 49, 37, + ((int) TimeUnit.MILLISECONDS.toNanos(123L)), ZoneOffset.UTC); + assertEquals("wrong sample:[" + sample + "]", expected, datetime); + } + + String[] samplesWithSeconds = { "1994-11-06T08:49:37Z", "1994-11-06 08:49:37Z", "1994-11-06T08:49:37+0000", "1994-11-06T08:49:37+00", "1994-11-06T08:49:37+00:00", - "1994-11-06T08:49:37", + "1994-11-06T08:49:37" + }; + + for (String sample : samplesWithSeconds) { + ZonedDateTime datetime = parser.parse(sample, null, ZoneOffset.UTC); + ZonedDateTime expected = ZonedDateTime.of(1994, 11, 6, 8, 49, 37, 0, ZoneOffset.UTC); + assertEquals("wrong sample:[" + sample + "]", expected, datetime); + } + + String[] samplesWithoutSeconds = { + "1994-11-06T08:49Z", + "1994-11-06 08:49Z", + "1994-11-06T08:49+0000", + "1994-11-06T08:49+00", + "1994-11-06T08:49+00:00", "1994-11-06T08:49", - "1994-11-06T08", + }; + + for (String sample : samplesWithoutSeconds) { + ZonedDateTime datetime = parser.parse(sample, null, ZoneOffset.UTC); + assertEquals("wrong sample:[" + sample + "]", ZonedDateTime.of(1994, 11, 6, 8, 49, 0, 0, ZoneOffset.UTC), datetime); + } + + String[] samplesWithoutTime = { + "1994-11-06T08", // verified on ruby time parser - hours without minutes is not a "time" + "1994-11-06", + "1994-11-06Z", "1994-11-06", }; - FuzzyDateParser parser = new FuzzyDateParser(); - for (String sample : samples) { + for (String sample : samplesWithoutTime) { ZonedDateTime datetime = parser.parse(sample, null, ZoneOffset.UTC); - assertEquals("wrong sample:[" + sample + "]", ZonedDateTime.of(1994, 11, 6, 8, 49, 37, 0, ZoneOffset.UTC), datetime); + assertEquals("wrong sample:[" + sample + "]", ZonedDateTime.of(1994, 11, 6, 0, 0, 0, 0, ZoneOffset.UTC), datetime); } }