Skip to content

Commit

Permalink
[WIP]
Browse files Browse the repository at this point in the history
  • Loading branch information
vkhrystiuk-ks committed Dec 19, 2024
1 parent 90ea4d4 commit ef743c9
Show file tree
Hide file tree
Showing 9 changed files with 214 additions and 65 deletions.
7 changes: 0 additions & 7 deletions src/main/java/liqp/filters/date/fuzzy/LookupResult.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,10 @@ class LookupResult {

final List<Part> parts;
final boolean found;
final DatePatternRecognizingContext ctx;

/**
 * Creates a lookup result that carries no recognizing context.
 *
 * @param parts value stored in the {@code parts} field
 * @param found value stored in the {@code found} field
 */
LookupResult(List<Part> parts, boolean found) {
this.parts = parts;
this.found = found;
// This overload receives no context, so the field is explicitly left null.
this.ctx = null;
}

/**
 * Creates a lookup result that also records the recognizing context it was produced with.
 *
 * @param parts value stored in the {@code parts} field
 * @param found value stored in the {@code found} field
 * @param ctx value stored in the {@code ctx} field; stored as given, without a null check
 */
LookupResult(List<Part> parts, boolean found, DatePatternRecognizingContext ctx) {
this.parts = parts;
this.found = found;
this.ctx = ctx;
}
}
18 changes: 18 additions & 0 deletions src/main/java/liqp/filters/date/fuzzy/PartExtractor.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,26 @@
package liqp.filters.date.fuzzy;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import liqp.filters.date.fuzzy.extractors.PartExtractorResult;

/**
 * Contract for components that try to locate one part of a date/time string, plus small helpers
 * for building lists of candidate pattern strings.
 */
public interface PartExtractor {

    /**
     * Attempts to extract this extractor's part from the given text.
     *
     * @param source text to inspect
     * @return the extraction result produced by the implementation
     */
    PartExtractorResult extract(String source);

    /**
     * Wraps the given elements in a list.
     *
     * @param el elements to wrap
     * @return a fixed-size list backed by the given array (see {@link Arrays#asList})
     */
    default List<String> newList(String... el) {
        return Arrays.asList(el);
    }

    /**
     * Combines every string in {@code start} with every string produced by {@code supplier},
     * prefix first. The result is ordered by prefix and, within one prefix, by suffix.
     *
     * <p>The supplier is invoked exactly once per call, so its result is shared by all prefixes.
     *
     * @param start prefixes collected so far; when empty, the supplied list is returned as-is
     * @param supplier provides the suffixes to append
     * @return the supplied list when {@code start} is empty, otherwise a new list holding every
     *     {@code prefix + suffix} combination
     */
    default List<String> appendToExisting(List<String> start, Supplier<List<String>> supplier) {
        // Evaluate once: the suffixes do not depend on the prefix being extended.
        List<String> suffixes = supplier.get();
        if (start.isEmpty()) {
            return suffixes;
        }
        List<String> combined = new ArrayList<>(start.size() * suffixes.size());
        for (String prefix : start) {
            for (String suffix : suffixes) {
                combined.add(prefix + suffix);
            }
        }
        return combined;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
package liqp.filters.date.fuzzy.extractors;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.regex.Matcher;

/**
 * Base extractor for numeric dates made of year, month and day components that appear in a fixed
 * order and are separated by literal punctuation.
 *
 * <p>Subclasses describe the layout with the {@code pY}/{@code pM}/{@code pD}/{@code pp}
 * factories; this class builds the matching regular expression from that description and, for a
 * match, produces the list of candidate formatter patterns.
 */
abstract class AnyYMDPatternExtractor extends RegexPartExtractor {

    /** Kind of element described by a {@link RulePart}. */
    public enum RuleType {
        Y, M, D, PUNCTUATION;
    }

    /** One element of a date layout: a year, month or day component, or a literal separator. */
    public static class RulePart {
        private final RuleType type;
        /** Allowed digit counts; only read for {@link RuleType#Y}, empty for everything else. */
        private final Integer[] length;
        /** Literal separator text; only set for {@link RuleType#PUNCTUATION}. */
        private final String content;

        private RulePart(RuleType type, String content) {
            this.type = type;
            this.content = content;
            this.length = new Integer[0];
        }

        private RulePart(RuleType type, Integer[] length) {
            this.type = type;
            this.length = length;
            this.content = null;
        }
    }

    /**
     * Describes a literal separator.
     *
     * @param content separator text, matched literally and copied verbatim into the formatter
     *     patterns
     * @return the rule part
     * @throws IllegalArgumentException if {@code content} is {@code null}
     */
    static RulePart pp(String content) {
        if (content == null) {
            throw new IllegalArgumentException("Punctuation content must not be null");
        }
        return new RulePart(RuleType.PUNCTUATION, content);
    }

    /**
     * Describes a year written with exactly {@code length} digits.
     *
     * @param length number of digits
     * @return the rule part
     */
    static RulePart pY(Integer length) {
        return new RulePart(RuleType.Y, new Integer[]{length});
    }

    /**
     * Describes a year written with either {@code length1} or {@code length2} digits.
     *
     * @param length1 first accepted number of digits (tried first by the regex)
     * @param length2 second accepted number of digits
     * @return the rule part
     */
    static RulePart pY(Integer length1, Integer length2) {
        return new RulePart(RuleType.Y, new Integer[]{length1, length2});
    }

    /**
     * Describes a month component (1-12, optional leading zero).
     *
     * @return the rule part
     */
    static RulePart pM() {
        // Empty rather than null so every RulePart has a non-null length array.
        return new RulePart(RuleType.M, new Integer[0]);
    }

    /**
     * Describes a day component (1-31, optional leading zero).
     *
     * @return the rule part
     */
    static RulePart pD() {
        return new RulePart(RuleType.D, new Integer[0]);
    }

    private final RulePart[] partsInOrder;

    /**
     * @param partsInOrder layout of the date, left to right
     * @throws IllegalArgumentException if a year part carries no length
     */
    protected AnyYMDPatternExtractor(RulePart... partsInOrder) {
        super(reconstructPattern(partsInOrder), null);
        this.partsInOrder = partsInOrder;
    }

    /**
     * Builds the regular expression for the given layout. The date must start at the beginning of
     * the input or right after a non-digit, and must end at the end of the input or right before
     * a non-digit. Components are captured in the named groups {@code year}, {@code month} and
     * {@code day}.
     */
    private static String reconstructPattern(RulePart[] partsInOrder) {
        StringBuilder regex = new StringBuilder("(?:^|.*?\\D)");
        for (RulePart part : partsInOrder) {
            switch (part.type) {
                case PUNCTUATION:
                    // Quote the separator: getPatterns emits it as literal text, so it must be
                    // matched literally too (an unquoted "." would accept any character).
                    regex.append(java.util.regex.Pattern.quote(part.content));
                    break;
                case Y:
                    if (part.length == null || part.length.length == 0) {
                        throw new IllegalArgumentException("Year part must have length");
                    }
                    regex.append("(?<year>\\d{").append(part.length[0]).append("}");
                    if (part.length.length > 1) {
                        regex.append("|\\d{").append(part.length[1]).append("}");
                    }
                    regex.append(")");
                    break;
                case M:
                    regex.append("(?<month>0?[1-9]|1[0-2])");
                    break;
                case D:
                    regex.append("(?<day>0?[1-9]|[12][0-9]|3[01])");
                    break;
                default:
                    break;
            }
        }
        regex.append("(?:$|\\D.*?)");
        return regex.toString();
    }

    /**
     * Searches {@code source} for the configured date layout.
     *
     * @param source text to inspect
     * @return a result with {@code found} set, {@code start} at the beginning of the first date
     *     component, {@code end} at the end of the last one and the candidate formatter patterns;
     *     or a freshly created result when nothing matches
     * @throws IllegalArgumentException if a match is found but the layout contains no year, month
     *     or day component
     */
    @Override
    public PartExtractorResult extract(String source) {
        Matcher matcher = pattern.matcher(source);
        if (!matcher.find()) {
            return new PartExtractorResult();
        }
        PartExtractorResult result = new PartExtractorResult();
        result.found = true;
        result.start = matcher.start(findFirstGroupName());
        result.end = matcher.end(findLastGroupName());
        result.formatterPatterns = getPatterns(matcher);
        return result;
    }

    /** Returns the group name of the right-most non-punctuation part. */
    private String findLastGroupName() {
        for (int i = partsInOrder.length - 1; i >= 0; i--) {
            if (partsInOrder[i].type != RuleType.PUNCTUATION) {
                return groupNameOf(partsInOrder[i].type);
            }
        }
        throw new IllegalArgumentException("No group name found");
    }

    /** Returns the group name of the left-most non-punctuation part. */
    private String findFirstGroupName() {
        for (RulePart part : partsInOrder) {
            if (part.type != RuleType.PUNCTUATION) {
                return groupNameOf(part.type);
            }
        }
        throw new IllegalArgumentException("No group name found");
    }

    /** Maps a date component type to the named capture group used in the regex. */
    private static String groupNameOf(RuleType type) {
        switch (type) {
            case Y:
                return "year";
            case M:
                return "month";
            case D:
                return "day";
            default:
                throw new IllegalArgumentException("No group name found");
        }
    }

    /**
     * Builds every candidate formatter pattern for a successful match by combining, left to
     * right, the alternatives of each layout part.
     *
     * @param matcher matcher positioned on a successful match of this extractor's pattern
     * @return all combinations, ordered with the alternatives of earlier parts varying slowest
     */
    protected List<String> getPatterns(Matcher matcher) {
        List<String> patterns = new ArrayList<>();
        for (RulePart part : partsInOrder) {
            // Resolve the alternatives once per part; they do not depend on the prefix.
            final List<String> alternatives = formatterAlternatives(part, matcher);
            patterns = appendToExisting(patterns, () -> alternatives);
        }
        return patterns;
    }

    /**
     * Returns the formatter fragments that can represent {@code part} for the current match. For
     * month and day both the one- and two-letter forms are offered, with the form whose width
     * equals the matched text first.
     */
    private List<String> formatterAlternatives(RulePart part, Matcher matcher) {
        switch (part.type) {
            case Y:
                return newList(matcher.group("year").length() == 2 ? "yy" : "yyyy");
            case M:
                return matcher.group("month").length() == 1
                        ? newList("M", "MM")
                        : newList("MM", "M");
            case D:
                return matcher.group("day").length() == 1
                        ? newList("d", "dd")
                        : newList("dd", "d");
            case PUNCTUATION:
                // NOTE(review): emitted verbatim; separators containing letters or quotes would
                // presumably need escaping for the consuming formatter - confirm.
                return Collections.singletonList(part.content);
            default:
                return Collections.singletonList("");
        }
    }

}
Original file line number Diff line number Diff line change
@@ -1,52 +1,7 @@
package liqp.filters.date.fuzzy.extractors;

import java.util.regex.Matcher;

class EnglishDMYPatternExtractor extends RegexPartExtractor {
class EnglishDMYPatternExtractor extends AnyYMDPatternExtractor {
public EnglishDMYPatternExtractor() {
super("(?:^|.*?\\D)"
+ "(?<day>0?[1-9]|[12][0-9]|3[01])"
+ "/"
+ "(?<month>0?[1-9]|1[0-2])"
+ "/"
+ "(?<year>\\d{2}|\\d{4})"
+ "(?:$|\\D.*?)", null);
}


@Override
public PartExtractorResult extract(String source) {
Matcher matcher = pattern.matcher(source);
if (matcher.find()) {
PartExtractorResult result = new PartExtractorResult();
result.found = true;
result.start = matcher.start("day");
result.end = matcher.end("year");
result.formatterPattern = getPattern(matcher);
return result;
}
return new PartExtractorResult();
}

private String getPattern(Matcher matcher) {
StringBuilder sbfp = new StringBuilder();
if (matcher.group("day").length() == 1) {
sbfp.append("d");
} else {
sbfp.append("dd");
}
sbfp.append("/");
if (matcher.group("month").length() == 1) {
sbfp.append("M");
} else {
sbfp.append("MM");
}
sbfp.append("/");
if (matcher.group("year").length() == 2) {
sbfp.append("yy");
} else {
sbfp.append("yyyy");
}
return sbfp.toString();
super(pD(), pp("/"), pM(), pp("/"), pY(2, 4));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public PartExtractorResult extract(String source) {
result.found = true;
result.start = matcher.start("year");
result.end = matcher.end("date");
result.formatterPattern = getPattern(matcher);
result.formatterPatterns = newList(getPattern(matcher));
return result;
}
return new PartExtractorResult();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,25 +47,27 @@ public PartExtractorResult extract(String source) {
}

r.start = m.start("hours");
String resPattern;
if (m.group("milliseconds") != null) {
int millisecondsLength = m.group("milliseconds").length();
r.end = m.end("milliseconds");
r.formatterPattern =
resPattern =
hourPart + ":mm:ss." + repeat("S", millisecondsLength);
} else if (m.group("seconds") != null) {
r.end = m.end("seconds");
r.formatterPattern = hourPart + ":mm:ss";
resPattern = hourPart + ":mm:ss";
} else if (m.group("minutes") != null) {
r.end = m.end("minutes");
r.formatterPattern = hourPart + ":mm";
resPattern = hourPart + ":mm";
} else {
r.end = m.end("hours");
r.formatterPattern = hourPart;
resPattern = hourPart;
}
if (hasAmPm) {
r.formatterPattern += ampmPart;
resPattern += ampmPart;
r.end = m.end("ampm");
}
r.formatterPatterns = newList(resPattern);
return r;
}
return new PartExtractorResult();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,23 @@ public PartExtractorResult extract(String source) {
PartExtractorResult result = new PartExtractorResult();
result.found = true;
result.start = matcher.start("year");
result.formatterPattern = repeat("y", matcher.group("year").length());
String resPattern = repeat("y", matcher.group("year").length());
String era = matcher.group("era");
if (!isBlank(era)) {
String eraSeparator = matcher.group("eraSeparator");
if (eraSeparator != null) {
result.formatterPattern += eraSeparator;
resPattern += eraSeparator;
}
result.end = matcher.end("era");
if (era.length() == 2) {
result.formatterPattern += "GG";
resPattern += "GG";
} else {
result.formatterPattern += "GGGG";
resPattern += "GGGG";
}
} else {
result.end = matcher.end("year");
}
result.formatterPatterns = newList(resPattern);
return result;
}
return new PartExtractorResult();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public void testTimeRegexp() {
assertTrue(result.found);
assertEquals( 1, result.start);
assertEquals( 6, result.end);
assertEquals("HH:mm", result.formatterPattern);
assertEquals("HH:mm", result.formatterPatterns.get(0));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package liqp.filters.date.fuzzy.extractors;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.util.Arrays;
import org.junit.Test;

/** Checks the day/month/year extractor against a short date with single-digit day and month. */
public class EnglishDMYPatternExtractorTest {

    /**
     * A date with a one-digit day, one-digit month and two-digit year is found at offsets 2..8
     * and yields all four day/month width combinations, single-letter forms first.
     */
    @Test
    public void test() {
        PartExtractorResult result = new EnglishDMYPatternExtractor().extract(" 1/1/11 ");

        assertTrue(result.found);
        assertEquals(2, result.start);
        assertEquals(8, result.end);
        assertEquals(4, result.formatterPatterns.size());
        assertEquals(
                Arrays.asList("d/M/yy", "d/MM/yy", "dd/M/yy", "dd/MM/yy"),
                result.formatterPatterns);
    }
}

0 comments on commit ef743c9

Please sign in to comment.