-
Notifications
You must be signed in to change notification settings - Fork 96
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
90ea4d4
commit ef743c9
Showing 9 changed files with 214 additions and 65 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,26 @@ | ||
package liqp.filters.date.fuzzy; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.function.Supplier; | ||
import java.util.stream.Collectors; | ||
import liqp.filters.date.fuzzy.extractors.PartExtractorResult; | ||
|
||
public interface PartExtractor { | ||
|
||
PartExtractorResult extract(String source); | ||
|
||
default List<String> newList(String... el) { | ||
return Arrays.asList(el); | ||
} | ||
|
||
default List<String> appendToExisting(List<String> start, Supplier<List<String>> supplier) { | ||
if (start.isEmpty()) { | ||
return supplier.get(); | ||
} | ||
return start.stream() | ||
.flatMap(prefix -> supplier.get().stream().map(suffix -> prefix + suffix)) | ||
.collect(Collectors.toList()); | ||
} | ||
} |
159 changes: 159 additions & 0 deletions
159
src/main/java/liqp/filters/date/fuzzy/extractors/AnyYMDPatternExtractor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
package liqp.filters.date.fuzzy.extractors; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Optional; | ||
import java.util.regex.Matcher; | ||
|
||
abstract class AnyYMDPatternExtractor extends RegexPartExtractor { | ||
|
||
public enum RuleType { | ||
Y, M, D, PUNCTUATION; | ||
} | ||
public static class RulePart { | ||
private final RuleType type; | ||
private final Integer[] length; | ||
private final String content; | ||
private RulePart(RuleType type, String content) { | ||
this.type = type; | ||
this.content = content; | ||
this.length = new Integer[0]; | ||
} | ||
|
||
private RulePart(RuleType type, Integer[] length) { | ||
this.type = type; | ||
this.length = length; | ||
this.content = null; | ||
} | ||
} | ||
|
||
static RulePart pp(String content) { | ||
return new RulePart(RuleType.PUNCTUATION, content); | ||
} | ||
static RulePart pY(Integer length) { | ||
return new RulePart(RuleType.Y, new Integer[]{length}); | ||
} | ||
static RulePart pY(Integer length1, Integer length2) { | ||
return new RulePart(RuleType.Y, new Integer[]{length1, length2}); | ||
} | ||
static RulePart pM() { | ||
return new RulePart(RuleType.M, (Integer[])null); | ||
} | ||
static RulePart pD() { | ||
return new RulePart(RuleType.D, (Integer[])null); | ||
} | ||
private final RulePart[] partsInOrder; | ||
protected AnyYMDPatternExtractor(RulePart... partsInOrder) { | ||
super(reconstructPattern(partsInOrder), null); | ||
this.partsInOrder = partsInOrder; | ||
} | ||
|
||
private static String reconstructPattern(RulePart[] partsInOrder) { | ||
StringBuilder sb = new StringBuilder("(?:^|.*?\\D)"); | ||
for (RulePart part : partsInOrder) { | ||
if (part.type == RuleType.PUNCTUATION) { | ||
sb.append(part.content); | ||
} else { | ||
if (part.type == RuleType.Y) { | ||
if (part.length == null || part.length.length == 0) { | ||
throw new IllegalArgumentException("Year part must have length"); | ||
} | ||
if (part.length.length == 1) { | ||
sb.append("(?<year>\\d{").append(part.length[0]).append("})"); | ||
} else { | ||
sb.append("(?<year>\\d{").append(part.length[0]).append("}|\\d{") | ||
.append(part.length[1]).append("})"); | ||
} | ||
} else if (part.type == RuleType.M) { | ||
sb.append("(?<month>0?[1-9]|1[0-2])"); | ||
} else if (part.type == RuleType.D) { | ||
sb.append("(?<day>0?[1-9]|[12][0-9]|3[01])"); | ||
} | ||
} | ||
} | ||
sb.append("(?:$|\\D.*?)"); | ||
return sb.toString(); | ||
} | ||
|
||
@Override | ||
public PartExtractorResult extract(String source) { | ||
Matcher matcher = pattern.matcher(source); | ||
if (matcher.find()) { | ||
PartExtractorResult result = new PartExtractorResult(); | ||
result.found = true; | ||
result.start = matcher.start(findFirstGroupName()); | ||
result.end = matcher.end(findLastGroupName()); | ||
result.formatterPatterns = getPatterns(matcher); | ||
return result; | ||
} | ||
return new PartExtractorResult(); | ||
} | ||
|
||
private String findLastGroupName() { | ||
List<RulePart> list = new ArrayList<>(); | ||
Collections.addAll(list, partsInOrder); | ||
Collections.reverse(list); | ||
Optional<RulePart> first = list | ||
.stream() | ||
.filter(p -> p.type != RuleType.PUNCTUATION) | ||
.findFirst(); | ||
return getNoGroupNameFound(first); | ||
} | ||
|
||
private String findFirstGroupName() { | ||
Optional<RulePart> first = Arrays.stream(partsInOrder) | ||
.filter(p -> p.type != RuleType.PUNCTUATION) | ||
.findFirst(); | ||
return getNoGroupNameFound(first); | ||
} | ||
|
||
@SuppressWarnings("OptionalUsedAsFieldOrParameterType") | ||
private static String getNoGroupNameFound(Optional<RulePart> first) { | ||
return first.map(e -> { | ||
switch (e.type) { | ||
case Y: | ||
return "year"; | ||
case M: | ||
return "month"; | ||
case D: | ||
default: | ||
return "day"; | ||
} | ||
}).map(String::toLowerCase) | ||
.orElseThrow(() -> new IllegalArgumentException("No group name found")); | ||
} | ||
|
||
protected List<String> getPatterns(Matcher matcher) { | ||
List<String> start = new ArrayList<>(); | ||
for (RulePart part : partsInOrder) { | ||
start = appendToExisting(start, () -> { | ||
if (part.type == RuleType.Y) { | ||
if (matcher.group("year").length() == 2) { | ||
return newList("yy"); | ||
} else { | ||
return newList("yyyy"); | ||
} | ||
} else if (part.type == RuleType.M) { | ||
if (matcher.group("month").length() == 1) { | ||
return newList("M", "MM"); | ||
} else { | ||
return newList("MM", "M"); | ||
} | ||
} else if (part.type == RuleType.D) { | ||
if (matcher.group("day").length() == 1) { | ||
return newList("d", "dd"); | ||
} else { | ||
return newList("dd", "d"); | ||
} | ||
} else if (part.type == RuleType.PUNCTUATION) { | ||
return Collections.singletonList(part.content); | ||
} | ||
return Collections.singletonList(""); | ||
}); | ||
} | ||
return start; | ||
} | ||
|
||
} |
49 changes: 2 additions & 47 deletions
49
src/main/java/liqp/filters/date/fuzzy/extractors/EnglishDMYPatternExtractor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,52 +1,7 @@ | ||
package liqp.filters.date.fuzzy.extractors; | ||
|
||
import java.util.regex.Matcher; | ||
|
||
// NOTE(review): this is a diff hunk rendered without +/- markers, so the
// lines of the previous and the new version of the class are interleaved
// (two class headers, two super(...) calls). It is not compilable as shown.
// The hunk summary reports 2 additions and 47 deletions; presumably the
// "extends AnyYMDPatternExtractor" header and the super(pD(), ...) call are
// the two added lines and everything else is removed - confirm against the
// repository.
class EnglishDMYPatternExtractor extends RegexPartExtractor { | ||
class EnglishDMYPatternExtractor extends AnyYMDPatternExtractor { | ||
public EnglishDMYPatternExtractor() { | ||
super("(?:^|.*?\\D)" | ||
+ "(?<day>0?[1-9]|[12][0-9]|3[01])" | ||
+ "/" | ||
+ "(?<month>0?[1-9]|1[0-2])" | ||
+ "/" | ||
+ "(?<year>\\d{2}|\\d{4})" | ||
+ "(?:$|\\D.*?)", null); | ||
} | ||
|
||
|
||
@Override | ||
public PartExtractorResult extract(String source) { | ||
Matcher matcher = pattern.matcher(source); | ||
if (matcher.find()) { | ||
PartExtractorResult result = new PartExtractorResult(); | ||
result.found = true; | ||
result.start = matcher.start("day"); | ||
result.end = matcher.end("year"); | ||
result.formatterPattern = getPattern(matcher); | ||
return result; | ||
} | ||
return new PartExtractorResult(); | ||
} | ||
|
||
private String getPattern(Matcher matcher) { | ||
StringBuilder sbfp = new StringBuilder(); | ||
if (matcher.group("day").length() == 1) { | ||
sbfp.append("d"); | ||
} else { | ||
sbfp.append("dd"); | ||
} | ||
sbfp.append("/"); | ||
if (matcher.group("month").length() == 1) { | ||
sbfp.append("M"); | ||
} else { | ||
sbfp.append("MM"); | ||
} | ||
sbfp.append("/"); | ||
if (matcher.group("year").length() == 2) { | ||
sbfp.append("yy"); | ||
} else { | ||
sbfp.append("yyyy"); | ||
} | ||
return sbfp.toString(); | ||
// Layout passed to the base class: day, "/", month, "/", then a year of
// either 2 or 4 digits.
super(pD(), pp("/"), pM(), pp("/"), pY(2, 4)); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
21 changes: 21 additions & 0 deletions
21
src/test/java/liqp/filters/date/fuzzy/extractors/EnglishDMYPatternExtractorTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
package liqp.filters.date.fuzzy.extractors; | ||
|
||
import static org.junit.Assert.assertEquals; | ||
import static org.junit.Assert.assertTrue; | ||
|
||
import java.util.Arrays; | ||
import org.junit.Test; | ||
|
||
public class EnglishDMYPatternExtractorTest{ | ||
@Test | ||
public void test() { | ||
EnglishDMYPatternExtractor extractor = new EnglishDMYPatternExtractor(); | ||
PartExtractorResult extract = extractor.extract(" 1/1/11 "); | ||
assertTrue(extract.found); | ||
assertEquals(2, extract.start); | ||
assertEquals(8, extract.end); | ||
assertEquals(4, extract.formatterPatterns.size()); | ||
assertEquals(Arrays.asList("d/M/yy", "d/MM/yy", "dd/M/yy", "dd/MM/yy"), | ||
extract.formatterPatterns); | ||
} | ||
} |