From ea88eb987ad66bab55f721127a5fafeb9af88a49 Mon Sep 17 00:00:00 2001 From: Crystark Date: Mon, 15 May 2017 16:02:02 +0200 Subject: [PATCH 1/2] #53 Allow to ack emits Also allow use of isOnlyWholeWords, isOnlyWholeWordsWhiteSpaceSeparated and isAllowOverlaps using a StatefulEmitHandler --- src/main/java/org/ahocorasick/trie/Trie.java | 14 ++++--- .../trie/handler/DefaultEmitHandler.java | 6 ++- .../ahocorasick/trie/handler/EmitHandler.java | 2 +- .../trie/handler/StatefulEmitHandler.java | 9 +++++ .../java/org/ahocorasick/trie/TrieTest.java | 40 ++++++++++++++++++- 5 files changed, 61 insertions(+), 10 deletions(-) create mode 100644 src/main/java/org/ahocorasick/trie/handler/StatefulEmitHandler.java diff --git a/src/main/java/org/ahocorasick/trie/Trie.java b/src/main/java/org/ahocorasick/trie/Trie.java index 0c6ece0..b618289 100644 --- a/src/main/java/org/ahocorasick/trie/Trie.java +++ b/src/main/java/org/ahocorasick/trie/Trie.java @@ -4,6 +4,7 @@ import org.ahocorasick.interval.Intervalable; import org.ahocorasick.trie.handler.DefaultEmitHandler; import org.ahocorasick.trie.handler.EmitHandler; +import org.ahocorasick.trie.handler.StatefulEmitHandler; import java.util.ArrayList; import java.util.Collection; @@ -103,10 +104,13 @@ private Token createMatch(Emit emit, String text) { return new MatchToken(text.substring(emit.getStart(), emit.getEnd() + 1), emit); } - @SuppressWarnings("unchecked") public Collection parseText(final CharSequence text) { - final DefaultEmitHandler emitHandler = new DefaultEmitHandler(); - parseText(text, emitHandler); + return parseText(text, new DefaultEmitHandler()); + } + + @SuppressWarnings("unchecked") + public Collection parseText(final CharSequence text, final StatefulEmitHandler emitHandler) { + parseText(text, (EmitHandler) emitHandler); final List collectedEmits = emitHandler.getEmits(); @@ -281,8 +285,8 @@ private boolean storeEmits(final int position, final State currentState, final E // TODO: The check for empty might be superfluous. if (emits != null && !emits.isEmpty()) { for (final String emit : emits) { - emitHandler.emit(new Emit(position - emit.length() + 1, position, emit)); - emitted = true; + emitted = emitHandler.emit(new Emit(position - emit.length() + 1, position, emit)) || emitted; + if(emitted && trieConfig.isStopOnHit()) break; } } diff --git a/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java b/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java index 4531f3d..0e9236f 100644 --- a/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java +++ b/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java @@ -5,15 +5,17 @@ import java.util.ArrayList; import java.util.List; -public class DefaultEmitHandler implements EmitHandler { +public class DefaultEmitHandler implements StatefulEmitHandler { private final List emits = new ArrayList<>(); @Override - public void emit(final Emit emit) { + public boolean emit(final Emit emit) { this.emits.add(emit); + return true; } + @Override public List getEmits() { return this.emits; } diff --git a/src/main/java/org/ahocorasick/trie/handler/EmitHandler.java b/src/main/java/org/ahocorasick/trie/handler/EmitHandler.java index 74fd71e..1332ec2 100644 --- a/src/main/java/org/ahocorasick/trie/handler/EmitHandler.java +++ b/src/main/java/org/ahocorasick/trie/handler/EmitHandler.java @@ -3,5 +3,5 @@ import org.ahocorasick.trie.Emit; public interface EmitHandler { - void emit(Emit emit); + boolean emit(Emit emit); } diff --git a/src/main/java/org/ahocorasick/trie/handler/StatefulEmitHandler.java b/src/main/java/org/ahocorasick/trie/handler/StatefulEmitHandler.java new file mode 100644 index 0000000..13cb20e --- /dev/null +++ b/src/main/java/org/ahocorasick/trie/handler/StatefulEmitHandler.java @@ -0,0 +1,9 @@ +package org.ahocorasick.trie.handler; + +import java.util.List; + +import org.ahocorasick.trie.Emit; + +public interface StatefulEmitHandler extends EmitHandler { + List getEmits(); +} diff --git a/src/test/java/org/ahocorasick/trie/TrieTest.java b/src/test/java/org/ahocorasick/trie/TrieTest.java index b529de4..c185924 100644 --- a/src/test/java/org/ahocorasick/trie/TrieTest.java +++ b/src/test/java/org/ahocorasick/trie/TrieTest.java @@ -1,6 +1,7 @@ package org.ahocorasick.trie; import org.ahocorasick.trie.handler.EmitHandler; +import org.ahocorasick.trie.handler.StatefulEmitHandler; import org.junit.Test; import java.util.ArrayList; @@ -97,9 +98,43 @@ public void ushersTestAndStopOnHit() { .stopOnHit() .build(); Collection emits = trie.parseText("ushers"); - assertEquals(2, emits.size()); // she @ 3, he @ 3, hers @ 5 + assertEquals(1, emits.size()); // she @ 3, he @ 3, hers @ 5 Iterator iterator = emits.iterator(); checkEmit(iterator.next(), 2, 3, "he"); + } + + @Test + public void ushersTestStopOnHitSkipOne() { + Trie trie = Trie.builder() + .addKeywords(PRONOUNS) + .stopOnHit() + .build(); + + StatefulEmitHandler testEmitHandler = new StatefulEmitHandler() { + private final List emits = new ArrayList<>(); + boolean first = true; + + @Override + public boolean emit(final Emit emit) { + if(first) { + // return false for the first element + first = false; + return false; + } + this.emits.add(emit); + return true; + } + + @Override + public List getEmits() { + return this.emits; + } + }; + + trie.parseText("ushers", testEmitHandler); + Collection emits = testEmitHandler.getEmits(); + assertEquals(1, emits.size()); // she @ 3, he @ 3, hers @ 5 + Iterator iterator = emits.iterator(); checkEmit(iterator.next(), 1, 3, "she"); } @@ -152,8 +187,9 @@ public void ushersTestByCallback() { EmitHandler emitHandler = new EmitHandler() { @Override - public void emit(Emit emit) { + public boolean emit(Emit emit) { emits.add(emit); + return true; } }; trie.parseText("ushers", emitHandler); From d9a10a475af11148184aaa67e807197cf1b3a9d4 Mon Sep 17 00:00:00 2001 From: robert-bor Date: Mon, 15 May 2017 20:51:58 +0200 Subject: [PATCH 2/2] #53 Added AbstractStatefulEmitHandler, test shows the example of usage. --- src/main/java/org/ahocorasick/trie/Trie.java | 4 +++- .../handler/AbstractStatefulEmitHandler.java | 21 +++++++++++++++++++ .../java/org/ahocorasick/trie/TrieTest.java | 10 +++------ 3 files changed, 27 insertions(+), 8 deletions(-) create mode 100644 src/main/java/org/ahocorasick/trie/handler/AbstractStatefulEmitHandler.java diff --git a/src/main/java/org/ahocorasick/trie/Trie.java b/src/main/java/org/ahocorasick/trie/Trie.java index b618289..23ba9d1 100644 --- a/src/main/java/org/ahocorasick/trie/Trie.java +++ b/src/main/java/org/ahocorasick/trie/Trie.java @@ -286,7 +286,9 @@ private boolean storeEmits(final int position, final State currentState, final E if (emits != null && !emits.isEmpty()) { for (final String emit : emits) { emitted = emitHandler.emit(new Emit(position - emit.length() + 1, position, emit)) || emitted; - if(emitted && trieConfig.isStopOnHit()) break; + if (emitted && trieConfig.isStopOnHit()) { + break; + } } } diff --git a/src/main/java/org/ahocorasick/trie/handler/AbstractStatefulEmitHandler.java b/src/main/java/org/ahocorasick/trie/handler/AbstractStatefulEmitHandler.java new file mode 100644 index 0000000..eaa170c --- /dev/null +++ b/src/main/java/org/ahocorasick/trie/handler/AbstractStatefulEmitHandler.java @@ -0,0 +1,21 @@ +package org.ahocorasick.trie.handler; + +import java.util.ArrayList; +import java.util.List; + +import org.ahocorasick.trie.Emit; + +public abstract class AbstractStatefulEmitHandler implements StatefulEmitHandler { + + private final List emits = new ArrayList<>(); + + public void addEmit(final Emit emit) { + this.emits.add(emit); + } + + @Override + public List getEmits() { + return this.emits; + } + +} diff --git a/src/test/java/org/ahocorasick/trie/TrieTest.java b/src/test/java/org/ahocorasick/trie/TrieTest.java index c185924..bf01589 100644 --- a/src/test/java/org/ahocorasick/trie/TrieTest.java +++ b/src/test/java/org/ahocorasick/trie/TrieTest.java @@ -1,5 +1,6 @@ package org.ahocorasick.trie; +import org.ahocorasick.trie.handler.AbstractStatefulEmitHandler; import org.ahocorasick.trie.handler.EmitHandler; import org.ahocorasick.trie.handler.StatefulEmitHandler; import org.junit.Test; @@ -110,8 +111,7 @@ public void ushersTestStopOnHitSkipOne() { .stopOnHit() .build(); - StatefulEmitHandler testEmitHandler = new StatefulEmitHandler() { - private final List emits = new ArrayList<>(); + StatefulEmitHandler testEmitHandler = new AbstractStatefulEmitHandler() { boolean first = true; @Override @@ -121,14 +121,10 @@ public boolean emit(final Emit emit) { first = false; return false; } - this.emits.add(emit); + addEmit(emit); return true; } - @Override - public List getEmits() { - return this.emits; - } }; trie.parseText("ushers", testEmitHandler);