Review notes (text before the first "diff --git" header is ignored by patch tools):
- Line breaks were restored; whitespace inside hunks is reconstructed and may need
  adjusting to match the target files byte-for-byte.
- Trie.addKeyword now derives the emitted keyword from the very chars inserted into
  the trie, so the post-image blob hash on the Trie.java "index" line is stale.

diff --git a/src/main/java/org/ahocorasick/trie/Trie.java b/src/main/java/org/ahocorasick/trie/Trie.java
index d31d52f..8c8b58f 100644
--- a/src/main/java/org/ahocorasick/trie/Trie.java
+++ b/src/main/java/org/ahocorasick/trie/Trie.java
@@ -33,9 +33,17 @@ private void addKeyword(String keyword) {
         }
         State currentState = this.rootState;
+        // Fold case per char and emit exactly the chars that were inserted: String.toLowerCase()
+        // is locale-sensitive and may change length, so it can disagree with the trie path.
+        boolean caseInsensitive = trieConfig.isCaseInsensitive();
+        StringBuilder storedKeyword = new StringBuilder(keyword.length());
         for (Character character : keyword.toCharArray()) {
+            if (caseInsensitive) {
+                character = Character.toLowerCase(character);
+            }
+            storedKeyword.append(character.charValue());
             currentState = currentState.addState(character);
         }
-        currentState.addEmit(keyword);
+        currentState.addEmit(storedKeyword.toString());
     }
 
     public Collection tokenize(String text) {
diff --git a/src/test/java/org/ahocorasick/trie/TrieTest.java b/src/test/java/org/ahocorasick/trie/TrieTest.java
index 063f177..f4d3a6c 100644
--- a/src/test/java/org/ahocorasick/trie/TrieTest.java
+++ b/src/test/java/org/ahocorasick/trie/TrieTest.java
@@ -106,7 +106,24 @@ public void ushersTest() {
         checkEmit(iterator.next(), 2, 5, "hers");
     }
 
-    @Test
+    @Test
+    public void ushersTestWithCapitalKeywords() {
+        Trie trie = Trie.builder()
+                .caseInsensitive()
+                .addKeyword("HERS")
+                .addKeyword("HIS")
+                .addKeyword("SHE")
+                .addKeyword("HE")
+                .build();
+        Collection emits = trie.parseText("ushers");
+        assertEquals(3, emits.size()); // she @ 3, he @ 3, hers @ 5
+        Iterator iterator = emits.iterator();
+        checkEmit(iterator.next(), 2, 3, "he");
+        checkEmit(iterator.next(), 1, 3, "she");
+        checkEmit(iterator.next(), 2, 5, "hers");
+    }
+
+    @Test
     public void ushersTestFirstMatch() {
         Trie trie = Trie.builder()
                 .addKeyword("hers")