word_tokenizer_test.go
package sentences

import (
	"reflect"
	"testing"
)
// TestWordTokenizer exercises word tokenization with the punctuation-only
// flag both disabled and enabled.
func TestWordTokenizer(t *testing.T) {
	t.Log("Starting word tokenizer suite of tests ...")

	punctStrings := NewPunctStrings()
	wordTokenizer := NewWordTokenizer(punctStrings)

	// With the flag disabled, every word in the sentence comes back as a token.
	tokenizeTest(t, wordTokenizer, "This is a test sentence", []string{
		"This",
		"is",
		"a",
		"test",
		"sentence",
	})

	// With the flag enabled, only the token containing punctuation is expected back.
	tokenizeTestOnlyPunct(t, wordTokenizer, "This is a test sentence?", []string{
		"sentence?",
	})
}
// tokenizeTest tokenizes actualText with the punctuation-only flag disabled
// and compares the resulting tokens against expected.
func tokenizeTest(t *testing.T, wordTokenizer WordTokenizer, actualText string, expected []string) {
	actualTokens := wordTokenizer.Tokenize(actualText, false)
	compareTokens(t, actualTokens, expected)
}

// tokenizeTestOnlyPunct tokenizes actualText with the punctuation-only flag
// enabled and compares the resulting tokens against expected.
func tokenizeTestOnlyPunct(t *testing.T, wordTokenizer WordTokenizer, actualText string, expected []string) {
	actualTokens := wordTokenizer.Tokenize(actualText, true)
	compareTokens(t, actualTokens, expected)
}
// compareTokens flattens the tokenizer output into plain strings and fails
// the test if they do not match the expected slice.
func compareTokens(t *testing.T, actualTokens []*Token, expected []string) {
	actual := make([]string, 0, len(actualTokens))
	for _, token := range actualTokens {
		actual = append(actual, token.Tok)
	}

	if !reflect.DeepEqual(actual, expected) {
		t.Logf("%v", actualTokens)
		t.Logf("Actual: %#v", actual)
		t.Logf("Expected: %#v", expected)
		t.Fatalf("Actual tokens do not match expected tokens")
	}
}
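
// Illustrative only, not part of the original suite: a minimal sketch of how
// the tokenizer exercised above might be driven directly, using only
// identifiers that already appear in this file (NewPunctStrings,
// NewWordTokenizer, Tokenize, Token.Tok) and assuming the same API shape as
// the tests above.
//
//	punctStrings := NewPunctStrings()
//	wordTokenizer := NewWordTokenizer(punctStrings)
//	for _, token := range wordTokenizer.Tokenize("This is a test sentence", false) {
//		fmt.Println(token.Tok) // prints each word token on its own line
//	}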