Skip to content

Commit

Permalink
feat: add English stemmer in es schema (#59)
Browse files Browse the repository at this point in the history
* feat: add English stemmer in es schema

* add test case

---------

Co-authored-by: anjali.agarwal <[email protected]>
  • Loading branch information
anjali9791 and anjaliagg9791 authored Oct 4, 2023
1 parent ec9201e commit 66aa534
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 1 deletion.
44 changes: 44 additions & 0 deletions internal/store/elasticsearch/es_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,50 @@ func TestElasticsearch(t *testing.T) {
analyzedTokens = append(analyzedTokens, tok.Token)
}

if reflect.DeepEqual(expectTokens, analyzedTokens) == false {
return fmt.Errorf("expected analyzer to tokenize %q as %v, was %v", textToAnalyze, expectTokens, analyzedTokens)
}
return nil
},
},
{
Title: "created index should be able to correctly identify stemmed tokens",
Service: daggerService,
Validate: func(esClient *store.Client, cli *elasticsearch.Client, indexName string) error {
textToAnalyze := "walking"
analyzerPath := fmt.Sprintf("/%s/_analyze", indexName)
analyzerPayload := fmt.Sprintf(`{"analyzer": "my_analyzer", "text": %q}`, textToAnalyze)

//nolint:noctx
req, err := http.NewRequest(http.MethodPost, analyzerPath, strings.NewReader(analyzerPayload))
if err != nil {
return fmt.Errorf("error creating analyzer request: %w", err)
}
req.Header.Add("content-type", "application/json")

res, err := cli.Perform(req)
if err != nil {
return fmt.Errorf("invoke analyzer: %w", err)
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
return fmt.Errorf("elasticsearch returned non-200 response: %d", res.StatusCode)
}
var response struct {
Tokens []struct {
Token string `json:"token"`
} `json:"tokens"`
}
err = json.NewDecoder(res.Body).Decode(&response)
if err != nil {
return fmt.Errorf("error decoding response: %w", err)
}
expectTokens := []string{"walk"}
analyzedTokens := []string{}
for _, tok := range response.Tokens {
analyzedTokens = append(analyzedTokens, tok.Token)
}

if reflect.DeepEqual(expectTokens, analyzedTokens) == false {
return fmt.Errorf("expected analyzer to tokenize %q as %v, was %v", textToAnalyze, expectTokens, analyzedTokens)
}
Expand Down
8 changes: 7 additions & 1 deletion internal/store/elasticsearch/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,15 @@ var indexSettingsTemplate = `{
"my_analyzer": {
"type": "custom",
"tokenizer": "my_tokenizer",
"filter": ["lowercase"]
"filter": ["lowercase", "english_stemmer"]
}
},
"filter": {
"english_stemmer": {
"type": "stemmer",
"name": "english"
}
},
"tokenizer": {
"my_tokenizer": {
"type": "pattern",
Expand Down

0 comments on commit 66aa534

Please sign in to comment.