diff --git a/docs/plugins/analysis-kuromoji.asciidoc b/docs/plugins/analysis-kuromoji.asciidoc index 1f114e9ad9ed6..b1d1d5a751057 100644 --- a/docs/plugins/analysis-kuromoji.asciidoc +++ b/docs/plugins/analysis-kuromoji.asciidoc @@ -624,3 +624,123 @@ Which results in: } ] } -------------------------------------------------- + +[[analysis-kuromoji-hiragana-uppercase]] +==== `hiragana_uppercase` token filter + +The `hiragana_uppercase` token filter normalizes small letters (捨て仮名) in hiragana into standard letters. +This filter is useful if you want to search against old-style Japanese text such as +patents, legal documents, and contract policies. + +For example: + +[source,console] +-------------------------------------------------- +PUT kuromoji_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "kuromoji_tokenizer", + "filter": [ + "hiragana_uppercase" + ] + } + } + } + } + } +} + +GET kuromoji_sample/_analyze +{ + "analyzer": "my_analyzer", + "text": "ちょっとまって" +} +-------------------------------------------------- + +Which results in: + +[source,console-result] +-------------------------------------------------- +{ + "tokens": [ + { + "token": "ちよつと", + "start_offset": 0, + "end_offset": 4, + "type": "word", + "position": 0 + }, + { + "token": "まつ", + "start_offset": 4, + "end_offset": 6, + "type": "word", + "position": 1 + }, + { + "token": "て", + "start_offset": 6, + "end_offset": 7, + "type": "word", + "position": 2 + } + ] +} +-------------------------------------------------- + +[[analysis-kuromoji-katakana-uppercase]] +==== `katakana_uppercase` token filter + +The `katakana_uppercase` token filter normalizes small letters (捨て仮名) in katakana into standard letters. +This filter is useful if you want to search against old-style Japanese text such as +patents, legal documents, and contract policies. 
+ +For example: + +[source,console] +-------------------------------------------------- +PUT kuromoji_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "kuromoji_tokenizer", + "filter": [ + "katakana_uppercase" + ] + } + } + } + } + } +} + +GET kuromoji_sample/_analyze +{ + "analyzer": "my_analyzer", + "text": "ストップウォッチ" +} +-------------------------------------------------- + +Which results in: + +[source,console-result] +-------------------------------------------------- +{ + "tokens": [ + { + "token": "ストツプウオツチ", + "start_offset": 0, + "end_offset": 8, + "type": "word", + "position": 0 + } + ] +} +--------------------------------------------------