From 1a27080e79e765b656e881871d5d79e939970b95 Mon Sep 17 00:00:00 2001 From: Laura Trotta Date: Mon, 28 Oct 2024 09:42:27 +0100 Subject: [PATCH] ngram tokenizer backport --- output/openapi/elasticsearch-openapi.json | 5 +-- .../elasticsearch-serverless-openapi.json | 5 +-- output/schema/schema-serverless.json | 37 ++++++++++--------- output/schema/schema.json | 37 ++++++++++--------- output/typescript/types.ts | 6 +-- specification/_types/analysis/tokenizers.ts | 9 +++-- 6 files changed, 49 insertions(+), 50 deletions(-) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 6bd96fd9d5..8074c6e58d 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -58561,10 +58561,7 @@ } }, "required": [ - "type", - "max_gram", - "min_gram", - "token_chars" + "type" ] } ] diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 08d407d333..e0b2adce5f 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -39323,10 +39323,7 @@ } }, "required": [ - "type", - "max_gram", - "min_gram", - "token_chars" + "type" ] } ] diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index b259ce04f3..caa4532c2f 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -53207,7 +53207,7 @@ "name": "NoriDecompoundMode", "namespace": "_types.analysis" }, - "specLocation": "_types/analysis/tokenizers.ts#L74-L78" + "specLocation": "_types/analysis/tokenizers.ts#L77-L81" }, { "kind": "interface", @@ -79776,7 +79776,7 @@ "name": "TokenChar", "namespace": "_types.analysis" }, - "specLocation": "_types/analysis/tokenizers.ts#L46-L53" + "specLocation": "_types/analysis/tokenizers.ts#L49-L56" }, { "codegenNames": [ @@ -83059,7 +83059,7 @@ "name": "Tokenizer", "namespace": "_types.analysis" }, - "specLocation": "_types/analysis/tokenizers.ts#L119-L121", + "specLocation": "_types/analysis/tokenizers.ts#L122-L124", "type": { "items": [ { @@ -83086,7 +83086,7 @@ "name": "TokenizerDefinition", "namespace": "_types.analysis" }, - "specLocation": "_types/analysis/tokenizers.ts#L123-L141", + "specLocation": "_types/analysis/tokenizers.ts#L126-L144", "type": { "items": [ { @@ -83243,7 +83243,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L55-L59" + "specLocation": "_types/analysis/tokenizers.ts#L58-L62" }, { "kind": "interface", @@ -83370,7 +83370,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L61-L64" + "specLocation": "_types/analysis/tokenizers.ts#L64-L67" }, { "inherits": { @@ -83394,7 +83394,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L66-L68" + "specLocation": "_types/analysis/tokenizers.ts#L69-L71" }, { "inherits": { @@ -83418,7 +83418,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L70-L72" + "specLocation": "_types/analysis/tokenizers.ts#L73-L75" }, { "inherits": { @@ -83454,7 +83454,7 @@ }, { "name": "max_gram", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -83465,7 +83465,7 @@ }, { "name": "min_gram", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -83476,7 +83476,8 @@ }, { "name": "token_chars", - "required": true, + "required": false, + "serverDefault": [], "type": { "kind": "array_of", "value": { @@ -83489,7 +83490,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L38-L44" + "specLocation": "_types/analysis/tokenizers.ts#L38-L47" }, { "inherits": { @@ -83560,7 +83561,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L80-L86" + "specLocation": "_types/analysis/tokenizers.ts#L83-L89" }, { "inherits": { @@ -83666,7 +83667,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L88-L95" + "specLocation": "_types/analysis/tokenizers.ts#L91-L98" }, { "inherits": { @@ -83701,7 +83702,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L104-L107" + "specLocation": "_types/analysis/tokenizers.ts#L107-L110" }, { "inherits": { @@ -83736,7 +83737,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L109-L112" + "specLocation": "_types/analysis/tokenizers.ts#L112-L115" }, { "inherits": { @@ -83771,7 +83772,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L114-L117" + "specLocation": "_types/analysis/tokenizers.ts#L117-L120" }, { "inherits": { @@ -83932,7 +83933,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L97-L102" + "specLocation": "_types/analysis/tokenizers.ts#L100-L105" }, { "inherits": { diff --git a/output/schema/schema.json b/output/schema/schema.json index bb59892038..daf4bd89bd 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -65376,7 +65376,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L55-L59" + "specLocation": "_types/analysis/tokenizers.ts#L58-L62" }, { "inherits": { @@ -67129,7 +67129,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L61-L64" + "specLocation": "_types/analysis/tokenizers.ts#L64-L67" }, { "kind": "interface", @@ -67704,7 +67704,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L66-L68" + "specLocation": "_types/analysis/tokenizers.ts#L69-L71" }, { "inherits": { @@ -67836,7 +67836,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L70-L72" + "specLocation": "_types/analysis/tokenizers.ts#L73-L75" }, { "inherits": { @@ -68045,7 +68045,7 @@ }, { "name": "max_gram", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -68056,7 +68056,7 @@ }, { "name": "min_gram", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -68067,7 +68067,8 @@ }, { "name": "token_chars", - "required": true, + "required": false, + "serverDefault": [], "type": { "kind": "array_of", "value": { @@ -68080,7 +68081,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L38-L44" + "specLocation": "_types/analysis/tokenizers.ts#L38-L47" }, { "kind": "interface", @@ -68164,7 +68165,7 @@ "name": "NoriDecompoundMode", "namespace": "_types.analysis" }, - "specLocation": "_types/analysis/tokenizers.ts#L74-L78" + "specLocation": "_types/analysis/tokenizers.ts#L77-L81" }, { "inherits": { @@ -68273,7 +68274,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L80-L86" + "specLocation": "_types/analysis/tokenizers.ts#L83-L89" }, { "docId": "analysis-normalizers", @@ -68413,7 +68414,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L88-L95" + "specLocation": "_types/analysis/tokenizers.ts#L91-L98" }, { "kind": "interface", @@ -68726,7 +68727,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L97-L102" + "specLocation": "_types/analysis/tokenizers.ts#L100-L105" }, { "kind": "enum", @@ -69445,7 +69446,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L104-L107" + "specLocation": "_types/analysis/tokenizers.ts#L107-L110" }, { "inherits": { @@ -69956,7 +69957,7 @@ "name": "TokenChar", "namespace": "_types.analysis" }, - "specLocation": "_types/analysis/tokenizers.ts#L46-L53" + "specLocation": "_types/analysis/tokenizers.ts#L49-L56" }, { "codegenNames": [ @@ -70367,7 +70368,7 @@ "name": "Tokenizer", "namespace": "_types.analysis" }, - "specLocation": "_types/analysis/tokenizers.ts#L119-L121", + "specLocation": "_types/analysis/tokenizers.ts#L122-L124", "type": { "items": [ { @@ -70415,7 +70416,7 @@ "name": "TokenizerDefinition", "namespace": "_types.analysis" }, - "specLocation": "_types/analysis/tokenizers.ts#L123-L141", + "specLocation": "_types/analysis/tokenizers.ts#L126-L144", "type": { "items": [ { @@ -70617,7 +70618,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L109-L112" + "specLocation": "_types/analysis/tokenizers.ts#L112-L115" }, { "inherits": { @@ -70740,7 +70741,7 @@ } } ], - "specLocation": "_types/analysis/tokenizers.ts#L114-L117" + "specLocation": "_types/analysis/tokenizers.ts#L117-L120" }, { "inherits": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 73287f120a..76ddc4382c 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -4736,9 +4736,9 @@ export interface AnalysisNGramTokenFilter extends AnalysisTokenFilterBase { export interface AnalysisNGramTokenizer extends AnalysisTokenizerBase { type: 'ngram' custom_token_chars?: string - max_gram: integer - min_gram: integer - token_chars: AnalysisTokenChar[] + max_gram?: integer + min_gram?: integer + token_chars?: AnalysisTokenChar[] } export interface AnalysisNoriAnalyzer { diff --git a/specification/_types/analysis/tokenizers.ts b/specification/_types/analysis/tokenizers.ts index 322a0098a1..d61128b717 100644 --- a/specification/_types/analysis/tokenizers.ts +++ b/specification/_types/analysis/tokenizers.ts @@ -38,9 +38,12 @@ export class EdgeNGramTokenizer extends TokenizerBase { export class NGramTokenizer extends TokenizerBase { type: 'ngram' custom_token_chars?: string - max_gram: integer - min_gram: integer - token_chars: TokenChar[] + max_gram?: integer + min_gram?: integer + /** + * @server_default [] + */ + token_chars?: TokenChar[] } export enum TokenChar {