From 666977d491810b810f4f959c8a7a134ef6949fbc Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Thu, 31 Oct 2024 15:38:14 +0200 Subject: [PATCH] Adding text_similarity_reranker retriever specification (#3057) (cherry picked from commit d78609196510eaa4149f6478db0a6f21614647a3) --- output/openapi/elasticsearch-openapi.json | 45 +++++- .../elasticsearch-serverless-openapi.json | 45 +++++- output/schema/schema-serverless.json | 129 +++++++++++++++--- output/schema/schema.json | 129 +++++++++++++++--- output/typescript/types.ts | 11 +- specification/_global/search/SearchRequest.ts | 4 +- specification/_types/Retriever.ts | 19 ++- 7 files changed, 331 insertions(+), 51 deletions(-) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 72331ec39b..28dc31448a 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -80706,6 +80706,9 @@ }, "rrf": { "$ref": "#/components/schemas/_types:RRFRetriever" + }, + "text_similarity_reranker": { + "$ref": "#/components/schemas/_types:TextSimilarityReranker" } }, "minProperties": 1, @@ -80732,10 +80735,6 @@ "sort": { "$ref": "#/components/schemas/_types:Sort" }, - "min_score": { - "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.", - "type": "number" - }, "collapse": { "$ref": "#/components/schemas/_global.search._types:FieldCollapse" } @@ -80759,6 +80758,10 @@ } } ] + }, + "min_score": { + "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.", + "type": "number" } } }, @@ -80831,6 +80834,40 @@ } ] }, + "_types:TextSimilarityReranker": { + "allOf": [ + { + "$ref": "#/components/schemas/_types:RetrieverBase" + }, + { + "type": "object", + "properties": { + "retriever": { + "$ref": "#/components/schemas/_types:RetrieverContainer" + }, + "rank_window_size": { + "description": "This value determines how many documents we will consider from the nested retriever.", + "type": "number" + }, + "inference_id": { + "description": "Unique identifier of the inference endpoint created using the inference API.", + "type": "string" + }, + "inference_text": { + "description": "The text snippet used as the basis for similarity comparison", + "type": "string" + }, + "field": { + "description": "The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text", + "type": "string" + } + }, + "required": [ + "retriever" + ] + } + ] + }, "search_application._types:SearchApplication": { "type": "object", "properties": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 13a9a94f64..fd27db6834 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -53274,6 +53274,9 @@ }, "rrf": { "$ref": "#/components/schemas/_types:RRFRetriever" + }, + "text_similarity_reranker": { + "$ref": "#/components/schemas/_types:TextSimilarityReranker" } }, "minProperties": 1, @@ -53300,10 +53303,6 @@ "sort": { "$ref": "#/components/schemas/_types:Sort" }, - "min_score": { - "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.", - "type": "number" - }, "collapse": { "$ref": "#/components/schemas/_global.search._types:FieldCollapse" } @@ -53327,6 +53326,10 @@ } } ] + }, + "min_score": { + "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.", + "type": "number" } } }, @@ -53399,6 +53402,40 @@ } ] }, + "_types:TextSimilarityReranker": { + "allOf": [ + { + "$ref": "#/components/schemas/_types:RetrieverBase" + }, + { + "type": "object", + "properties": { + "retriever": { + "$ref": "#/components/schemas/_types:RetrieverContainer" + }, + "rank_window_size": { + "description": "This value determines how many documents we will consider from the nested retriever.", + "type": "number" + }, + "inference_id": { + "description": "Unique identifier of the inference endpoint created using the inference API.", + "type": "string" + }, + "inference_text": { + "description": "The text snippet used as the basis for similarity comparison", + "type": "string" + }, + "field": { + "description": "The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text", + "type": "string" + } + }, + "required": [ + "retriever" + ] + } + ] + }, "search_application._types:SearchApplication": { "type": "object", "properties": { diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index cf3d812d38..38e8c72950 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -36555,9 +36555,12 @@ }, { "availability": { - "serverless": {}, + "serverless": { + "stability": "stable" + }, "stack": { - "since": "8.14.0" + "since": "8.14.0", + "stability": "stable" } }, "description": "A retriever is a specification to describe top documents returned from a search. A retriever replaces other elements of the search API that also return top documents such as query and knn.", @@ -137975,9 +137978,21 @@ "namespace": "_types" } } + }, + { + "description": "A retriever that reranks the top documents based on a reranking model using the InferenceAPI", + "name": "text_similarity_reranker", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TextSimilarityReranker", + "namespace": "_types" + } + } } ], - "specLocation": "_types/Retriever.ts#L26-L36", + "specLocation": "_types/Retriever.ts#L26-L38", "variants": { "kind": "container" } @@ -138043,18 +138058,6 @@ } } }, - { - "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.", - "name": "min_score", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - }, { "description": "Collapses the top documents by a specified key into a single top document per key.", "name": "collapse", @@ -138068,7 +138071,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L43-L56" + "specLocation": "_types/Retriever.ts#L47-L58" }, { "kind": "interface", @@ -138103,9 +138106,21 @@ ], "kind": "union_of" } + }, + { + "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.", + "name": "min_score", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } } ], - "specLocation": "_types/Retriever.ts#L38-L41" + "specLocation": "_types/Retriever.ts#L40-L45" }, { "inherits": { @@ -138193,7 +138208,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L58-L71" + "specLocation": "_types/Retriever.ts#L60-L73" }, { "inherits": { @@ -138248,7 +138263,83 @@ } } ], - "specLocation": "_types/Retriever.ts#L73-L80" + "specLocation": "_types/Retriever.ts#L75-L82" + }, + { + "inherits": { + "type": { + "name": "RetrieverBase", + "namespace": "_types" + } + }, + "kind": "interface", + "name": { + "name": "TextSimilarityReranker", + "namespace": "_types" + }, + "properties": [ + { + "description": "The nested retriever which will produce the first-level results, that will later be used for reranking.", + "name": "retriever", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "RetrieverContainer", + "namespace": "_types" + } + } + }, + { + "description": "This value determines how many documents we will consider from the nested retriever.", + "name": "rank_window_size", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Unique identifier of the inference endpoint created using the inference API.", + "name": "inference_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The text snippet used as the basis for similarity comparison", + "name": "inference_text", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text", + "name": "field", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "_types/Retriever.ts#L84-L95" }, { "kind": "interface", diff --git a/output/schema/schema.json b/output/schema/schema.json index caa47b4ed9..1b3e646c9f 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -32231,9 +32231,12 @@ }, { "availability": { - "serverless": {}, + "serverless": { + "stability": "stable" + }, "stack": { - "since": "8.14.0" + "since": "8.14.0", + "stability": "stable" } }, "description": "A retriever is a specification to describe top documents returned from a search. A retriever replaces other elements of the search API that also return top documents such as query and knn.", @@ -45910,7 +45913,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L58-L71" + "specLocation": "_types/Retriever.ts#L60-L73" }, { "kind": "interface", @@ -47417,7 +47420,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L73-L80" + "specLocation": "_types/Retriever.ts#L75-L82" }, { "kind": "interface", @@ -47842,9 +47845,21 @@ } ] } + }, + { + "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.", + "name": "min_score", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } } ], - "specLocation": "_types/Retriever.ts#L38-L41" + "specLocation": "_types/Retriever.ts#L40-L45" }, { "kind": "interface", @@ -47888,9 +47903,21 @@ "namespace": "_types" } } + }, + { + "description": "A retriever that reranks the top documents based on a reranking model using the InferenceAPI", + "name": "text_similarity_reranker", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TextSimilarityReranker", + "namespace": "_types" + } + } } ], - "specLocation": "_types/Retriever.ts#L26-L36", + "specLocation": "_types/Retriever.ts#L26-L38", "variants": { "kind": "container" } @@ -49526,18 +49553,6 @@ } } }, - { - "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.", - "name": "min_score", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - }, { "description": "Collapses the top documents by a specified key into a single top document per key.", "name": "collapse", @@ -49551,7 +49566,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L43-L56" + "specLocation": "_types/Retriever.ts#L47-L58" }, { "kind": "interface", @@ -49842,6 +49857,82 @@ ], "specLocation": "_types/Knn.ts#L79-L82" }, + { + "kind": "interface", + "inherits": { + "type": { + "name": "RetrieverBase", + "namespace": "_types" + } + }, + "name": { + "name": "TextSimilarityReranker", + "namespace": "_types" + }, + "properties": [ + { + "description": "The nested retriever which will produce the first-level results, that will later be used for reranking.", + "name": "retriever", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "RetrieverContainer", + "namespace": "_types" + } + } + }, + { + "description": "This value determines how many documents we will consider from the nested retriever.", + "name": "rank_window_size", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Unique identifier of the inference endpoint created using the inference API.", + "name": "inference_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The text snippet used as the basis for similarity comparison", + "name": "inference_text", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text", + "name": "field", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "_types/Retriever.ts#L84-L95" + }, { "kind": "enum", "members": [ diff --git a/output/typescript/types.ts b/output/typescript/types.ts index f570d05b57..a01793f46a 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -2697,12 +2697,14 @@ export interface Retries { export interface RetrieverBase { filter?: QueryDslQueryContainer | QueryDslQueryContainer[] + min_score?: float } export interface RetrieverContainer { standard?: StandardRetriever knn?: KnnRetriever rrf?: RRFRetriever + text_similarity_reranker?: TextSimilarityReranker } export type Routing = string @@ -2867,7 +2869,6 @@ export interface StandardRetriever extends RetrieverBase { search_after?: SortResults terminate_after?: integer sort?: Sort - min_score?: float collapse?: SearchFieldCollapse } @@ -2904,6 +2905,14 @@ export interface TextEmbedding { model_text: string } +export interface TextSimilarityReranker extends RetrieverBase { + retriever: RetrieverContainer + rank_window_size?: integer + inference_id?: string + inference_text?: string + field?: string +} + export type ThreadType = 'cpu' | 'wait' | 'block' | 'gpu' | 'mem' export type TimeOfDay = string diff --git a/specification/_global/search/SearchRequest.ts b/specification/_global/search/SearchRequest.ts index 9fec2b244d..b2719a6b29 100644 --- a/specification/_global/search/SearchRequest.ts +++ b/specification/_global/search/SearchRequest.ts @@ -428,8 +428,8 @@ export interface Request extends RequestBase { rescore?: Rescore | Rescore[] /** * A retriever is a specification to describe top documents returned from a search. A retriever replaces other elements of the search API that also return top documents such as query and knn. - * @availability stack since=8.14.0 - * @availability serverless + * @availability stack since=8.14.0 stability=stable + * @availability serverless stability=stable */ retriever?: RetrieverContainer /** diff --git a/specification/_types/Retriever.ts b/specification/_types/Retriever.ts index 5118033924..42d5c5f232 100644 --- a/specification/_types/Retriever.ts +++ b/specification/_types/Retriever.ts @@ -33,11 +33,15 @@ export class RetrieverContainer { knn?: KnnRetriever /** A retriever that produces top documents from reciprocal rank fusion (RRF). */ rrf?: RRFRetriever + /** A retriever that reranks the top documents based on a reranking model using the InferenceAPI */ + text_similarity_reranker?: TextSimilarityReranker } export class RetrieverBase { /** Query to filter the documents that can match. */ filter?: QueryContainer | QueryContainer[] + /** Minimum _score for matching documents. Documents with a lower _score are not included in the top documents. */ + min_score?: float } export class StandardRetriever extends RetrieverBase { @@ -49,8 +53,6 @@ export class StandardRetriever extends RetrieverBase { terminate_after?: integer /** A sort object that that specifies the order of matching documents. */ sort?: Sort - /** Minimum _score for matching documents. Documents with a lower _score are not included in the top documents. */ - min_score?: float /** Collapses the top documents by a specified key into a single top document per key. */ collapse?: FieldCollapse } @@ -78,3 +80,16 @@ export class RRFRetriever extends RetrieverBase { /** This value determines the size of the individual result sets per query. */ rank_window_size?: integer } + +export class TextSimilarityReranker extends RetrieverBase { + /** The nested retriever which will produce the first-level results, that will later be used for reranking. */ + retriever: RetrieverContainer + /** This value determines how many documents we will consider from the nested retriever. */ + rank_window_size?: integer + /** Unique identifier of the inference endpoint created using the inference API. */ + inference_id?: string + /** The text snippet used as the basis for similarity comparison */ + inference_text?: string + /** The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text */ + field?: string +}