From eb72f5fe2a912878b1b2ad8a55b24b1969d8d5f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?=
Date: Thu, 1 Aug 2024 14:11:55 +0200
Subject: [PATCH 1/5] Adds inference API steps.

---
 docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc | 59 ++++++++++++++++++----
 1 file changed, 49 insertions(+), 10 deletions(-)

diff --git a/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc b/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc
index cf5c3022b..5fff7e801 100644
--- a/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc
+++ b/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc
@@ -105,8 +105,37 @@ that walks through upgrading an index to ELSER V2.
 [[download-deploy-elser]]
 == Download and deploy ELSER
 
-You can download and deploy ELSER either from **{ml-app}** > **Trained Models**,
-from **Search** > **Indices**, or by using the Dev Console.
+The easiest and recommended way to download and deploy ELSER is to use the {ref}/inference-apis.html[{infer} API].
+
+1. In {kib}, navigate to the **Dev Console**.
+2. Create an {infer} endpoint with the ELSER service by running the following API request:
++
+--
+[source,console]
+----------------------------------
+PUT _inference/sparse_embedding/my-elser-model
+{
+  "service": "elser",
+  "service_settings": {
+    "adaptive_allocations": {
+      "enabled": true,
+      "min_number_of_allocations": 2,
+      "max_number_of_allocations": 5
+    }
+  }
+}
+----------------------------------
+
+The API request automatically initiates the model download and then deploys the model with adaptive allocations enabled.
+
+Refer to the {ref}/infer-service-elser.html[ELSER {infer} service documentation] to learn more about the available settings.
+
+
+[discrete]
+[[alternative-download-deploy]]
+=== Alternative methods to download and deploy ELSER
+
+You can also download and deploy ELSER either from **{ml-app}** > **Trained Models**, from **Search** > **Indices**, or by using the trained models API in Dev Console.
 [NOTE]
 ====
@@ -120,10 +149,12 @@ separate deployments for search and ingest mitigates performance issues
 resulting from interactions between the two, which can be hard to diagnose.
 ====
 
-
+.Using the Trained Models page
+[%collapsible%closed]
+=====
 [discrete]
 [[trained-model]]
-=== Using the Trained Models page
+==== Using the Trained Models page
 
 1. In {kib}, navigate to **{ml-app}** > **Trained Models**. ELSER can be found
 in the list of trained models. There are two versions available: one portable
@@ -154,11 +185,14 @@ allocations and threads per allocation values.
 image::images/ml-nlp-deployment-id-elser-v2.png[alt="Deploying ELSER",align="center"]
 --
 5. Click **Start**.
+=====
 
-
+.Using the search indices UI
+[%collapsible%closed]
+=====
 [discrete]
 [[elasticsearch]]
-=== Using the search indices UI
+==== Using the search indices UI
 
 Alternatively, you can download and deploy ELSER to an {infer} pipeline using
 the search indices UI.
@@ -191,7 +225,7 @@ pipeline.
 
 [discrete]
 [[elasticsearch-ingest-pipeline]]
-==== Adding ELSER to an ingest pipeline
+===== Adding ELSER to an ingest pipeline
 
 To add ELSER to an ingest pipeline, you need to copy the default ingest
 pipeline and then customize it according to your needs.
@@ -216,11 +250,14 @@ mappings.
 
 Once your pipeline is created, you are ready to ingest documents and utilize
 ELSER for text expansions in your search queries.
+=====
 
-
+.Using the trained models API in Dev Console
+[%collapsible%closed]
+=====
 [discrete]
 [[dev-console]]
-=== Using the Dev Console
+==== Using the trained models API in Dev Console
 
 1. In {kib}, navigate to the **Dev Console**.
 2. Create the ELSER model configuration by running the following API call:
@@ -254,6 +291,7 @@ You can deploy the model multiple times with different deployment IDs.
 
 After the deployment is complete, ELSER is ready to use either in an ingest
 pipeline or in a `text_expansion` query to perform semantic search.
+=====
 
 
 [discrete]
@@ -440,7 +478,7 @@ To learn more about ELSER performance, refer to the <>.
 * {ref}/semantic-search-elser.html[Perform semantic search with ELSER]
 * https://www.elastic.co/blog/may-2023-launch-information-retrieval-elasticsearch-ai-model[Improving information retrieval in the Elastic Stack: Introducing Elastic Learned Sparse Encoder, our new retrieval model]
 
-
+[discrete]
 [[elser-benchmarks]]
 == Benchmark information
 
@@ -459,6 +497,7 @@ any platform.
 
 [discrete]
+[[version-overview-v2]]
 ==== ELSER V2
 
 Besides the performance improvements, the biggest change in ELSER V2 is the

From f2d2f13764fc5cde0b6167f5038de6fcea7d1f4f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?=
Date: Thu, 1 Aug 2024 16:25:14 +0200
Subject: [PATCH 2/5] Makes inference endpoint the primary way to download and deploy ELSER and E5.

---
 docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc    | 65 +++++++++++++++++++---
 docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc | 46 ++++-----------
 2 files changed, 66 insertions(+), 45 deletions(-)

diff --git a/docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc b/docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc
index f1550f93a..2252c134a 100644
--- a/docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc
+++ b/docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc
@@ -21,7 +21,11 @@ contextual meaning and user intent, rather than exact keyword matches.
 E5 has two versions: one cross-platform version which runs on any hardware
 and one version which is optimized for Intel® silicon. The
 **Model Management** > **Trained Models** page shows you which version of E5 is
-recommended to deploy based on your cluster's hardware.
+recommended to deploy based on your cluster's hardware. However, the
+recommended way to use E5 is through the
+{ref}/infer-service-elasticsearch.html[{infer} API] as a service, which makes
+it easier to download and deploy the model, and means you don't need to select
+from different versions.
 Refer to the model cards of the
 https://huggingface.co/elastic/multilingual-e5-small[multilingual-e5-small] and
@@ -42,17 +46,51 @@ for semantic search or the trial period activated.
 [[download-deploy-e5]]
 == Download and deploy E5
 
-You can download and deploy the E5 model either from
-**{ml-app}** > **Trained Models**, from **Search** > **Indices**, or by using
-the Dev Console.
+The easiest and recommended way to download and deploy E5 is to use the {ref}/inference-apis.html[{infer} API].
 
-NOTE: For most cases, the preferred version is the **Intel and Linux optimized**
-model, it is recommended to download and deploy that version.
+1. In {kib}, navigate to the **Dev Console**.
+2. Create an {infer} endpoint with the `elasticsearch` service by running the following API request:
++
+--
+[source,console]
+----------------------------------
+PUT _inference/text_embedding/my-e5-model
+{
+  "service": "elasticsearch",
+  "service_settings": {
+    "adaptive_allocations": {
+      "enabled": true,
+      "min_number_of_allocations": 2,
+      "max_number_of_allocations": 5
+    },
+    "model_id": ".multilingual-e5-small"
+  }
+}
+----------------------------------
+
+The API request automatically initiates the model download and then deploys the model with adaptive allocations enabled.
+
+Refer to the {ref}/infer-service-elasticsearch.html[`elasticsearch` {infer} service documentation] to learn more about the available settings.
+
+After you create the E5 {infer} endpoint, it's ready to be used for semantic search.
+The easiest way to perform semantic search in the {stack} is to {ref}/semantic-search-semantic-text.html[follow the `semantic_text` workflow].
+
+
+[discrete]
+[[alternative-download-deploy-e5]]
+=== Alternative methods to download and deploy E5
+
+You can also download and deploy the E5 model either from **{ml-app}** > **Trained Models**, from **Search** > **Indices**, or by using the trained models API in Dev Console.
+
+NOTE: In most cases, the **Intel and Linux optimized** model is the preferred version; it is recommended to download and deploy that version.
 
+.Using the Trained Models page
+[%collapsible%closed]
+=====
 [discrete]
 [[trained-model-e5]]
-=== Using the Trained Models page
+==== Using the Trained Models page
 
 1. In {kib}, navigate to **{ml-app}** > **Trained Models**. E5 can be found
 in the list of trained models. There are two versions available: one portable
@@ -83,11 +121,15 @@ allocations and threads per allocation values.
 image::images/ml-nlp-deployment-id-e5.png[alt="Deploying ELSER",align="center"]
 --
 5. Click Start.
+=====
 
+.Using the search indices UI
+[%collapsible%closed]
+=====
 [discrete]
 [[elasticsearch-e5]]
-=== Using the search indices UI
+==== Using the search indices UI
 
 Alternatively, you can download and deploy the E5 model to an {infer} pipeline
 using the search indices UI.
@@ -116,11 +158,15 @@ image::images/ml-nlp-start-e5-es.png[alt="Start E5 in Elasticsearch",align="cent
 
 When your E5 model is deployed and started, it is ready to be used in a
 pipeline.
+=====
 
+.Using the trained models API in Dev Console
+[%collapsible%closed]
+=====
 [discrete]
 [[dev-console-e5]]
-=== Using the Dev Console
+==== Using the trained models API in Dev Console
 
 1. In {kib}, navigate to the **Dev Console**.
 2. Create the E5 model configuration by running the following API call:
@@ -149,6 +195,7 @@ with a delpoyment ID:
 POST _ml/trained_models/.multilingual-e5-small/deployment/_start?deployment_id=for_search
 ----------------------------------
 --
+=====
 
 
 [discrete]
diff --git a/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc b/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc
index 5fff7e801..559e175ad 100644
--- a/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc
+++ b/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc
@@ -80,7 +80,11 @@ computing the similarity between a query and a document.
 ELSER v2 has two versions: one cross-platform version which runs on any
 hardware and one version which is optimized for Intel® silicon. The
 **Model Management** > **Trained Models** page shows you which version of ELSER
-v2 is recommended to deploy based on your cluster's hardware.
+v2 is recommended to deploy based on your cluster's hardware. However, the
+recommended way to use ELSER is through the
+{ref}/infer-service-elser.html[{infer} API] as a service, which makes it easier
+to download and deploy the model, and means you don't need to select from
+different versions.
 
 If you want to learn more about the ELSER V2 improvements, refer to
 https://www.elastic.co/search-labs/introducing-elser-v2-part-1[this blog post].
@@ -130,6 +134,9 @@ The API request automatically initiates the model download and then deploy the m
 
 Refer to the {ref}/infer-service-elser.html[ELSER {infer} service documentation] to learn more about the available settings.
 
+After you create the ELSER {infer} endpoint, it's ready to be used for semantic search.
+The easiest way to perform semantic search in the {stack} is to {ref}/semantic-search-semantic-text.html[follow the `semantic_text` workflow].
+
 
 [discrete]
 [[alternative-download-deploy]]
@@ -218,38 +225,6 @@ model deployment.
 [role="screenshot"]
 image::images/ml-nlp-start-elser-v2-es.png[alt="Start ELSER in Elasticsearch",align="center"]
 --
-
-When your ELSER model is deployed and started, it is ready to be used in a
-pipeline.
-
-
-[discrete]
-[[elasticsearch-ingest-pipeline]]
-===== Adding ELSER to an ingest pipeline
-
-To add ELSER to an ingest pipeline, you need to copy the default ingest
-pipeline and then customize it according to your needs.
-
-1. Click **Copy and customize** under the **Unlock your custom pipelines** block
-at the top of the page. This enables the **Add inference pipeline** button.
-+
---
-[role="screenshot"]
-image::images/ml-nlp-pipeline-copy-customize.png[alt="Start ELSER in Elasticsearch",align="center"]
---
-2. Under **{ml-app} {infer-cap} Pipelines**, click **Add inference pipeline**.
-3. Give a name to the pipeline, select ELSER from the list of trained ML models,
-and click **Continue**.
-4. Select the source text field, define the target field, and click **Add** then
-**Continue**.
-5. Review the index mappings updates. Click **Back** if you want to change the
-mappings. Click **Continue** if you are satisfied with the updated index
-mappings.
-6. You can optionally test your pipeline. Click **Continue**.
-7. **Create pipeline**.
-
-Once your pipeline is created, you are ready to ingest documents and utilize
-ELSER for text expansions in your search queries.
 =====
 
 .Using the trained models API in Dev Console
@@ -288,9 +263,6 @@ POST _ml/trained_models/.elser_model_2/deployment/_start?deployment_id=for_searc
 
 You can deploy the model multiple times with different deployment IDs.
 --
-
-After the deployment is complete, ELSER is ready to use either in an ingest
-pipeline or in a `text_expansion` query to perform semantic search.
 =====
 
 
@@ -482,6 +454,8 @@ To learn more about ELSER performance, refer to the <>.
 [[elser-benchmarks]]
 == Benchmark information
 
+IMPORTANT: The recommended way to use ELSER is through the {ref}/infer-service-elser.html[{infer} API] as a service.
+
 The following sections provide information about how ELSER performs on
 different hardware and compare the model performance to {es} BM25 and
 other strong baselines.

From 1a5cea63533044b445cd7e9c1c249dc2faabb59d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?=
Date: Thu, 1 Aug 2024 16:59:58 +0200
Subject: [PATCH 3/5] Fixes block.
---
 docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc    | 2 +-
 docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc b/docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc
index 2252c134a..cd7b9ae53 100644
--- a/docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc
+++ b/docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc
@@ -67,7 +67,7 @@ PUT _inference/text_embedding/my-e5-model
   }
 }
 ----------------------------------
-
+--
 The API request automatically initiates the model download and then deploys the model with adaptive allocations enabled.
 
 Refer to the {ref}/infer-service-elasticsearch.html[`elasticsearch` {infer} service documentation] to learn more about the available settings.
diff --git a/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc b/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc
index 559e175ad..79149cc0e 100644
--- a/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc
+++ b/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc
@@ -129,7 +129,7 @@ PUT _inference/sparse_embedding/my-elser-model
   }
 }
 ----------------------------------
-
+--
 The API request automatically initiates the model download and then deploys the model with adaptive allocations enabled.
 
 Refer to the {ref}/infer-service-elser.html[ELSER {infer} service documentation] to learn more about the available settings.

From 39661b635b5a796aa50b071b311d76abb1b8e7e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?=
Date: Fri, 2 Aug 2024 10:11:07 +0200
Subject: [PATCH 4/5] Fixes typo.

---
 docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc b/docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc
index cd7b9ae53..642a39801 100644
--- a/docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc
+++ b/docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc
@@ -118,7 +118,7 @@ allocations and threads per allocation values.
 +
 --
 [role="screenshot"]
-image::images/ml-nlp-deployment-id-e5.png[alt="Deploying ELSER",align="center"]
+image::images/ml-nlp-deployment-id-e5.png[alt="Deploying E5",align="center"]
 --
 5. Click Start.
 =====

From e11530e8bcde5a2b89eaff8f8dd1571f03f174c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?=
Date: Fri, 2 Aug 2024 14:45:09 +0200
Subject: [PATCH 5/5] [DOCS] Replaces adaptive allocations settings.

---
 docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc    | 9 +++------
 docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc | 9 +++------
 2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc b/docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc
index 642a39801..e23997b6c 100644
--- a/docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc
+++ b/docs/en/stack/ml/nlp/ml-nlp-e5.asciidoc
@@ -58,17 +58,14 @@ PUT _inference/text_embedding/my-e5-model
 {
   "service": "elasticsearch",
   "service_settings": {
-    "adaptive_allocations": {
-      "enabled": true,
-      "min_number_of_allocations": 2,
-      "max_number_of_allocations": 5
-    },
+    "num_allocations": 1,
+    "num_threads": 1,
     "model_id": ".multilingual-e5-small"
   }
 }
 ----------------------------------
 --
-The API request automatically initiates the model download and then deploys the model with adaptive allocations enabled.
+The API request automatically initiates the model download and then deploys the model.
 
 Refer to the {ref}/infer-service-elasticsearch.html[`elasticsearch` {infer} service documentation] to learn more about the available settings.
diff --git a/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc b/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc
index 79149cc0e..007ba5946 100644
--- a/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc
+++ b/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc
@@ -121,16 +121,13 @@ PUT _inference/sparse_embedding/my-elser-model
 {
   "service": "elser",
   "service_settings": {
-    "adaptive_allocations": {
-      "enabled": true,
-      "min_number_of_allocations": 2,
-      "max_number_of_allocations": 5
-    }
+    "num_allocations": 1,
+    "num_threads": 1
   }
 }
 ----------------------------------
 --
-The API request automatically initiates the model download and then deploys the model with adaptive allocations enabled.
+The API request automatically initiates the model download and then deploys the model.
 
 Refer to the {ref}/infer-service-elser.html[ELSER {infer} service documentation] to learn more about the available settings.
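
---

The final (patch 5) endpoint definitions can also be created from a script rather than the Dev Console. The sketch below is a minimal, hedged illustration: it only assembles the JSON request bodies shown in the patches above and prints the ELSER one. The helper names (`elser_endpoint_body`, `e5_endpoint_body`) are ours, not part of the documented API, and actually sending the requests (e.g. `PUT _inference/sparse_embedding/my-elser-model` against your cluster, with your own URL and credentials) is left as a comment because those details are deployment-specific.

```python
import json

def elser_endpoint_body(num_allocations: int = 1, num_threads: int = 1) -> dict:
    # Body for: PUT _inference/sparse_embedding/<endpoint-name> (ELSER service),
    # mirroring the patch 5 example with fixed allocations.
    return {
        "service": "elser",
        "service_settings": {
            "num_allocations": num_allocations,
            "num_threads": num_threads,
        },
    }

def e5_endpoint_body(num_allocations: int = 1, num_threads: int = 1) -> dict:
    # Body for: PUT _inference/text_embedding/<endpoint-name> (elasticsearch service),
    # pinned to the built-in E5 model as in the patch 5 example.
    return {
        "service": "elasticsearch",
        "service_settings": {
            "num_allocations": num_allocations,
            "num_threads": num_threads,
            "model_id": ".multilingual-e5-small",
        },
    }

if __name__ == "__main__":
    # Sending the request is cluster-specific; a hypothetical client call might be:
    #   requests.put("https://<your-cluster>:9200/_inference/sparse_embedding/my-elser-model",
    #                json=elser_endpoint_body(), auth=("<user>", "<password>"))
    print(json.dumps(elser_endpoint_body(), indent=2))
```
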