diff --git a/Cargo.lock b/Cargo.lock index 134036f6..7d449f0d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2071,9 +2071,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.21" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "macro_rules_attribute" diff --git a/docs/openapi.json b/docs/openapi.json index 7368145e..d2f36301 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -10,7 +10,7 @@ "name": "Apache 2.0", "url": "https://www.apache.org/licenses/LICENSE-2.0" }, - "version": "1.2.3" + "version": "1.3.0" }, "paths": { "/decode": { @@ -19,7 +19,6 @@ "Text Embeddings Inference" ], "summary": "Decode input ids", - "description": "Decode input ids", "operationId": "decode", "requestBody": { "content": { @@ -65,7 +64,6 @@ "Text Embeddings Inference" ], "summary": "Get Embeddings. Returns a 424 status code if the model is not an embedding model.", - "description": "Get Embeddings. Returns a 424 status code if the model is not an embedding model.", "operationId": "embed", "requestBody": { "content": { @@ -153,7 +151,7 @@ "Text Embeddings Inference" ], "summary": "Get all Embeddings without Pooling.", - "description": "Get all Embeddings without Pooling.\nReturns a 424 status code if the model is not an embedding model.", + "description": "Returns a 424 status code if the model is not an embedding model.", "operationId": "embed_all", "requestBody": { "content": { @@ -241,7 +239,6 @@ "Text Embeddings Inference" ], "summary": "Get Sparse Embeddings. Returns a 424 status code if the model is not an embedding model with SPLADE pooling.", - "description": "Get Sparse Embeddings. Returns a 424 status code if the model is not an embedding model with SPLADE pooling.", "operationId": "embed_sparse", "requestBody": { "content": { @@ -323,101 +320,12 @@ } } }, - "/embeddings": { - "post": { - "tags": [ - "Text Embeddings Inference" - ], - "summary": "OpenAI compatible route. Returns a 424 status code if the model is not an embedding model.", - "description": "OpenAI compatible route. 
Returns a 424 status code if the model is not an embedding model.", - "operationId": "openai_embed", - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenAICompatRequest" - } - } - }, - "required": true - }, - "responses": { - "200": { - "description": "Embeddings", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenAICompatResponse" - } - } - } - }, - "413": { - "description": "Batch size error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenAICompatErrorResponse" - }, - "example": { - "message": "Batch size error", - "type": "validation" - } - } - } - }, - "422": { - "description": "Tokenization error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenAICompatErrorResponse" - }, - "example": { - "message": "Tokenization error", - "type": "tokenizer" - } - } - } - }, - "424": { - "description": "Embedding Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenAICompatErrorResponse" - }, - "example": { - "message": "Inference failed", - "type": "backend" - } - } - } - }, - "429": { - "description": "Model is overloaded", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OpenAICompatErrorResponse" - }, - "example": { - "message": "Model is overloaded", - "type": "overloaded" - } - } - } - } - } - } - }, "/health": { "get": { "tags": [ "Text Embeddings Inference" ], "summary": "Health check method", - "description": "Health check method", "operationId": "health", "responses": { "200": { @@ -446,7 +354,6 @@ "Text Embeddings Inference" ], "summary": "Text Embeddings Inference endpoint info", - "description": "Text Embeddings Inference endpoint info", "operationId": "get_model_info", "responses": { "200": { @@ -468,7 +375,6 @@ "Text Embeddings Inference" ], "summary": "Prometheus metrics scrape endpoint", - "description": "Prometheus metrics scrape endpoint", "operationId": "metrics", "responses": { "200": { @@ -490,7 +396,6 @@ "Text Embeddings Inference" ], "summary": "Get Predictions. Returns a 424 status code if the model is not a Sequence Classification model", - "description": "Get Predictions. Returns a 424 status code if the model is not a Sequence Classification model", "operationId": "predict", "requestBody": { "content": { @@ -578,7 +483,7 @@ "Text Embeddings Inference" ], "summary": "Get Ranks. Returns a 424 status code if the model is not a Sequence Classification model with", - "description": "Get Ranks. Returns a 424 status code if the model is not a Sequence Classification model with\na single class.", + "description": "a single class.", "operationId": "rerank", "requestBody": { "content": { @@ -666,7 +571,6 @@ "Text Embeddings Inference" ], "summary": "Tokenize inputs", - "description": "Tokenize inputs", "operationId": "tokenize", "requestBody": { "content": { @@ -706,19 +610,18 @@ } } }, - "/vertex": { + "/v1/embeddings": { "post": { "tags": [ "Text Embeddings Inference" ], - "summary": "Generate embeddings from a Vertex request", - "description": "Generate embeddings from a Vertex request", - "operationId": "vertex_compatibility", + "summary": "OpenAI compatible route. 
Returns a 424 status code if the model is not an embedding model.", + "operationId": "openai_embed", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/VertexRequest" + "$ref": "#/components/schemas/OpenAICompatRequest" } } }, @@ -726,18 +629,25 @@ }, "responses": { "200": { - "description": "Results" + "description": "Embeddings", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenAICompatResponse" + } + } + } }, "413": { "description": "Batch size error", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ErrorResponse" + "$ref": "#/components/schemas/OpenAICompatErrorResponse" }, "example": { - "error": "Batch size error", - "error_type": "validation" + "message": "Batch size error", + "type": "validation" } } } @@ -747,25 +657,25 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ErrorResponse" + "$ref": "#/components/schemas/OpenAICompatErrorResponse" }, "example": { - "error": "Tokenization error", - "error_type": "tokenizer" + "message": "Tokenization error", + "type": "tokenizer" } } } }, "424": { - "description": "Error", + "description": "Embedding Error", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ErrorResponse" + "$ref": "#/components/schemas/OpenAICompatErrorResponse" }, "example": { - "error": "Inference failed", - "error_type": "backend" + "message": "Inference failed", + "type": "backend" } } } @@ -775,11 +685,11 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ErrorResponse" + "$ref": "#/components/schemas/OpenAICompatErrorResponse" }, "example": { - "error": "Model is overloaded", - "error_type": "overloaded" + "message": "Model is overloaded", + "type": "overloaded" } } } @@ -852,10 +762,26 @@ "inputs": { "$ref": "#/components/schemas/Input" }, + "prompt_name": { + "type": "string", + "description": "The name of the prompt that should be used by for encoding. If not set, no prompt\nwill be applied.\n\nMust be a key in the `Sentence Transformers` configuration `prompts` dictionary.\n\nFor example if ``prompt_name`` is \"query\" and the ``prompts`` is {\"query\": \"query: \", ...},\nthen the sentence \"What is the capital of France?\" will be encoded as\n\"query: What is the capital of France?\" because the prompt text will be prepended before\nany text to encode.", + "default": "null", + "example": "null", + "nullable": true + }, "truncate": { "type": "boolean", "default": "false", - "example": "false" + "example": "false", + "nullable": true + }, + "truncation_direction": { + "allOf": [ + { + "$ref": "#/components/schemas/TruncationDirection" + } + ], + "default": "right" } } }, @@ -895,10 +821,26 @@ "default": "true", "example": "true" }, + "prompt_name": { + "type": "string", + "description": "The name of the prompt that should be used by for encoding. 
If not set, no prompt\nwill be applied.\n\nMust be a key in the `Sentence Transformers` configuration `prompts` dictionary.\n\nFor example if ``prompt_name`` is \"query\" and the ``prompts`` is {\"query\": \"query: \", ...},\nthen the sentence \"What is the capital of France?\" will be encoded as\n\"query: What is the capital of France?\" because the prompt text will be prepended before\nany text to encode.", + "default": "null", + "example": "null", + "nullable": true + }, "truncate": { "type": "boolean", "default": "false", - "example": "false" + "example": "false", + "nullable": true + }, + "truncation_direction": { + "allOf": [ + { + "$ref": "#/components/schemas/TruncationDirection" + } + ], + "default": "right" } } }, @@ -928,10 +870,26 @@ "inputs": { "$ref": "#/components/schemas/Input" }, + "prompt_name": { + "type": "string", + "description": "The name of the prompt that should be used for encoding. If not set, no prompt\nwill be applied.\n\nMust be a key in the `Sentence Transformers` configuration `prompts` dictionary.\n\nFor example if ``prompt_name`` is \"query\" and the ``prompts`` is {\"query\": \"query: \", ...},\nthen the sentence \"What is the capital of France?\" will be encoded as\n\"query: What is the capital of France?\" because the prompt text will be prepended before\nany text to encode.", + "default": "null", + "example": "null", + "nullable": true + }, "truncate": { "type": "boolean", "default": "false", - "example": "false" + "example": "false", + "nullable": true + }, + "truncation_direction": { + "allOf": [ + { + "$ref": "#/components/schemas/TruncationDirection" + } + ], + "default": "right" } } }, @@ -944,6 +902,20 @@ } } }, + "Embedding": { + "oneOf": [ + { + "type": "array", + "items": { + "type": "number", + "format": "float" + } + }, + { + "type": "string" + } + ] + }, "EmbeddingModel": { "type": "object", "required": [ @@ -956,6 +928,13 @@ } } }, + "EncodingFormat": { + "type": "string", + "enum": [ + "float", + "base64" + ] + }, "ErrorResponse": { "type": "object", "required": [ @@ -991,10 +970,14 @@ "max_input_length", "max_batch_tokens", "max_client_batch_size", + "auto_truncate", "tokenization_workers", "version" ], "properties": { + "auto_truncate": { + "type": "boolean" + }, "docker_label": { "type": "string", "example": "null", @@ -1065,12 +1048,12 @@ "Input": { "oneOf": [ { - "type": "string" + "$ref": "#/components/schemas/InputType" }, { "type": "array", "items": { - "type": "string" + "$ref": "#/components/schemas/InputType" } } ] }, @@ -1098,6 +1081,21 @@ } ] }, + "InputType": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "integer", + "format": "int32", + "minimum": 0 + } + } + ] + }, "ModelType": { "oneOf": [ { @@ -1144,16 +1142,7 @@ ], "properties": { "embedding": { - "type": "array", - "items": { - "type": "number", - "format": "float" - }, - "example": [ 0.0, 1.0, 2.0 ] + "$ref": "#/components/schemas/Embedding" }, "index": { "type": "integer", @@ -1193,6 +1182,14 @@ "input" ], "properties": { + "encoding_format": { + "allOf": [ + { + "$ref": "#/components/schemas/EncodingFormat" + } + ], + "default": "float" + }, "input": { "$ref": "#/components/schemas/Input" }, @@ -1317,7 +1314,16 @@ "truncate": { "type": "boolean", "default": "false", - "example": "false" + "example": "false", + "nullable": true + }, + "truncation_direction": { + "allOf": [ + { + "$ref": "#/components/schemas/TruncationDirection" + } + ], + "default": "right" } } }, @@ -1416,7 +1422,16 @@ "truncate": { "type": 
"boolean", "default": "false", - "example": "false" + "example": "false", + "nullable": true + }, + "truncation_direction": { + "allOf": [ + { + "$ref": "#/components/schemas/TruncationDirection" + } + ], + "default": "right" } } }, @@ -1479,6 +1494,19 @@ } } }, + "TokenizeInput": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, "TokenizeRequest": { "type": "object", "required": [ @@ -1491,7 +1519,14 @@ "example": "true" }, "inputs": { - "$ref": "#/components/schemas/Input" + "$ref": "#/components/schemas/TokenizeInput" + }, + "prompt_name": { + "type": "string", + "description": "The name of the prompt that should be used by for encoding. If not set, no prompt\nwill be applied.\n\nMust be a key in the `Sentence Transformers` configuration `prompts` dictionary.\n\nFor example if ``prompt_name`` is \"query\" and the ``prompts`` is {\"query\": \"query: \", ...},\nthen the sentence \"What is the capital of France?\" will be encoded as\n\"query: What is the capital of France?\" because the prompt text will be prepended before\nany text to encode.", + "default": "null", + "example": "null", + "nullable": true } } }, @@ -1515,273 +1550,12 @@ ] ] }, - "VertexInstance": { - "oneOf": [ - { - "allOf": [ - { - "$ref": "#/components/schemas/EmbedRequest" - }, - { - "type": "object", - "required": [ - "type" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "embed" - ] - } - } - } - ] - }, - { - "allOf": [ - { - "$ref": "#/components/schemas/EmbedAllRequest" - }, - { - "type": "object", - "required": [ - "type" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "embed_all" - ] - } - } - } - ] - }, - { - "allOf": [ - { - "$ref": "#/components/schemas/EmbedSparseRequest" - }, - { - "type": "object", - "required": [ - "type" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "embed_sparse" - ] - } - } - } - ] - }, - { - "allOf": [ - { - "$ref": "#/components/schemas/PredictRequest" - }, - { - "type": "object", - "required": [ - "type" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "predict" - ] - } - } - } - ] - }, - { - "allOf": [ - { - "$ref": "#/components/schemas/RerankRequest" - }, - { - "type": "object", - "required": [ - "type" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "rerank" - ] - } - } - } - ] - }, - { - "allOf": [ - { - "$ref": "#/components/schemas/TokenizeRequest" - }, - { - "type": "object", - "required": [ - "type" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "tokenize" - ] - } - } - } - ] - } - ], - "discriminator": { - "propertyName": "type" - } - }, - "VertexRequest": { - "type": "object", - "required": [ - "instances" - ], - "properties": { - "instances": { - "type": "array", - "items": { - "$ref": "#/components/schemas/VertexInstance" - } - } - } - }, - "VertexResponse": { - "type": "array", - "items": { - "$ref": "#/components/schemas/VertexResponseInstance" - } - }, - "VertexResponseInstance": { - "oneOf": [ - { - "type": "object", - "required": [ - "type", - "result" - ], - "properties": { - "result": { - "$ref": "#/components/schemas/EmbedResponse" - }, - "type": { - "type": "string", - "enum": [ - "embed" - ] - } - } - }, - { - "type": "object", - "required": [ - "type", - "result" - ], - "properties": { - "result": { - "$ref": "#/components/schemas/EmbedAllResponse" - }, - "type": { - "type": "string", - "enum": [ - "embed_all" - ] - } - } - }, - { - "type": "object", - "required": [ - 
"type", - "result" - ], - "properties": { - "result": { - "$ref": "#/components/schemas/EmbedSparseResponse" - }, - "type": { - "type": "string", - "enum": [ - "embed_sparse" - ] - } - } - }, - { - "type": "object", - "required": [ - "type", - "result" - ], - "properties": { - "result": { - "$ref": "#/components/schemas/PredictResponse" - }, - "type": { - "type": "string", - "enum": [ - "predict" - ] - } - } - }, - { - "type": "object", - "required": [ - "type", - "result" - ], - "properties": { - "result": { - "$ref": "#/components/schemas/RerankResponse" - }, - "type": { - "type": "string", - "enum": [ - "rerank" - ] - } - } - }, - { - "type": "object", - "required": [ - "type", - "result" - ], - "properties": { - "result": { - "$ref": "#/components/schemas/TokenizeResponse" - }, - "type": { - "type": "string", - "enum": [ - "tokenize" - ] - } - } - } - ], - "discriminator": { - "propertyName": "type" - } + "TruncationDirection": { + "type": "string", + "enum": [ + "Left", + "Right" + ] } } }, diff --git a/router/src/http/server.rs b/router/src/http/server.rs index 49e6029a..17baada6 100644 --- a/router/src/http/server.rs +++ b/router/src/http/server.rs @@ -5,7 +5,8 @@ use crate::http::types::{ OpenAICompatEmbedding, OpenAICompatErrorResponse, OpenAICompatRequest, OpenAICompatResponse, OpenAICompatUsage, PredictInput, PredictRequest, PredictResponse, Prediction, Rank, RerankRequest, RerankResponse, Sequence, SimpleToken, SparseValue, TokenizeInput, - TokenizeRequest, TokenizeResponse, VertexPrediction, VertexRequest, VertexResponse, + TokenizeRequest, TokenizeResponse, TruncationDirection, VertexPrediction, VertexRequest, + VertexResponse, }; use crate::{ shutdown, ClassifierModel, EmbeddingModel, ErrorResponse, ErrorType, Info, ModelType, @@ -32,7 +33,6 @@ use text_embeddings_core::infer::{ AllEmbeddingsInferResponse, Infer, InferMetadata, PooledEmbeddingsInferResponse, }; use text_embeddings_core::TextEmbeddingsError; -use tokenizers::TruncationDirection; use tokio::sync::OwnedSemaphorePermit; use tower_http::cors::{AllowOrigin, CorsLayer}; use tracing::instrument; @@ -118,7 +118,7 @@ async fn predict( .predict( inputs, truncate, - req.truncation_direction, + req.truncation_direction.into(), req.raw_scores, permit, ) @@ -335,7 +335,7 @@ async fn rerank( .predict( (query, text), truncate, - req.truncation_direction, + req.truncation_direction.into(), req.raw_scores, permit, ) @@ -499,7 +499,7 @@ async fn embed( .embed_pooled( input, truncate, - req.truncation_direction, + req.truncation_direction.into(), req.prompt_name, req.normalize, permit, @@ -568,7 +568,7 @@ async fn embed( .embed_pooled( input, truncate, - req.truncation_direction, + req.truncation_direction.into(), prompt_name, req.normalize, permit, @@ -677,7 +677,7 @@ async fn embed_sparse( .embed_sparse( input, truncate, - req.truncation_direction, + req.truncation_direction.into(), req.prompt_name, permit, ) @@ -745,7 +745,7 @@ async fn embed_sparse( .embed_sparse( input, truncate, - req.truncation_direction, + req.truncation_direction.into(), prompt_name, permit, ) @@ -846,7 +846,7 @@ async fn embed_all( .embed_all( input, truncate, - req.truncation_direction, + req.truncation_direction.into(), req.prompt_name, permit, ) @@ -914,7 +914,7 @@ async fn embed_all( .embed_all( input, truncate, - req.truncation_direction, + req.truncation_direction.into(), prompt_name, permit, ) @@ -1029,7 +1029,7 @@ async fn openai_embed( .embed_pooled( input, truncate, - TruncationDirection::Right, + tokenizers::TruncationDirection::Right, None, 
true, permit, @@ -1102,7 +1102,7 @@ async fn openai_embed( .embed_pooled( input, truncate, - TruncationDirection::Right, + tokenizers::TruncationDirection::Right, None, true, permit, @@ -1483,6 +1483,8 @@ pub async fn run( Info, ModelType, ClassifierModel, + Embedding, + EncodingFormat, EmbeddingModel, PredictRequest, Prediction, @@ -1506,6 +1508,7 @@ pub async fn run( TokenizeInput, TokenizeRequest, TokenizeResponse, + TruncationDirection, SimpleToken, InputType, InputIds, diff --git a/router/src/http/types.rs b/router/src/http/types.rs index a47a995b..4414ecb4 100644 --- a/router/src/http/types.rs +++ b/router/src/http/types.rs @@ -4,7 +4,6 @@ use serde::{de, Deserialize, Deserializer, Serialize}; use serde_json::json; use std::fmt::Formatter; use text_embeddings_core::tokenization::EncodingInput; -use tokenizers::TruncationDirection; use utoipa::openapi::{RefOr, Schema}; use utoipa::ToSchema; @@ -194,6 +193,22 @@ impl<'__s> ToSchema<'__s> for PredictInput { } } +#[derive(Debug, Clone, Copy, PartialEq, Deserialize, ToSchema, Eq, Default)] +pub(crate) enum TruncationDirection { + Left, + #[default] + Right, +} + +impl From<TruncationDirection> for tokenizers::TruncationDirection { + fn from(value: TruncationDirection) -> Self { + match value { + TruncationDirection::Left => Self::Left, + TruncationDirection::Right => Self::Right, + } + } +} + #[derive(Deserialize, ToSchema)] pub(crate) struct PredictRequest { pub inputs: PredictInput, @@ -262,6 +277,7 @@ pub(crate) enum InputType { String(String), Ids(Vec<u32>), } + impl InputType { pub(crate) fn count_chars(&self) -> usize { match self { @@ -270,6 +286,7 @@ impl InputType { } } } + impl From<InputType> for EncodingInput { fn from(value: InputType) -> Self { match value { @@ -278,6 +295,7 @@ impl From<InputType> for EncodingInput { } } } + #[derive(Deserialize, ToSchema)] #[serde(untagged)] pub(crate) enum Input { @@ -351,6 +369,15 @@ pub(crate) struct EmbedRequest { #[serde(default)] #[schema(default = "right", example = "right")] pub truncation_direction: TruncationDirection, + /// The name of the prompt that should be used for encoding. If not set, no prompt + /// will be applied. + /// + /// Must be a key in the `Sentence Transformers` configuration `prompts` dictionary. + /// + /// For example if ``prompt_name`` is "query" and the ``prompts`` is {"query": "query: ", ...}, + /// then the sentence "What is the capital of France?" will be encoded as + /// "query: What is the capital of France?" because the prompt text will be prepended before + /// any text to encode. #[schema(default = "null", example = "null", nullable = true)] pub prompt_name: Option<String>, #[serde(default = "default_normalize")] @@ -375,6 +402,15 @@ pub(crate) struct EmbedSparseRequest { #[serde(default)] #[schema(default = "right", example = "right")] pub truncation_direction: TruncationDirection, + /// The name of the prompt that should be used for encoding. If not set, no prompt + /// will be applied. + /// + /// Must be a key in the `Sentence Transformers` configuration `prompts` dictionary. + /// + /// For example if ``prompt_name`` is "query" and the ``prompts`` is {"query": "query: ", ...}, + /// then the sentence "What is the capital of France?" will be encoded as + /// "query: What is the capital of France?" because the prompt text will be prepended before + /// any text to encode. 
#[schema(default = "null", example = "null", nullable = true)] pub prompt_name: Option, } @@ -397,6 +433,15 @@ pub(crate) struct EmbedAllRequest { #[serde(default)] #[schema(default = "right", example = "right")] pub truncation_direction: TruncationDirection, + /// The name of the prompt that should be used by for encoding. If not set, no prompt + /// will be applied. + /// + /// Must be a key in the `Sentence Transformers` configuration `prompts` dictionary. + /// + /// For example if ``prompt_name`` is "query" and the ``prompts`` is {"query": "query: ", ...}, + /// then the sentence "What is the capital of France?" will be encoded as + /// "query: What is the capital of France?" because the prompt text will be prepended before + /// any text to encode. #[schema(default = "null", example = "null", nullable = true)] pub prompt_name: Option, } @@ -426,6 +471,15 @@ pub(crate) struct TokenizeRequest { #[serde(default = "default_add_special_tokens")] #[schema(default = "true", example = "true")] pub add_special_tokens: bool, + /// The name of the prompt that should be used by for encoding. If not set, no prompt + /// will be applied. + /// + /// Must be a key in the `Sentence Transformers` configuration `prompts` dictionary. + /// + /// For example if ``prompt_name`` is "query" and the ``prompts`` is {"query": "query: ", ...}, + /// then the sentence "What is the capital of France?" will be encoded as + /// "query: What is the capital of France?" because the prompt text will be prepended before + /// any text to encode. #[schema(default = "null", example = "null", nullable = true)] pub prompt_name: Option, } diff --git a/router/src/lib.rs b/router/src/lib.rs index 3be03190..f5fd102c 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -238,11 +238,13 @@ pub async fn run( .await .context("Model backend is not healthy")?; - tracing::info!("Warming up model"); - backend - .warmup(max_input_length, max_batch_tokens, max_batch_requests) - .await - .context("Model backend is not healthy")?; + if !backend.padded_model { + tracing::info!("Warming up model"); + backend + .warmup(max_input_length, max_batch_tokens, max_batch_requests) + .await + .context("Model backend is not healthy")?; + } let max_batch_requests = backend .max_batch_size