Commit

Merge branch 'master' into master
shivakrishnaah authored Feb 9, 2025
2 parents ffbcd1f + 2e9c43e commit b47f072
Showing 17 changed files with 1,962 additions and 1,972 deletions.
6 changes: 2 additions & 4 deletions .github/workflows/release-sc.yml
@@ -89,12 +89,11 @@ jobs:
           QUAY_MLSERVER_IMAGE: quay.io/redhat-isv-containers/635670d3624969b495b6936f:${{ github.event.inputs.version }}
       - name: Install preflight
         run: |
+          PREFLIGHT_VERSION=$(curl -s https://api.github.com/repos/redhat-openshift-ecosystem/openshift-preflight/releases/latest | jq -r .tag_name)
           wget https://github.com/redhat-openshift-ecosystem/openshift-preflight/releases/download/$PREFLIGHT_VERSION/preflight-linux-amd64
           chmod u+x preflight-linux-amd64
           sudo mv preflight-linux-amd64 /usr/local/bin/preflight
           preflight --version
-        env:
-          PREFLIGHT_VERSION: 1.10.0
       - name: Submit preflight results
         run: |
           preflight check container \
@@ -159,12 +158,11 @@ jobs:
           QUAY_MLSERVER_IMAGE: quay.io/redhat-isv-containers/63567143624969b495b69370:${{ github.event.inputs.version }}
       - name: Install preflight
         run: |
+          PREFLIGHT_VERSION=$(curl -s https://api.github.com/repos/redhat-openshift-ecosystem/openshift-preflight/releases/latest | jq -r .tag_name)
           wget https://github.com/redhat-openshift-ecosystem/openshift-preflight/releases/download/$PREFLIGHT_VERSION/preflight-linux-amd64
           chmod u+x preflight-linux-amd64
           sudo mv preflight-linux-amd64 /usr/local/bin/preflight
           preflight --version
-        env:
-          PREFLIGHT_VERSION: 1.10.0
       - name: Submit preflight results
         run: |
           preflight check container \
9 changes: 3 additions & 6 deletions .github/workflows/release.yml
@@ -87,12 +87,11 @@ jobs:
           docker push $QUAY_MLSERVER_IMAGE
       - name: Install preflight
         run: |
+          PREFLIGHT_VERSION=$(curl -s https://api.github.com/repos/redhat-openshift-ecosystem/openshift-preflight/releases/latest | jq -r .tag_name)
           wget https://github.com/redhat-openshift-ecosystem/openshift-preflight/releases/download/$PREFLIGHT_VERSION/preflight-linux-amd64
           chmod u+x preflight-linux-amd64
           sudo mv preflight-linux-amd64 /usr/local/bin/preflight
           preflight --version
-        env:
-          PREFLIGHT_VERSION: 1.10.0
       - name: Submit preflight results
         run: |
           preflight check container \
@@ -159,12 +158,11 @@ jobs:
           docker push $QUAY_MLSERVER_IMAGE-slim
       - name: Install preflight
         run: |
+          PREFLIGHT_VERSION=$(curl -s https://api.github.com/repos/redhat-openshift-ecosystem/openshift-preflight/releases/latest | jq -r .tag_name)
           wget https://github.com/redhat-openshift-ecosystem/openshift-preflight/releases/download/$PREFLIGHT_VERSION/preflight-linux-amd64
           chmod u+x preflight-linux-amd64
           sudo mv preflight-linux-amd64 /usr/local/bin/preflight
           preflight --version
-        env:
-          PREFLIGHT_VERSION: 1.10.0
       - name: Submit preflight results
         run: |
           preflight check container \
@@ -283,12 +281,11 @@ jobs:
           docker push $QUAY_MLSERVER_IMAGE-${{ matrix.runtime }}
       - name: Install preflight
         run: |
+          PREFLIGHT_VERSION=$(curl -s https://api.github.com/repos/redhat-openshift-ecosystem/openshift-preflight/releases/latest | jq -r .tag_name)
           wget https://github.com/redhat-openshift-ecosystem/openshift-preflight/releases/download/$PREFLIGHT_VERSION/preflight-linux-amd64
           chmod u+x preflight-linux-amd64
           sudo mv preflight-linux-amd64 /usr/local/bin/preflight
           preflight --version
-        env:
-          PREFLIGHT_VERSION: 1.10.0
       - name: Submit preflight results
         run: |
           preflight check container \
7 changes: 7 additions & 0 deletions docs-gb/user-guide/parallel-inference.md
@@ -77,6 +77,13 @@ The expected values are:
- `0`, will disable the parallel inference feature.
In other words, inference will happen within the main MLServer process.

### `inference_pool_gid`

The `inference_pool_gid` field of the `model-settings.json` file (or, alternatively, the `MLSERVER_MODEL_INFERENCE_POOL_GID` global environment variable) allows models to be loaded on a dedicated inference pool, keyed by a group ID (GID), so that one model's workload cannot starve the others.
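
For example, a `model-settings.json` along these lines would pin the model to the pool identified by its GID. This is a minimal sketch: the model name, runtime implementation, and GID value are illustrative placeholders.

```json
{
  "name": "my-model",
  "implementation": "mlserver_sklearn.SKLearnModel",
  "parameters": {
    "inference_pool_gid": "team-a-pool"
  }
}
```

Models that share the same GID (and environment) are served from the same dedicated pool, while models without a GID keep using the default pool.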

Complementing `inference_pool_gid`, if the `autogenerate_inference_pool_gid` field of the `model-settings.json` file (or, alternatively, the `MLSERVER_MODEL_AUTOGENERATE_INFERENCE_POOL_GID` global environment variable) is set to `true`, a UUID is generated automatically and used as the GID, and the model is loaded on its own dedicated inference pool. This option is useful when you want to load a single model on a dedicated inference pool without having to manage the GID yourself.
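
The autogenerated variant looks as follows; again, the model name and runtime implementation are placeholders:

```json
{
  "name": "my-model",
  "implementation": "mlserver_sklearn.SKLearnModel",
  "parameters": {
    "autogenerate_inference_pool_gid": true
  }
}
```

As implemented by the validator added to `mlserver/settings.py` below, the generated GID is a `uuid.uuid4()` string, and an explicitly set `inference_pool_gid` takes precedence over autogeneration.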


## References

Jiale Zhi, Rui Wang, Jeff Clune, and Kenneth O. Stanley. Fiber: A Platform for Efficient Development and Distributed Training for Reinforcement Learning and Population-Based Methods. arXiv:2003.11164 [cs, stat], March 2020. [arXiv:2003.11164](https://arxiv.org/abs/2003.11164).
55 changes: 47 additions & 8 deletions mlserver/parallel/registry.py
@@ -38,6 +38,12 @@ def _get_env_tarball(model: MLModel) -> Optional[str]:
         return to_absolute_path(model_settings, env_tarball)
 
 
+def _append_gid_environment_hash(
+    env_hash: str, inference_pool_gid: Optional[str] = None
+) -> str:
+    return f"{env_hash}-{inference_pool_gid}"
+
+
 class InferencePoolRegistry:
     """
     Keeps track of the different inference pools loaded in the server.
@@ -80,14 +86,17 @@ async def _get_or_create(self, model: MLModel) -> InferencePool:
             and model.settings.parameters.environment_path
         ):
             pool = await self._get_or_create_with_existing_env(
-                model.settings.parameters.environment_path
+                model.settings.parameters.environment_path,
+                model.settings.parameters.inference_pool_gid,
             )
         else:
             pool = await self._get_or_create_with_tarball(model)
         return pool
 
     async def _get_or_create_with_existing_env(
-        self, environment_path: str
+        self,
+        environment_path: str,
+        inference_pool_gid: Optional[str],
     ) -> InferencePool:
         """
         Creates or returns the InferencePool for a model that uses an existing
@@ -98,8 +107,13 @@ async def _get_or_create_with_existing_env(
         )
         logger.info(f"Using environment {expanded_environment_path}")
         env_hash = await compute_hash_of_string(expanded_environment_path)
+
+        if inference_pool_gid is not None:
+            env_hash = _append_gid_environment_hash(env_hash, inference_pool_gid)
+
         if env_hash in self._pools:
             return self._pools[env_hash]
+
         env = Environment(
             env_path=expanded_environment_path,
             env_hash=env_hash,
@@ -114,22 +128,38 @@ async def _get_or_create_with_tarball(self, model: MLModel) -> InferencePool:
     async def _get_or_create_with_tarball(self, model: MLModel) -> InferencePool:
         """
         Creates or returns the InferencePool for a model that uses a
-        tarball as python environment.
+        tarball as a Python environment.
         """
         env_tarball = _get_env_tarball(model)
+        inference_pool_gid = (
+            model.settings.parameters.inference_pool_gid
+            if model.settings.parameters
+            else None
+        )
+
         if not env_tarball:
-            return self._default_pool
+            return (
+                self._pools.setdefault(
+                    inference_pool_gid,
+                    InferencePool(self._settings, on_worker_stop=self._on_worker_stop),
+                )
+                if inference_pool_gid
+                else self._default_pool
+            )
 
         env_hash = await compute_hash_of_file(env_tarball)
+        if inference_pool_gid is not None:
+            env_hash = _append_gid_environment_hash(env_hash, inference_pool_gid)
+
         if env_hash in self._pools:
             return self._pools[env_hash]
 
         env = await self._extract_tarball(env_hash, env_tarball)
-        pool = InferencePool(
+        self._pools[env_hash] = InferencePool(
             self._settings, env=env, on_worker_stop=self._on_worker_stop
         )
-        self._pools[env_hash] = pool
-        return pool
+
+        return self._pools[env_hash]
 
     async def _extract_tarball(self, env_hash: str, env_tarball: str) -> Environment:
         env_path = self._get_env_path(env_hash)
@@ -145,8 +175,17 @@ def _get_env_path(self, env_hash: str) -> str:
 
     async def _find(self, model: MLModel) -> InferencePool:
         env_hash = _get_environment_hash(model)
+        inference_pool_gid = (
+            model.settings.parameters.inference_pool_gid
+            if model.settings.parameters
+            else None
+        )
+
         if not env_hash:
-            return self._default_pool
+            if not inference_pool_gid:
+                return self._default_pool
+            else:
+                return self._pools[inference_pool_gid]
 
         if env_hash not in self._pools:
             raise EnvironmentNotFound(model, env_hash)
15 changes: 15 additions & 0 deletions mlserver/settings.py
@@ -1,5 +1,6 @@
 import sys
 import os
+import uuid
 import json
 import importlib
 import inspect
@@ -14,11 +15,13 @@
     no_type_check,
     TYPE_CHECKING,
 )
+from typing_extensions import Self
 from pydantic import (
     ImportString,
     Field,
     AliasChoices,
 )
+from pydantic import model_validator
 from pydantic._internal._validators import import_string
 import pydantic_settings
 from pydantic_settings import SettingsConfigDict
@@ -313,6 +316,12 @@ class ModelParameters(BaseSettings):
     """Path to the environment tarball which should be used to load this
     model."""
 
+    inference_pool_gid: Optional[str] = None
+    """Inference pool group id to be used to serve this model."""
+
+    autogenerate_inference_pool_gid: bool = False
+    """Flag to autogenerate the inference pool group id for this model."""
+
     format: Optional[str] = None
     """Format of the model (only available on certain runtimes)."""
 
@@ -323,6 +332,12 @@
     """Arbitrary settings, dependent on the inference runtime
     implementation."""
 
+    @model_validator(mode="after")
+    def set_inference_pool_gid(self) -> Self:
+        if self.autogenerate_inference_pool_gid and self.inference_pool_gid is None:
+            self.inference_pool_gid = str(uuid.uuid4())
+        return self
+
+
 class ModelSettings(BaseSettings):
     model_config = SettingsConfigDict(
