Commit

Merge branch 'master' into master
shivakrishnaah authored Feb 9, 2025
2 parents ffbcd1f + 2e9c43e commit b47f072
Showing 17 changed files with 1,962 additions and 1,972 deletions.
6 changes: 2 additions & 4 deletions .github/workflows/release-sc.yml
@@ -89,12 +89,11 @@ jobs:
           QUAY_MLSERVER_IMAGE: quay.io/redhat-isv-containers/635670d3624969b495b6936f:${{ github.event.inputs.version }}
       - name: Install preflight
         run: |
+          PREFLIGHT_VERSION=$(curl -s https://api.github.com/repos/redhat-openshift-ecosystem/openshift-preflight/releases/latest | jq -r .tag_name)
           wget https://github.com/redhat-openshift-ecosystem/openshift-preflight/releases/download/$PREFLIGHT_VERSION/preflight-linux-amd64
           chmod u+x preflight-linux-amd64
           sudo mv preflight-linux-amd64 /usr/local/bin/preflight
           preflight --version
-        env:
-          PREFLIGHT_VERSION: 1.10.0
       - name: Submit preflight results
         run: |
           preflight check container \
@@ -159,12 +158,11 @@ jobs:
           QUAY_MLSERVER_IMAGE: quay.io/redhat-isv-containers/63567143624969b495b69370:${{ github.event.inputs.version }}
       - name: Install preflight
         run: |
+          PREFLIGHT_VERSION=$(curl -s https://api.github.com/repos/redhat-openshift-ecosystem/openshift-preflight/releases/latest | jq -r .tag_name)
           wget https://github.com/redhat-openshift-ecosystem/openshift-preflight/releases/download/$PREFLIGHT_VERSION/preflight-linux-amd64
           chmod u+x preflight-linux-amd64
           sudo mv preflight-linux-amd64 /usr/local/bin/preflight
           preflight --version
-        env:
-          PREFLIGHT_VERSION: 1.10.0
       - name: Submit preflight results
         run: |
           preflight check container \
9 changes: 3 additions & 6 deletions .github/workflows/release.yml
@@ -87,12 +87,11 @@ jobs:
           docker push $QUAY_MLSERVER_IMAGE
       - name: Install preflight
         run: |
+          PREFLIGHT_VERSION=$(curl -s https://api.github.com/repos/redhat-openshift-ecosystem/openshift-preflight/releases/latest | jq -r .tag_name)
           wget https://github.com/redhat-openshift-ecosystem/openshift-preflight/releases/download/$PREFLIGHT_VERSION/preflight-linux-amd64
           chmod u+x preflight-linux-amd64
           sudo mv preflight-linux-amd64 /usr/local/bin/preflight
           preflight --version
-        env:
-          PREFLIGHT_VERSION: 1.10.0
       - name: Submit preflight results
         run: |
           preflight check container \
@@ -159,12 +158,11 @@ jobs:
           docker push $QUAY_MLSERVER_IMAGE-slim
       - name: Install preflight
         run: |
+          PREFLIGHT_VERSION=$(curl -s https://api.github.com/repos/redhat-openshift-ecosystem/openshift-preflight/releases/latest | jq -r .tag_name)
           wget https://github.com/redhat-openshift-ecosystem/openshift-preflight/releases/download/$PREFLIGHT_VERSION/preflight-linux-amd64
           chmod u+x preflight-linux-amd64
           sudo mv preflight-linux-amd64 /usr/local/bin/preflight
           preflight --version
-        env:
-          PREFLIGHT_VERSION: 1.10.0
       - name: Submit preflight results
         run: |
           preflight check container \
@@ -283,12 +281,11 @@ jobs:
           docker push $QUAY_MLSERVER_IMAGE-${{ matrix.runtime }}
       - name: Install preflight
         run: |
+          PREFLIGHT_VERSION=$(curl -s https://api.github.com/repos/redhat-openshift-ecosystem/openshift-preflight/releases/latest | jq -r .tag_name)
           wget https://github.com/redhat-openshift-ecosystem/openshift-preflight/releases/download/$PREFLIGHT_VERSION/preflight-linux-amd64
           chmod u+x preflight-linux-amd64
           sudo mv preflight-linux-amd64 /usr/local/bin/preflight
           preflight --version
-        env:
-          PREFLIGHT_VERSION: 1.10.0
       - name: Submit preflight results
         run: |
           preflight check container \
7 changes: 7 additions & 0 deletions docs-gb/user-guide/parallel-inference.md
@@ -77,6 +77,13 @@ The expected values are:
- `0`, will disable the parallel inference feature.
In other words, inference will happen within the main MLServer process.

### `inference_pool_gid`

The `inference_pool_gid` field of the `model-settings.json` file (or, alternatively, the `MLSERVER_MODEL_INFERENCE_POOL_GID` global environment variable) allows models to be loaded on a dedicated inference pool, keyed by a group ID (GID), so that one model's workload cannot starve the others.
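
For example, a `model-settings.json` along these lines would pin the model to the pool identified by its GID. This is a minimal sketch: the model name, runtime implementation, and GID value are illustrative placeholders.

```json
{
  "name": "my-model",
  "implementation": "mlserver_sklearn.SKLearnModel",
  "parameters": {
    "inference_pool_gid": "team-a-pool"
  }
}
```

Models that share the same GID (and environment) are served from the same dedicated pool, while models without a GID keep using the default pool.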

Complementing `inference_pool_gid`, if the `autogenerate_inference_pool_gid` field of the `model-settings.json` file (or, alternatively, the `MLSERVER_MODEL_AUTOGENERATE_INFERENCE_POOL_GID` global environment variable) is set to `true`, a UUID is generated automatically and used as the GID, and the model is loaded on its own dedicated inference pool. This option is useful when you want to load a single model on a dedicated inference pool without having to manage the GID yourself.
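
The autogenerated variant looks as follows; again, the model name and runtime implementation are placeholders:

```json
{
  "name": "my-model",
  "implementation": "mlserver_sklearn.SKLearnModel",
  "parameters": {
    "autogenerate_inference_pool_gid": true
  }
}
```

As implemented by the validator added to `mlserver/settings.py` below, the generated GID is a `uuid.uuid4()` string, and an explicitly set `inference_pool_gid` takes precedence over autogeneration.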


## References

Jiale Zhi, Rui Wang, Jeff Clune, and Kenneth O. Stanley. Fiber: A Platform for Efficient Development and Distributed Training for Reinforcement Learning and Population-Based Methods. arXiv:2003.11164 [cs, stat], March 2020. [arXiv:2003.11164](https://arxiv.org/abs/2003.11164).
55 changes: 47 additions & 8 deletions mlserver/parallel/registry.py
@@ -38,6 +38,12 @@ def _get_env_tarball(model: MLModel) -> Optional[str]:
         return to_absolute_path(model_settings, env_tarball)
 
 
+def _append_gid_environment_hash(
+    env_hash: str, inference_pool_gid: Optional[str] = None
+) -> str:
+    return f"{env_hash}-{inference_pool_gid}"
+
+
 class InferencePoolRegistry:
     """
     Keeps track of the different inference pools loaded in the server.
@@ -80,14 +86,17 @@ async def _get_or_create(self, model: MLModel) -> InferencePool:
             and model.settings.parameters.environment_path
         ):
             pool = await self._get_or_create_with_existing_env(
-                model.settings.parameters.environment_path
+                model.settings.parameters.environment_path,
+                model.settings.parameters.inference_pool_gid,
             )
         else:
             pool = await self._get_or_create_with_tarball(model)
         return pool
 
     async def _get_or_create_with_existing_env(
-        self, environment_path: str
+        self,
+        environment_path: str,
+        inference_pool_gid: Optional[str],
     ) -> InferencePool:
         """
         Creates or returns the InferencePool for a model that uses an existing
@@ -98,8 +107,13 @@ async def _get_or_create_with_existing_env(
         )
         logger.info(f"Using environment {expanded_environment_path}")
         env_hash = await compute_hash_of_string(expanded_environment_path)
+
+        if inference_pool_gid is not None:
+            env_hash = _append_gid_environment_hash(env_hash, inference_pool_gid)
+
         if env_hash in self._pools:
             return self._pools[env_hash]
+
         env = Environment(
             env_path=expanded_environment_path,
             env_hash=env_hash,
@@ -114,22 +128,38 @@ async def _get_or_create_with_tarball(self, model: MLModel) -> InferencePool:
     async def _get_or_create_with_tarball(self, model: MLModel) -> InferencePool:
         """
         Creates or returns the InferencePool for a model that uses a
-        tarball as python environment.
+        tarball as a Python environment.
         """
         env_tarball = _get_env_tarball(model)
+        inference_pool_gid = (
+            model.settings.parameters.inference_pool_gid
+            if model.settings.parameters
+            else None
+        )
+
         if not env_tarball:
-            return self._default_pool
+            return (
+                self._pools.setdefault(
+                    inference_pool_gid,
+                    InferencePool(self._settings, on_worker_stop=self._on_worker_stop),
+                )
+                if inference_pool_gid
+                else self._default_pool
+            )
 
         env_hash = await compute_hash_of_file(env_tarball)
+        if inference_pool_gid is not None:
+            env_hash = _append_gid_environment_hash(env_hash, inference_pool_gid)
+
         if env_hash in self._pools:
             return self._pools[env_hash]
 
         env = await self._extract_tarball(env_hash, env_tarball)
-        pool = InferencePool(
+        self._pools[env_hash] = InferencePool(
             self._settings, env=env, on_worker_stop=self._on_worker_stop
         )
-        self._pools[env_hash] = pool
-        return pool
+
+        return self._pools[env_hash]
 
     async def _extract_tarball(self, env_hash: str, env_tarball: str) -> Environment:
         env_path = self._get_env_path(env_hash)
@@ -145,8 +175,17 @@ def _get_env_path(self, env_hash: str) -> str:
 
     async def _find(self, model: MLModel) -> InferencePool:
         env_hash = _get_environment_hash(model)
+        inference_pool_gid = (
+            model.settings.parameters.inference_pool_gid
+            if model.settings.parameters
+            else None
+        )
+
         if not env_hash:
-            return self._default_pool
+            if not inference_pool_gid:
+                return self._default_pool
+            else:
+                return self._pools[inference_pool_gid]
 
         if env_hash not in self._pools:
             raise EnvironmentNotFound(model, env_hash)
15 changes: 15 additions & 0 deletions mlserver/settings.py
@@ -1,5 +1,6 @@
 import sys
 import os
+import uuid
 import json
 import importlib
 import inspect
@@ -14,11 +15,13 @@
     no_type_check,
     TYPE_CHECKING,
 )
+from typing_extensions import Self
 from pydantic import (
     ImportString,
     Field,
     AliasChoices,
 )
+from pydantic import model_validator
 from pydantic._internal._validators import import_string
 import pydantic_settings
 from pydantic_settings import SettingsConfigDict
@@ -313,6 +316,12 @@ class ModelParameters(BaseSettings):
     """Path to the environment tarball which should be used to load this
     model."""
 
+    inference_pool_gid: Optional[str] = None
+    """Inference pool group id to be used to serve this model."""
+
+    autogenerate_inference_pool_gid: bool = False
+    """Flag to autogenerate the inference pool group id for this model."""
+
     format: Optional[str] = None
     """Format of the model (only available on certain runtimes)."""
 
@@ -323,6 +332,12 @@
     """Arbitrary settings, dependent on the inference runtime
     implementation."""
 
+    @model_validator(mode="after")
+    def set_inference_pool_gid(self) -> Self:
+        if self.autogenerate_inference_pool_gid and self.inference_pool_gid is None:
+            self.inference_pool_gid = str(uuid.uuid4())
+        return self
+
+
 class ModelSettings(BaseSettings):
     model_config = SettingsConfigDict(
