


# foundation-model-inference

## Overview

Environment for deploying models that use DeepSpeed-MII (DS-MII) or vLLM for inference.

Version: 54

## Tags

`Preview` `DS-MII` `VLLM`

View in Studio: https://ml.azure.com/registries/azureml/environments/foundation-model-inference/version/54

Docker image: `mcr.microsoft.com/azureml/curated/foundation-model-inference:54`
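
A minimal deployment sketch using the azure-ai-ml SDK. Only the environment URI comes from this page; the endpoint, model reference, and instance type below are placeholder assumptions, not documented values:

```python
# Sketch, not a verified recipe: endpoint, model, and instance type are
# placeholders; only the environment registry URI is taken from this page.
from azure.ai.ml import MLClient
from azure.ai.ml.entities import ManagedOnlineDeployment
from azure.identity import DefaultAzureCredential

ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id="<subscription-id>",
    resource_group_name="<resource-group>",
    workspace_name="<workspace>",
)

deployment = ManagedOnlineDeployment(
    name="fm-inference",
    endpoint_name="<existing-endpoint-name>",
    model="azureml://registries/azureml/models/<model-name>/versions/<model-version>",
    environment="azureml://registries/azureml/environments/foundation-model-inference/versions/54",
    instance_type="Standard_NC24ads_A100_v4",  # assumption: any supported GPU SKU
    instance_count=1,
)

ml_client.online_deployments.begin_create_or_update(deployment).result()
```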

## Docker build context

### Dockerfile

```dockerfile

FROM nvidia/cuda:12.1.1-devel-ubuntu20.04

ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    TZ=Etc/UTC \
    DEBIAN_FRONTEND=noninteractive
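
# Base OS setup: upgrade packages and add the deadsnakes PPA for newer Python builds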
RUN apt update && apt upgrade -y && apt install software-properties-common -y && add-apt-repository ppa:deadsnakes/ppa -y
RUN apt install git -y

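# Install Miniconda and refresh the base environment from conda-forge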
ENV MINICONDA_VERSION py310_23.10.0-1
ENV PATH /opt/miniconda/bin:$PATH
RUN apt-get update && \
    apt-get install -y --no-install-recommends wget runit
RUN wget -qO /tmp/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh && \
    bash /tmp/miniconda.sh -bf -p /opt/miniconda && \
    conda update --all -c conda-forge -y && \
    conda clean -ay && \
    rm -rf /opt/miniconda/pkgs && \
    rm /tmp/miniconda.sh && \
    find / -type d -name __pycache__ | xargs rm -rf

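# Path of the conda environment used at runtime (AzureML curated-environment convention)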
ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/default

# Create conda environment with py310
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
    python=3.10 \
    -c conda-forge 

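# Make the new environment the default: put it first on PATH and point conda's env vars at it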
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH

ENV CONDA_DEFAULT_ENV=$AZUREML_CONDA_ENVIRONMENT_PATH
ENV CONDA_PREFIX=$AZUREML_CONDA_ENVIRONMENT_PATH

WORKDIR /

# Needed for megablocks
RUN pip install torch~=2.4.0 --index-url https://download.pytorch.org/whl/cu121

# Mixtral-specific: GPU architectures to target when building megablocks CUDA kernels
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
RUN pip install git+https://github.com/stanford-futuredata/megablocks.git@5897cd6

# For local testing
# Need to copy src code and install in editable mode
# COPY . .
# RUN pip install -e ./ --no-cache-dir

# When copied to assets repo, change to install from public pypi
RUN pip install llm-optimized-inference==0.2.12 --no-cache-dir

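# Swap in pinned pre-release transformers and vLLM wheels hosted on Azure blob storage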
RUN pip uninstall transformers -y
RUN pip uninstall -y vllm
RUN pip install https://automlsamplenotebookdata.blob.core.windows.net/vllm/transformers-4.45.0.dev0-py3-none-any.whl
RUN pip install https://automlsamplenotebookdata.blob.core.windows.net/vllm/vllm-0.6.1.post2+cu124-cp38-abi3-manylinux1_x86_64.whl

# clean the pip cache (conda caches were already cleaned during the Miniconda install)
RUN rm -rf ~/.cache/pip

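# Register the API server as a runit service; normalize CRLF line endings and make the run script executable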
ADD runit_folder/api_server /var/runit/api_server
RUN sed -i 's/\r$//g' /var/runit/api_server/run
RUN chmod +x /var/runit/api_server/run

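# runsvdir supervises every service under SVDIR; WORKER_TIMEOUT (seconds) presumably
# accommodates slow model loads. The API server listens on port 5001.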
ENV SVDIR=/var/runit
ENV WORKER_TIMEOUT=3600
EXPOSE 5001
CMD [ "runsvdir", "/var/runit" ]
```
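
Once the container is running and the api_server service is up, a smoke test can be issued against the exposed port. A hedged sketch: the `/score` route and the `input_data` payload shape are assumptions based on common AzureML conventions, not documented on this page:

```python
# Hedged smoke test for a locally running container; route and payload shape
# are assumptions, only port 5001 is taken from the Dockerfile's EXPOSE.
import json
import urllib.request

payload = {
    "input_data": {
        "input_string": ["Briefly explain what vLLM is."],
        "parameters": {"max_new_tokens": 64, "temperature": 0.7},
    }
}

req = urllib.request.Request(
    "http://localhost:5001/score",  # 5001 matches the EXPOSE above
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(resp.read().decode("utf-8"))
```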