feat: package vllm into the preset model image #653

Closed. Wants to merge 1 commit.
42 changes: 28 additions & 14 deletions docker/presets/models/tfs/Dockerfile
@@ -5,24 +5,23 @@ ARG MODEL_TYPE
ARG VERSION

# Set the working directory
WORKDIR /workspace/tfs
WORKDIR /workspace

# Write the version to a file
RUN echo $VERSION > /workspace/tfs/version.txt
RUN pip install --no-cache-dir virtualenv

# First, copy just the preset files and install dependencies
# This is done before copying the code to utilize Docker's layer caching and
# avoid reinstalling dependencies unless the requirements file changes.
# Inference
# 1. Huggingface transformers
# dependencies
COPY kaito/presets/inference/${MODEL_TYPE}/requirements.txt /workspace/tfs/inference-requirements.txt
RUN pip install --no-cache-dir -r inference-requirements.txt

COPY kaito/presets/inference/${MODEL_TYPE}/inference_api.py /workspace/tfs/inference_api.py

# Fine Tuning
COPY kaito/presets/tuning/${MODEL_TYPE}/requirements.txt /workspace/tfs/tuning-requirements.txt
RUN pip install --no-cache-dir -r tuning-requirements.txt
RUN virtualenv tfs && \
. tfs/bin/activate && \
pip install --no-cache-dir -r /workspace/tfs/inference-requirements.txt && \
pip install --no-cache-dir -r /workspace/tfs/tuning-requirements.txt && \
deactivate

# Copy the inference and tuning scripts
COPY kaito/presets/inference/${MODEL_TYPE}/inference_api.py /workspace/tfs/inference_api.py
COPY kaito/presets/tuning/${MODEL_TYPE}/cli.py /workspace/tfs/cli.py
COPY kaito/presets/tuning/${MODEL_TYPE}/fine_tuning.py /workspace/tfs/fine_tuning.py
COPY kaito/presets/tuning/${MODEL_TYPE}/parser.py /workspace/tfs/parser.py
@@ -31,5 +30,20 @@ COPY kaito/presets/tuning/${MODEL_TYPE}/dataset.py /workspace/tfs/dataset.py
# Copy the metrics server
COPY kaito/presets/tuning/${MODEL_TYPE}/metrics/metrics_server.py /workspace/tfs/metrics_server.py

# Copy the entire model weights to the weights directory
COPY ${WEIGHTS_PATH} /workspace/tfs/weights
# 2. vLLM
COPY kaito/presets/inference/vllm/requirements.txt /workspace/vllm/inference-requirements.txt
RUN virtualenv vllm && \
. vllm/bin/activate && \
pip install --no-cache-dir -r /workspace/vllm/inference-requirements.txt && \
deactivate

COPY kaito/presets/inference/vllm/inference_api.py /workspace/vllm/inference_api.py

# 3. Model weights
COPY kaito/docker/presets/models/tfs/entrypoint /workspace/entrypoint
COPY ${WEIGHTS_PATH} /workspace/weights
RUN echo $VERSION > /workspace/version.txt && \
ln -s /workspace/weights /workspace/tfs/weights && \
ln -s /workspace/weights /workspace/vllm/weights

ENTRYPOINT ["/workspace/entrypoint"]
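The weights section above keeps a single physical copy of the model under /workspace/weights and exposes it to both backends through symlinks. A minimal sketch of that layout, using a hypothetical temp directory in place of /workspace, shows that both backend paths resolve to the same file with no duplication:

```shell
set -eu

# Sketch of the Dockerfile's weight-sharing layout (hypothetical temp
# paths standing in for /workspace): one physical copy of the weights,
# symlinked into each backend's directory.
root=$(mktemp -d)
mkdir -p "$root/weights" "$root/tfs" "$root/vllm"
echo "model-bytes" > "$root/weights/model.bin"

# Mirror the two `ln -s` lines from the Dockerfile
ln -s "$root/weights" "$root/tfs/weights"
ln -s "$root/weights" "$root/vllm/weights"

# Both backends read the same underlying file
cat "$root/tfs/weights/model.bin"
cat "$root/vllm/weights/model.bin"
```

This is why the `RUN` step that creates the symlinks runs after `COPY ${WEIGHTS_PATH} /workspace/weights`: the large weights layer is built once and shared, rather than copied per backend.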
43 changes: 43 additions & 0 deletions docker/presets/models/tfs/entrypoint
@@ -0,0 +1,43 @@
#!/bin/bash

if [[ "$1" == "--help" || "$1" == "-h" ]]; then
    echo "Usage: $0 [backend] -- [command]"
    echo "Available backends:"
    echo "  vllm          Use the vllm backend"
    echo "  transformers  Use the huggingface/transformers backend"
    echo "Options:"
    echo "  -h, --help    Show this help message and exit"
    exit 0
fi

backend="$1"
while [[ $# -gt 0 && "$1" != "--" ]]; do
    shift
done
if [[ $# -gt 0 ]]; then
    shift
fi
if [[ $# -eq 0 ]]; then
    echo "No command provided"
    exit 1
fi

case "$backend" in
    "vllm")
        backend="vllm"
        ;;
    "transformers")
        backend="tfs"
        ;;
    *)
        echo "Unknown backend: $backend"
        exit 1
        ;;
esac

echo "Switch to backend: $backend"
. "$backend"/bin/activate
cd "$backend"

# Evaluate the command after "--"
eval "$@"
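The entrypoint's core logic is a mapping from the backend name given as the first argument to the virtualenv directory it should activate. A sketch of just that mapping, with `resolve_backend` as a hypothetical helper mirroring the `case` statement above:

```shell
set -eu

# Hypothetical helper mirroring the entrypoint's case statement:
# "vllm" maps to the vllm virtualenv, "transformers" to the tfs
# virtualenv, anything else is rejected.
resolve_backend() {
    case "$1" in
        vllm)         echo "vllm" ;;
        transformers) echo "tfs" ;;
        *)            echo "unknown" ;;
    esac
}

resolve_backend vllm
resolve_backend transformers
```

Inside the image an invocation would look like `/workspace/entrypoint vllm -- python3 inference_api.py`: everything before `--` selects the backend, and the command after `--` is run (via `eval`) from the chosen backend's directory with its virtualenv active.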
3 changes: 2 additions & 1 deletion presets/models/supported_models.yaml
@@ -104,8 +104,9 @@ models:
type: text-generation
version: https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/commit/d269012bea6fbe38ce7752c8940fea010eea3383
runtime: tfs
tag: 0.0.2
tag: 0.0.3
# Tag history:
# 0.0.3 - Add vllm inference backend
# 0.0.2 - Add Logging & Metrics Server
# 0.0.1 - Initial Release
