diff --git a/build.py b/build.py index aeb2442763..24bde0f3a4 100755 --- a/build.py +++ b/build.py @@ -76,7 +76,7 @@ "2024.0.0", # ORT OpenVINO "2024.0.0", # Standalone OpenVINO "3.2.6", # DCGM version - "0.4.3", # vLLM version + "0.5.0.post1", # vLLM version ) } @@ -1082,25 +1082,20 @@ def create_dockerfile_linux( """ if "tensorrtllm" in backends: df += """ - -RUN ldconfig -# Remove contents that are not needed in runtime -RUN ARCH="$(uname -i)" \\ - && rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data \\ - && rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python \\ - && rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples - # Install required packages for TRT-LLM models -RUN python3 -m pip install --upgrade pip \\ - && pip3 install transformers - -# ldconfig for TRT-LLM -RUN find /usr -name libtensorrt_llm.so -exec dirname {} \; > /etc/ld.so.conf.d/tensorrt-llm.conf -RUN find /opt/tritonserver -name libtritonserver.so -exec dirname {} \; > /etc/ld.so.conf.d/triton-tensorrtllm-worker.conf - +# Remove contents that are not needed in runtime # Setuptools has breaking changes in version 70.0.0, so fix it to 69.5.1 # The generated code in grpc_service_pb2_grpc.py depends on grpcio>=1.64.0, so fix it to 1.64.0 -RUN pip3 install setuptools==69.5.1 grpcio-tools==1.64.0 +RUN ldconfig && \ + ARCH="$(uname -i)" && \ + rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \ + rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \ + rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples && \ + python3 -m pip install --upgrade pip && \ + pip3 install --no-cache-dir transformers && \ + find /usr -name libtensorrt_llm.so -exec dirname {} \; > /etc/ld.so.conf.d/tensorrt-llm.conf && \ + find /opt/tritonserver -name libtritonserver.so -exec dirname {} \; > /etc/ld.so.conf.d/triton-tensorrtllm-worker.conf && \ + pip3 install --no-cache-dir setuptools==69.5.1 grpcio-tools==1.64.0 ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH """ diff --git a/docs/protocol/extension_generate.md b/docs/protocol/extension_generate.md index b54b0caffb..043339eb4a 100644 --- a/docs/protocol/extension_generate.md +++ b/docs/protocol/extension_generate.md @@ -1,5 +1,5 @@