From 74516220273744605f21d232ddaaba144fe2fdb1 Mon Sep 17 00:00:00 2001 From: fpetrini15 Date: Tue, 25 Jun 2024 09:02:16 -0700 Subject: [PATCH 1/6] Build PA repo separately --- Dockerfile.sdk | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/Dockerfile.sdk b/Dockerfile.sdk index 9e83ecca47..a6a938ab07 100644 --- a/Dockerfile.sdk +++ b/Dockerfile.sdk @@ -32,6 +32,7 @@ ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.06-py3-min ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo +ARG TRITON_PA_REPO_SUBDIR=perfanalyzerrepo ARG TRITON_COMMON_REPO_TAG=main ARG TRITON_CORE_REPO_TAG=main ARG TRITON_THIRD_PARTY_REPO_TAG=main @@ -103,6 +104,7 @@ RUN rm -f /usr/bin/python && \ # Build the client library and examples ARG TRITON_REPO_ORGANIZATION ARG TRITON_CLIENT_REPO_SUBDIR +ARG TRITON_PA_REPO_SUBDIR ARG TRITON_COMMON_REPO_TAG ARG TRITON_CORE_REPO_TAG ARG TRITON_THIRD_PARTY_REPO_TAG @@ -114,8 +116,32 @@ ARG TARGETPLATFORM WORKDIR /workspace COPY TRITON_VERSION . COPY ${TRITON_CLIENT_REPO_SUBDIR} client +COPY ${TRITON_PA_REPO_SUBDIR} perf_analyzer -WORKDIR /workspace/build +# TODO: PA will build the CC clients since it depends on it. +# This should be optimized so that we do not have to build +# the CC clients twice. 
+WORKDIR /workspace/pa_build +RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ + -DTRITON_ENABLE_PERF_ANALYZER=ON \ + -DTRITON_ENABLE_PERF_ANALYZER_C_API=ON \ + -DTRITON_ENABLE_PERF_ANALYZER_TFS=ON \ + -DTRITON_ENABLE_PERF_ANALYZER_TS=ON \ + -DTRITON_ENABLE_PERF_ANALYZER_OPENAI=ON \ + -DTRITON_ENABLE_CC_HTTP=ON \ + -DTRITON_ENABLE_CC_GRPC=ON \ + -DTRITON_ENABLE_PYTHON_HTTP=ON \ + -DTRITON_ENABLE_PYTHON_GRPC=ON \ + -DTRITON_PACKAGE_PERF_ANALYZER=ON \ + -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \ + /workspace/perf_analyzer +RUN make -j16 perf-analyzer + +RUN pip3 install build \ + && cd /workspace/perf_analyzer/genai-perf \ + && python3 -m build --wheel --outdir /workspace/install/python + +WORKDIR /workspace/client_build RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \ -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \ @@ -125,11 +151,6 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \ -DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON \ -DTRITON_ENABLE_JAVA_HTTP=ON \ - -DTRITON_ENABLE_PERF_ANALYZER=ON \ - -DTRITON_ENABLE_PERF_ANALYZER_C_API=ON \ - -DTRITON_ENABLE_PERF_ANALYZER_TFS=ON \ - -DTRITON_ENABLE_PERF_ANALYZER_TS=ON \ - -DTRITON_ENABLE_PERF_ANALYZER_OPENAI=ON \ -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \ -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client RUN make -j16 cc-clients python-clients java-clients && \ @@ -144,9 +165,6 @@ RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \ --jar-install-path /workspace/install/java-api-bindings; \ fi -RUN pip3 install build \ - && cd /workspace/client/src/c++/perf_analyzer/genai-perf \ - && python3 -m build --wheel --outdir /workspace/install/python ############################################################################ ## Create sdk container ############################################################################ From 
97fe6f55828334aad531b93af333bdf97263c222 Mon Sep 17 00:00:00 2001 From: fpetrini15 Date: Wed, 26 Jun 2024 10:57:00 -0700 Subject: [PATCH 2/6] Pass version --- Dockerfile.sdk | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile.sdk b/Dockerfile.sdk index a6a938ab07..ecb748b27a 100644 --- a/Dockerfile.sdk +++ b/Dockerfile.sdk @@ -123,6 +123,7 @@ COPY ${TRITON_PA_REPO_SUBDIR} perf_analyzer # the CC clients twice. WORKDIR /workspace/pa_build RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ + -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \ -DTRITON_ENABLE_PERF_ANALYZER=ON \ -DTRITON_ENABLE_PERF_ANALYZER_C_API=ON \ -DTRITON_ENABLE_PERF_ANALYZER_TFS=ON \ From 929a067c9671730b861d8bfb990370472f6d14e7 Mon Sep 17 00:00:00 2001 From: fpetrini15 Date: Mon, 8 Jul 2024 10:55:01 -0700 Subject: [PATCH 3/6] Build PA second --- Dockerfile.sdk | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/Dockerfile.sdk b/Dockerfile.sdk index ecb748b27a..61976e5d3d 100644 --- a/Dockerfile.sdk +++ b/Dockerfile.sdk @@ -118,13 +118,27 @@ COPY TRITON_VERSION . COPY ${TRITON_CLIENT_REPO_SUBDIR} client COPY ${TRITON_PA_REPO_SUBDIR} perf_analyzer +WORKDIR /workspace/client_build +RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ + -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \ + -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \ + -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \ + -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \ + -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \ + -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \ + -DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON \ + -DTRITON_ENABLE_JAVA_HTTP=ON \ + -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \ + -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client +RUN make -j16 cc-clients python-clients java-clients && \ + rm -fr ~/.m2 + # TODO: PA will build the CC clients since it depends on it. 
# This should be optimized so that we do not have to build # the CC clients twice. WORKDIR /workspace/pa_build RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \ - -DTRITON_ENABLE_PERF_ANALYZER=ON \ -DTRITON_ENABLE_PERF_ANALYZER_C_API=ON \ -DTRITON_ENABLE_PERF_ANALYZER_TFS=ON \ -DTRITON_ENABLE_PERF_ANALYZER_TS=ON \ @@ -142,21 +156,6 @@ RUN pip3 install build \ && cd /workspace/perf_analyzer/genai-perf \ && python3 -m build --wheel --outdir /workspace/install/python -WORKDIR /workspace/client_build -RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ - -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \ - -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \ - -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \ - -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \ - -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \ - -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \ - -DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON \ - -DTRITON_ENABLE_JAVA_HTTP=ON \ - -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \ - -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client -RUN make -j16 cc-clients python-clients java-clients && \ - rm -fr ~/.m2 - # Install Java API Bindings RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \ source /workspace/client/src/java-api-bindings/scripts/install_dependencies_and_build.sh \ From 0477142ef5d86577785c960349ec7f420598a8de Mon Sep 17 00:00:00 2001 From: fpetrini15 Date: Thu, 11 Jul 2024 13:17:28 -0700 Subject: [PATCH 4/6] Python client installation tweaks --- Dockerfile.sdk | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Dockerfile.sdk b/Dockerfile.sdk index 61976e5d3d..d307832043 100644 --- a/Dockerfile.sdk +++ b/Dockerfile.sdk @@ -126,16 +126,19 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \ -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \ 
-DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \ - -DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON \ + -DTRITON_ENABLE_PYTHON_HTTP=OFF -DTRITON_ENABLE_PYTHON_GRPC=OFF \ -DTRITON_ENABLE_JAVA_HTTP=ON \ -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \ -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client -RUN make -j16 cc-clients python-clients java-clients && \ +RUN make -j16 cc-clients java-clients && \ rm -fr ~/.m2 -# TODO: PA will build the CC clients since it depends on it. +# TODO: PA will rebuild the CC clients since it depends on it. # This should be optimized so that we do not have to build -# the CC clients twice. +# the CC clients twice. Similarly, because the SDK expectation is +# that PA is packaged with the python client, we hold off on building +# the python client until now. Post-migration we should focus +# effort on de-tangling these flows. WORKDIR /workspace/pa_build RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \ @@ -150,7 +153,7 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ -DTRITON_PACKAGE_PERF_ANALYZER=ON \ -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \ /workspace/perf_analyzer -RUN make -j16 perf-analyzer +RUN make -j16 perf-analyzer python-clients RUN pip3 install build \ && cd /workspace/perf_analyzer/genai-perf \ From 7e65b83b000ce08d6cdd76b64ad796a2f5776613 Mon Sep 17 00:00:00 2001 From: fpetrini15 Date: Fri, 12 Jul 2024 14:02:22 -0700 Subject: [PATCH 5/6] Add client repo tag for PA --- Dockerfile.sdk | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Dockerfile.sdk b/Dockerfile.sdk index d307832043..441fa6f0f3 100644 --- a/Dockerfile.sdk +++ b/Dockerfile.sdk @@ -35,6 +35,7 @@ ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo ARG TRITON_PA_REPO_SUBDIR=perfanalyzerrepo ARG TRITON_COMMON_REPO_TAG=main ARG TRITON_CORE_REPO_TAG=main +ARG TRITON_CLIENT_REPO_TAG=main ARG TRITON_THIRD_PARTY_REPO_TAG=main ARG TRITON_MODEL_ANALYZER_REPO_TAG=main ARG 
TRITON_ENABLE_GPU=ON @@ -125,6 +126,7 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \ -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \ -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \ + -DTRITON_ENABLE_PERF_ANALYZER=OFF \ -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \ -DTRITON_ENABLE_PYTHON_HTTP=OFF -DTRITON_ENABLE_PYTHON_GRPC=OFF \ -DTRITON_ENABLE_JAVA_HTTP=ON \ @@ -142,6 +144,10 @@ RUN make -j16 cc-clients java-clients && \ WORKDIR /workspace/pa_build RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \ + -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \ + -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \ + -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \ + -DTRITON_CLIENT_REPO_TAG=${TRITON_CLIENT_REPO_TAG} \ -DTRITON_ENABLE_PERF_ANALYZER_C_API=ON \ -DTRITON_ENABLE_PERF_ANALYZER_TFS=ON \ -DTRITON_ENABLE_PERF_ANALYZER_TS=ON \ From 4d55a998870fc5536e786d72f34bd79eaa97fb61 Mon Sep 17 00:00:00 2001 From: fpetrini15 Date: Fri, 12 Jul 2024 15:21:15 -0700 Subject: [PATCH 6/6] Extend arg --- Dockerfile.sdk | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile.sdk b/Dockerfile.sdk index 441fa6f0f3..3f48dab9b1 100644 --- a/Dockerfile.sdk +++ b/Dockerfile.sdk @@ -108,6 +108,7 @@ ARG TRITON_CLIENT_REPO_SUBDIR ARG TRITON_PA_REPO_SUBDIR ARG TRITON_COMMON_REPO_TAG ARG TRITON_CORE_REPO_TAG +ARG TRITON_CLIENT_REPO_TAG ARG TRITON_THIRD_PARTY_REPO_TAG ARG TRITON_ENABLE_GPU ARG JAVA_BINDINGS_MAVEN_VERSION