Skip to content

Commit

Permalink
adding graviton docker image release (#3313)
Browse files Browse the repository at this point in the history
* adding graviton docker image

* testing multiplatform ci

* testing multiplatform ci

* testing multiplatform ci

* adding new builder

* removing arm

* removing arm

* testing arm

* tests

* testing driver command

* testing driver command

* testing on newer instance

* testing on newer instance

* testing newer

* rm command

* changing platform

* testing only amd

* testing both arch

* testing both arch

* testing both

* remove builder

* remove builder

* adding amd

* building cache

* cache 3

* cache 4

* cache 4

* final test

* reverting temp changes

* testing official release

* testing official release

* testing official release

* adding kserve changes

* kserve nightly

---------

Co-authored-by: Ankith Gunapal <[email protected]>
  • Loading branch information
udaij12 and agunapal authored Sep 17, 2024
1 parent 646862e commit ba8c268
Show file tree
Hide file tree
Showing 8 changed files with 47 additions and 27 deletions.
7 changes: 5 additions & 2 deletions .github/workflows/docker-nightly-build.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
name: Push Docker Nightly

on:
# run every day at 1:15pm
# Run every day at 1:15pm UTC
schedule:
- cron: "15 13 * * *"
workflow_dispatch:

jobs:
nightly:
runs-on: [self-hosted, ci-gpu]
Expand Down Expand Up @@ -32,12 +33,14 @@ jobs:
- name: Push Docker Nightly
run: |
cd docker
sudo apt-get update
docker buildx use multibuilder
python docker_nightly.py --cleanup
- name: Push KServe Docker Nightly
run: |
cd kubernetes/kserve
docker buildx use multibuilder
python docker_nightly.py --cleanup
- name: Open issue on failure
if: ${{ failure() && github.event_name == 'schedule' }}
uses: dacbd/create-issue-action@v1
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/official_release_docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,11 @@ jobs:
if: github.event.inputs.upload_docker == 'yes'
run: |
cd docker
docker buildx use multibuilder
python build_upload_release.py --cleanup
- name: Build & Upload pytorch/torchserve-kfs Docker images
if: github.event.inputs.upload_kfs == 'yes'
run: |
cd kubernetes/kserve
docker buildx use multibuilder
python build_upload_release.py --cleanup
23 changes: 17 additions & 6 deletions docker/build_image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ BUILD_NIGHTLY=false
BUILD_FROM_SRC=false
LOCAL_CHANGES=true
PYTHON_VERSION=3.9
ARCH="linux/arm64,linux/amd64"
MULTI=false

for arg in "$@"
do
Expand Down Expand Up @@ -101,6 +103,10 @@ do
BUILD_CPP=true
shift
;;
-m|--multi)
MULTI=true
shift
;;
-n|--nightly)
BUILD_NIGHTLY=true
shift
Expand Down Expand Up @@ -214,12 +220,17 @@ then
fi
fi

if [ "${BUILD_TYPE}" == "production" ]
then
DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
--build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg REPO_URL="${REPO_URL}" --build-arg BUILD_FROM_SRC="${BUILD_FROM_SRC}"\
--build-arg LOCAL_CHANGES="${LOCAL_CHANGES}" -t "${DOCKER_TAG}" --target production-image ../
elif [ "${BUILD_TYPE}" == "ci" ]
if [ "${BUILD_TYPE}" == "production" ]; then
if [ "${MULTI}" == "true" ]; then
DOCKER_BUILDKIT=1 docker buildx build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
--build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg REPO_URL="${REPO_URL}" --build-arg BUILD_FROM_SRC="${BUILD_FROM_SRC}"\
--build-arg LOCAL_CHANGES="${LOCAL_CHANGES}" -t "${DOCKER_TAG}" --platform "${ARCH}" --target production-image ../ --push
else
DOCKER_BUILDKIT=1 docker buildx build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
--build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg REPO_URL="${REPO_URL}" --build-arg BUILD_FROM_SRC="${BUILD_FROM_SRC}"\
--build-arg LOCAL_CHANGES="${LOCAL_CHANGES}" -t "${DOCKER_TAG}" --target production-image ../ --load
fi
elif [ "${BUILD_TYPE}" == "ci" ];
then
DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
--build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg REPO_URL="${REPO_URL}" --build-arg BUILD_FROM_SRC="${BUILD_FROM_SRC}"\
Expand Down
12 changes: 6 additions & 6 deletions docker/build_upload_release.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
organization = args.organization

# Upload pytorch/torchserve docker binaries
try_and_handle(f"./build_image.sh -t {organization}/torchserve:latest", dry_run)
try_and_handle(f"./build_image.sh -m -t {organization}/torchserve:latest", dry_run)
try_and_handle(
f"./build_image.sh -g -cv cu121 -t {organization}/torchserve:latest-gpu",
dry_run,
Expand All @@ -44,14 +44,17 @@
f"./build_image.sh -bt dev -g -cv cu121 -cpp -t {organization}/torchserve:latest-cpp-dev-gpu",
dry_run,
)

try_and_handle(
f"docker tag {organization}/torchserve:latest {organization}/torchserve:latest-cpu",
f"docker buildx imagetools create --tag {organization}/torchserve:latest-cpu {organization}/torchserve:latest",
dry_run,
)

try_and_handle(
f"docker tag {organization}/torchserve:latest {organization}/torchserve:{check_ts_version()}-cpu",
f"docker buildx imagetools create --tag {organization}/torchserve:{check_ts_version()}-cpu {organization}/torchserve:latest",
dry_run,
)

try_and_handle(
f"docker tag {organization}/torchserve:latest-gpu {organization}/torchserve:{check_ts_version()}-gpu",
dry_run,
Expand All @@ -66,12 +69,9 @@
)

for image in [
f"{organization}/torchserve:latest",
f"{organization}/torchserve:latest-cpu",
f"{organization}/torchserve:latest-gpu",
f"{organization}/torchserve:latest-cpp-dev-cpu",
f"{organization}/torchserve:latest-cpp-dev-gpu",
f"{organization}/torchserve:{check_ts_version()}-cpu",
f"{organization}/torchserve:{check_ts_version()}-gpu",
f"{organization}/torchserve:{check_ts_version()}-cpp-dev-cpu",
f"{organization}/torchserve:{check_ts_version()}-cpp-dev-gpu",
Expand Down
9 changes: 3 additions & 6 deletions docker/docker_nightly.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
cpp_dev_gpu_version = f"{project}:cpp-dev-gpu-{get_nightly_version()}"

# Build Nightly images and append the date in the name
try_and_handle(f"./build_image.sh -n -t {organization}/{cpu_version}", dry_run)
try_and_handle(f"./build_image.sh -m -n -t {organization}/{cpu_version}", dry_run)
try_and_handle(
f"./build_image.sh -g -cv cu121 -n -t {organization}/{gpu_version}",
dry_run,
Expand All @@ -54,18 +54,17 @@
)

# Push Nightly images to the official PyTorch Docker Hub account
try_and_handle(f"docker push {organization}/{cpu_version}", dry_run)
try_and_handle(f"docker push {organization}/{gpu_version}", dry_run)
try_and_handle(f"docker push {organization}/{cpp_dev_cpu_version}", dry_run)
try_and_handle(f"docker push {organization}/{cpp_dev_gpu_version}", dry_run)

# Tag nightly images with latest
try_and_handle(
f"docker tag {organization}/{cpu_version} {organization}/{project}:latest-cpu",
f"docker buildx imagetools create --tag {organization}/{project}:latest-cpu {organization}/{cpu_version}",
dry_run,
)
try_and_handle(
f"docker tag {organization}/{gpu_version} {organization}/{project}:latest-gpu",
f"docker buildx imagetools create --tag {organization}/{project}:latest-gpu {organization}/{gpu_version}",
dry_run,
)
try_and_handle(
Expand All @@ -78,8 +77,6 @@
)

# Push images with latest tag
try_and_handle(f"docker push {organization}/{project}:latest-cpu", dry_run)
try_and_handle(f"docker push {organization}/{project}:latest-gpu", dry_run)
try_and_handle(f"docker push {organization}/{project}:latest-cpp-dev-cpu", dry_run)
try_and_handle(f"docker push {organization}/{project}:latest-cpp-dev-gpu", dry_run)

Expand Down
12 changes: 11 additions & 1 deletion kubernetes/kserve/build_image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ BASE_IMAGE="pytorch/torchserve:latest-cpu"
DOCKER_FILE="Dockerfile"
BUILD_NIGHTLY=false
USE_CUSTOM_TAG=false
ARCH="linux/arm64,linux/amd64"
MULTI=false

for arg in "$@"
do
Expand Down Expand Up @@ -38,6 +40,10 @@ do
shift
shift
;;
-m|--multi)
MULTI=true
shift
;;
esac
done

Expand All @@ -57,4 +63,8 @@ fi
cp ../../frontend/server/src/main/resources/proto/*.proto .
cp -r ../../third_party .

DOCKER_BUILDKIT=1 docker build --file "$DOCKER_FILE" --build-arg BASE_IMAGE=$BASE_IMAGE -t "$DOCKER_TAG" .
if [ "${MULTI}" == "true" ]; then
DOCKER_BUILDKIT=1 docker buildx build --file "$DOCKER_FILE" --build-arg BASE_IMAGE=$BASE_IMAGE --platform "${ARCH}" -t "$DOCKER_TAG" --push
else
DOCKER_BUILDKIT=1 docker buildx build --file "$DOCKER_FILE" --build-arg BASE_IMAGE=$BASE_IMAGE -t "$DOCKER_TAG" --load
fi
3 changes: 1 addition & 2 deletions kubernetes/kserve/build_upload_release.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
organization = args.organization

try_and_handle(
f"./build_image.sh -t {organization}/torchserve-kfs:{check_ts_version()}",
f"./build_image.sh -m -t {organization}/torchserve-kfs:{check_ts_version()}",
dry_run,
)
try_and_handle(
Expand All @@ -40,7 +40,6 @@
)

for image in [
f"{organization}/torchserve-kfs:{check_ts_version()}",
f"{organization}/torchserve-kfs:{check_ts_version()}-gpu",
]:
try_and_handle(f"docker push {image}", dry_run)
Expand Down
6 changes: 2 additions & 4 deletions kubernetes/kserve/docker_nightly.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,18 @@
gpu_version = f"{project}:gpu-{get_nightly_version()}"

# Build Nightly images and append the date in the name
try_and_handle(f"./build_image.sh -n -t {organization}/{cpu_version}", dry_run)
try_and_handle(f"./build_image.sh -m -n -t {organization}/{cpu_version}", dry_run)
try_and_handle(
f"./build_image.sh -g -n -t {organization}/{gpu_version}",
dry_run,
)

# Push Nightly images to the official PyTorch Docker Hub account
try_and_handle(f"docker push {organization}/{cpu_version}", dry_run)
try_and_handle(f"docker push {organization}/{gpu_version}", dry_run)

# Tag nightly images with latest
try_and_handle(
f"docker tag {organization}/{cpu_version} {organization}/{project}:latest-cpu",
f"docker buildx imagetools create --tag {organization}/{project}:latest-cpu {organization}/{cpu_version}",
dry_run,
)
try_and_handle(
Expand All @@ -58,7 +57,6 @@
)

# Push images with latest tag
try_and_handle(f"docker push {organization}/{project}:latest-cpu", dry_run)
try_and_handle(f"docker push {organization}/{project}:latest-gpu", dry_run)

# Cleanup built images
Expand Down

0 comments on commit ba8c268

Please sign in to comment.