diff --git a/.github/dependabot.yml b/.github/dependabot.yml
deleted file mode 100644
index 1a8098071ba3..000000000000
--- a/.github/dependabot.yml
+++ /dev/null
@@ -1,35 +0,0 @@
-# To get started with Dependabot version updates, you'll need to specify which
-# package ecosystems to update and where the package manifests are located.
-# Please see the documentation for all configuration options:
-# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
-
-version: 2
-updates:
- - package-ecosystem: "maven"
- directory: "/jvm-packages"
- schedule:
- interval: "monthly"
- - package-ecosystem: "maven"
- directory: "/jvm-packages/xgboost4j"
- schedule:
- interval: "monthly"
- - package-ecosystem: "maven"
- directory: "/jvm-packages/xgboost4j-gpu"
- schedule:
- interval: "monthly"
- - package-ecosystem: "maven"
- directory: "/jvm-packages/xgboost4j-example"
- schedule:
- interval: "monthly"
- - package-ecosystem: "maven"
- directory: "/jvm-packages/xgboost4j-spark"
- schedule:
- interval: "monthly"
- - package-ecosystem: "maven"
- directory: "/jvm-packages/xgboost4j-spark-gpu"
- schedule:
- interval: "monthly"
- - package-ecosystem: "github-actions"
- directory: /
- schedule:
- interval: "monthly"
diff --git a/.github/runs-on.yml b/.github/runs-on.yml
index d951a08e8273..e21895ee8c3b 100644
--- a/.github/runs-on.yml
+++ b/.github/runs-on.yml
@@ -34,4 +34,3 @@ runners:
cpu: 32
family: ["c7i-flex", "c7i", "c7a", "c5", "c5a"]
image: windows-amd64
-
diff --git a/.github/workflows/freebsd.yml b/.github/workflows/freebsd.yml
index d3208a1294d1..d0eb13c20fb6 100644
--- a/.github/workflows/freebsd.yml
+++ b/.github/workflows/freebsd.yml
@@ -15,20 +15,20 @@ jobs:
timeout-minutes: 20
name: A job to run test in FreeBSD
steps:
- - uses: actions/checkout@v4
- with:
- submodules: 'true'
- - name: Test in FreeBSD
- id: test
- uses: vmactions/freebsd-vm@v1
- with:
- usesh: true
- prepare: |
- pkg install -y cmake git ninja googletest
-      run: |
-        mkdir build
-        cd build
-        cmake .. -GNinja -DGOOGLE_TEST=ON
-        ninja -v
-        ./testxgboost
+      - uses: actions/checkout@v4
+        with:
+          submodules: 'true'
+      - name: Test in FreeBSD
+        id: test
+        uses: vmactions/freebsd-vm@v1
+        with:
+          usesh: true
+          prepare: |
+            pkg install -y cmake git ninja googletest
+ run: |
+ mkdir build
+ cd build
+ cmake .. -GNinja -DGOOGLE_TEST=ON
+ ninja -v
+ ./testxgboost
diff --git a/.github/workflows/i386.yml b/.github/workflows/i386.yml
index aec7e9d31087..455d6ea91033 100644
--- a/.github/workflows/i386.yml
+++ b/.github/workflows/i386.yml
@@ -19,25 +19,25 @@ jobs:
ports:
- 5000:5000
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- with:
- submodules: 'true'
- - name: Set up Docker Buildx
- uses: docker/setup-buildx-action@v3.7.1
- with:
- driver-opts: network=host
- - name: Build and push container
- uses: docker/build-push-action@v6
- with:
- context: .
- file: tests/ci_build/Dockerfile.i386
- push: true
- tags: localhost:5000/xgboost/build-32bit:latest
- cache-from: type=gha
- cache-to: type=gha,mode=max
- - name: Build XGBoost
- run: |
- docker run --rm -v $PWD:/workspace -w /workspace \
- -e CXXFLAGS='-Wno-error=overloaded-virtual -Wno-error=maybe-uninitialized -Wno-error=redundant-move' \
- localhost:5000/xgboost/build-32bit:latest \
- tests/ci_build/build_via_cmake.sh
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: 'true'
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3.7.1
+ with:
+ driver-opts: network=host
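+      # The image is pushed to the job-local registry declared under `services`
+      # (localhost:5000); driver-opts network=host lets the buildx builder reach it.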
+ - name: Build and push container
+ uses: docker/build-push-action@v6
+ with:
+ context: .
+ file: ops/docker/dockerfile/Dockerfile.i386
+ push: true
+ tags: localhost:5000/xgboost/build-32bit:latest
+ cache-from: type=gha
+ cache-to: type=gha,mode=max
+ - name: Build XGBoost
+ run: |
+ docker run --rm -v $PWD:/workspace -w /workspace \
+ -e CXXFLAGS='-Wno-error=overloaded-virtual -Wno-error=maybe-uninitialized -Wno-error=redundant-move' \
+ localhost:5000/xgboost/build-32bit:latest \
+ bash ops/script/build_via_cmake.sh
diff --git a/.github/workflows/jvm_tests.yml b/.github/workflows/jvm_tests.yml
index dcbd9de55b50..549094d52e37 100644
--- a/.github/workflows/jvm_tests.yml
+++ b/.github/workflows/jvm_tests.yml
@@ -1,100 +1,284 @@
-name: XGBoost-JVM-Tests
+name: XGBoost CI (JVM packages)
on: [push, pull_request]
permissions:
- contents: read # to fetch code (actions/checkout)
+ contents: read # to fetch code (actions/checkout)
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
+env:
+ BRANCH_NAME: >-
+ ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }}
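+  # The folded expression above resolves to "PR-<number>" for pull requests and
+  # to the branch name for direct pushes.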
+ USE_DOCKER_CACHE: 1
+
jobs:
- test-with-jvm:
- name: Test JVM on OS ${{ matrix.os }}
+ build-containers:
+ name: Build CI containers (${{ matrix.container_id }})
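+    # The runs-on=<run-id> / runner=<label> labels below select ephemeral runners
+    # provisioned via the RunsOn service (an assumption based on .github/runs-on.yml).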
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=${{ matrix.runner }}
+ strategy:
+ max-parallel: 2
+ matrix:
+ container_id:
+ - xgb-ci.manylinux2014_x86_64
+ - xgb-ci.jvm
+ - xgb-ci.jvm_gpu_build
+ runner: [linux-amd64-cpu]
+ include:
+ - container_id: xgb-ci.manylinux2014_aarch64
+ runner: linux-arm64-cpu
+ steps:
+ # Restart Docker daemon so that it recognizes the ephemeral disks
+ - run: sudo systemctl restart docker
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: "true"
+ - name: Build ${{ matrix.container_id }}
+ run: bash ops/docker_build.sh
+ env:
+ CONTAINER_ID: ${{ matrix.container_id }}
+
+ build-jvm-manylinux2014:
+ name: >-
+ Build libxgboost4j.so targeting glibc 2.17
+ (arch ${{ matrix.arch }}, runner ${{ matrix.runner }})
+ needs: build-containers
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=${{ matrix.runner }}
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - arch: aarch64
+ runner: linux-arm64-cpu
+ - arch: x86_64
+ runner: linux-amd64-cpu
+ steps:
+ # Restart Docker daemon so that it recognizes the ephemeral disks
+ - run: sudo systemctl restart docker
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: "true"
+ - name: Fetch container from cache
+ run: bash ops/docker_build.sh
+ env:
+ CONTAINER_ID: xgb-ci.manylinux2014_${{ matrix.arch }}
+ - run: bash ops/pipeline/build-jvm-manylinux2014.sh ${{ matrix.arch }}
+
+ build-jvm-gpu:
+ name: Build libxgboost4j.so with CUDA
+ needs: build-containers
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=linux-amd64-cpu
+ steps:
+ # Restart Docker daemon so that it recognizes the ephemeral disks
+ - run: sudo systemctl restart docker
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: "true"
+ - name: Fetch container from cache
+ run: bash ops/docker_build.sh
+ env:
+ CONTAINER_ID: xgb-ci.jvm_gpu_build
+ - run: bash ops/pipeline/build-jvm-gpu.sh
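+      # ops/stash_artifacts.sh with COMMAND=upload stores the listed files under
+      # KEY; downstream jobs retrieve them with COMMAND=download and the same KEY.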
+ - name: Stash files
+ run: bash ops/stash_artifacts.sh lib/libxgboost4j.so
+ env:
+ COMMAND: upload
+ KEY: build-jvm-gpu
+
+ build-jvm-mac:
+ name: "Build libxgboost4j.dylib for ${{ matrix.description }}"
+ runs-on: ${{ matrix.runner }}
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - description: "MacOS (Apple Silicon)"
+ script: ops/pipeline/build-jvm-macos-apple-silicon.sh
+ runner: macos-14
+ - description: "MacOS (Intel)"
+ script: ops/pipeline/build-jvm-macos-intel.sh
+ runner: macos-13
+ steps:
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: "true"
+ - run: bash ${{ matrix.script }}
+ env:
+ AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
+ AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
+
+ build-jvm-docs:
+ name: Build docs for JVM packages
+ needs: [build-jvm-gpu]
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=linux-amd64-cpu
+ steps:
+ # Restart Docker daemon so that it recognizes the ephemeral disks
+ - run: sudo systemctl restart docker
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: "true"
+ - name: Fetch container from cache
+ run: bash ops/docker_build.sh
+ env:
+ CONTAINER_ID: xgb-ci.jvm_gpu_build
+ - name: Unstash files
+ run: bash ops/stash_artifacts.sh lib/libxgboost4j.so
+ env:
+ COMMAND: download
+ KEY: build-jvm-gpu
+ - run: bash ops/pipeline/build-jvm-doc.sh
+
+ build-test-jvm-packages:
+ name: Build and test JVM packages (Linux)
+ needs: build-containers
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=linux-amd64-cpu
+ steps:
+ # Restart Docker daemon so that it recognizes the ephemeral disks
+ - run: sudo systemctl restart docker
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: "true"
+ - name: Fetch container from cache
+ run: bash ops/docker_build.sh
+ env:
+ CONTAINER_ID: xgb-ci.jvm
+ - name: Build and test JVM packages (Scala 2.12)
+ run: bash ops/pipeline/build-test-jvm-packages.sh
+ env:
+ SCALA_VERSION: 2.12
+ - name: Build and test JVM packages (Scala 2.13)
+ run: bash ops/pipeline/build-test-jvm-packages.sh
+ env:
+ SCALA_VERSION: 2.13
+ - name: Stash files
+ run: bash ops/stash_artifacts.sh lib/libxgboost4j.so
+ env:
+ COMMAND: upload
+ KEY: build-test-jvm-packages
+
+ build-test-jvm-packages-other-os:
+ name: Build and test JVM packages (${{ matrix.os }})
timeout-minutes: 30
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
- os: [windows-latest, ubuntu-latest, macos-13]
+ os: [windows-latest, macos-13]
+
+ steps:
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: 'true'
+
+ - uses: actions/setup-java@v4.5.0
+ with:
+ distribution: 'temurin'
+ java-version: '8'
+
+ - uses: conda-incubator/setup-miniconda@v3.1.0
+ with:
+ miniforge-variant: Miniforge3
+ miniforge-version: latest
+ activate-environment: jvm_tests
+ environment-file: ops/conda_env/jvm_tests.yml
+ use-mamba: true
+ - name: Cache Maven packages
+ uses: actions/cache@v4.1.2
+ with:
+ path: ~/.m2
+ key: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }}
+ restore-keys: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }}
+
+ - name: Test XGBoost4J (Core)
+ run: |
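+          # -pl :xgboost4j_2.12 limits Maven to the core module; distributed
+          # training doesn't work on Windows/macOS, so the Spark modules are
+          # tested in the Linux job instead.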
+ cd jvm-packages
+ mvn test -B -pl :xgboost4j_2.12
+
+ - name: Publish artifact xgboost4j.dll to S3
+ run: |
+ cd lib/
+ Rename-Item -Path xgboost4j.dll -NewName xgboost4j_${{ github.sha }}.dll
+ dir
+ python -m awscli s3 cp xgboost4j_${{ github.sha }}.dll `
+ s3://xgboost-nightly-builds/${{ env.BRANCH_NAME }}/libxgboost4j/ `
+ --acl public-read --region us-west-2
+ if: |
+ (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
+ matrix.os == 'windows-latest'
+ env:
+ AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
+ AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
+
+ test-jvm-packages-gpu:
+ name: Test JVM packages with CUDA
+ needs: [build-jvm-gpu]
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=linux-amd64-mgpu
+ steps:
+ # Restart Docker daemon so that it recognizes the ephemeral disks
+ - run: sudo systemctl restart docker
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: "true"
+ - name: Fetch container from cache
+ run: bash ops/docker_build.sh
+ env:
+ CONTAINER_ID: xgb-ci.jvm_gpu_build
+ - name: Unstash files
+ run: bash ops/stash_artifacts.sh lib/libxgboost4j.so
+ env:
+ COMMAND: download
+ KEY: build-jvm-gpu
+ - run: bash ops/pipeline/test-jvm-gpu.sh
+
+ deploy-jvm-packages:
+ name: Deploy JVM packages to S3 (${{ matrix.variant }})
+ needs: [build-jvm-gpu, build-test-jvm-packages, test-jvm-packages-gpu]
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=linux-amd64-cpu
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - variant: cpu
+ container_id: xgb-ci.jvm
+ artifact_from: build-test-jvm-packages
+ - variant: gpu
+ container_id: xgb-ci.jvm_gpu_build
+ artifact_from: build-jvm-gpu
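+          # artifact_from names the upstream job whose stashed libxgboost4j.so is
+          # fetched in the Unstash step below.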
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- with:
- submodules: 'true'
-
- - uses: actions/setup-java@b36c23c0d998641eff861008f374ee103c25ac73 # v4.4.0
- with:
- distribution: 'temurin'
- java-version: '8'
-
- - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
- with:
- miniforge-variant: Miniforge3
- miniforge-version: latest
- activate-environment: jvm_tests
- environment-file: tests/ci_build/conda_env/jvm_tests.yml
- use-mamba: true
-
- - name: Cache Maven packages
- uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2
- with:
- path: ~/.m2
- key: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }}
- restore-keys: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }}
-
- - name: Test XGBoost4J (Core)
- run: |
- cd jvm-packages
- mvn test -B -pl :xgboost4j_2.12
-
- - name: Test XGBoost4J (Core, Spark, Examples)
- run: |
- rm -rfv build/
- cd jvm-packages
- mvn -B test
- if: matrix.os == 'ubuntu-latest' # Distributed training doesn't work on Windows
-
- - name: Extract branch name
- shell: bash
- run: |
- echo "branch=${GITHUB_REF#refs/heads/}" >> "$GITHUB_OUTPUT"
- id: extract_branch
- if: |
- (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
- (matrix.os == 'windows-latest' || matrix.os == 'macos-13')
-
- - name: Publish artifact xgboost4j.dll to S3
- run: |
- cd lib/
- Rename-Item -Path xgboost4j.dll -NewName xgboost4j_${{ github.sha }}.dll
- dir
- python -m awscli s3 cp xgboost4j_${{ github.sha }}.dll s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read --region us-west-2
- if: |
- (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
- matrix.os == 'windows-latest'
- env:
- AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
- AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
-
- - name: Publish artifact libxgboost4j.dylib to S3
- shell: bash -l {0}
- run: |
- cd lib/
- mv -v libxgboost4j.dylib libxgboost4j_${{ github.sha }}.dylib
- ls
- python -m awscli s3 cp libxgboost4j_${{ github.sha }}.dylib s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read --region us-west-2
- if: |
- (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
- matrix.os == 'macos-13'
- env:
- AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
- AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
-
- - name: Build and Test XGBoost4J with scala 2.13
- run: |
- rm -rfv build/
- cd jvm-packages
- mvn -B clean install test -Pdefault,scala-2.13
- if: matrix.os == 'ubuntu-latest' # Distributed training doesn't work on Windows
+ # Restart Docker daemon so that it recognizes the ephemeral disks
+ - run: sudo systemctl restart docker
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: "true"
+ - name: Fetch container from cache
+ run: bash ops/docker_build.sh
+ env:
+ CONTAINER_ID: ${{ matrix.container_id }}
+ - name: Unstash files
+ run: |
+ bash ops/stash_artifacts.sh lib/libxgboost4j.so
+ ls -lh lib/libxgboost4j.so
+ env:
+ COMMAND: download
+ KEY: ${{ matrix.artifact_from }}
+ - name: Deploy JVM packages to S3
+ run: >-
+          bash ops/pipeline/deploy-jvm-packages.sh ${{ matrix.variant }}
+          ${{ matrix.container_id }}
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
new file mode 100644
index 000000000000..70d892b1061d
--- /dev/null
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,131 @@
+name: XGBoost CI (Lint)
+
+on: [push, pull_request]
+
+permissions:
+ contents: read # to fetch code (actions/checkout)
+
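+# Keep at most one active run per PR/branch: a new push cancels in-flight runs.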
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: true
+
+env:
+ BRANCH_NAME: >-
+ ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }}
+
+jobs:
+ build-containers:
+ name: Build CI containers
+ env:
+ CONTAINER_ID: xgb-ci.clang_tidy
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=linux-amd64-cpu
+ steps:
+ # Restart Docker daemon so that it recognizes the ephemeral disks
+ - run: sudo systemctl restart docker
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: "true"
+ - name: Build ${{ env.CONTAINER_ID }}
+ run: bash ops/docker_build.sh
+
+ clang-tidy:
+ name: Run clang-tidy
+ needs: build-containers
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=linux-amd64-cpu
+ steps:
+ # Restart Docker daemon so that it recognizes the ephemeral disks
+ - run: sudo systemctl restart docker
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: "true"
+ - name: Fetch container from cache
+ run: bash ops/docker_build.sh
+ env:
+ CONTAINER_ID: xgb-ci.clang_tidy
+ - run: bash ops/pipeline/run-clang-tidy.sh
+
+ python-mypy-lint:
+ runs-on: ubuntu-latest
+ name: Type and format checks for the Python package
+ steps:
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: 'true'
+ - uses: conda-incubator/setup-miniconda@v3.1.0
+ with:
+ miniforge-variant: Miniforge3
+ miniforge-version: latest
+ activate-environment: python_lint
+ environment-file: ops/conda_env/python_lint.yml
+ use-mamba: true
+ - name: Display Conda env
+ shell: bash -el {0}
+ run: |
+ conda info
+ conda list
+ - name: Run mypy
+ shell: bash -el {0}
+ run: |
+ python ops/script/lint_python.py --format=0 --type-check=1 --pylint=0
+ - name: Run formatter
+ shell: bash -el {0}
+ run: |
+ python ops/script/lint_python.py --format=1 --type-check=0 --pylint=0
+ - name: Run pylint
+ shell: bash -el {0}
+ run: |
+ python ops/script/lint_python.py --format=0 --type-check=0 --pylint=1
+
+ cpp-lint:
+ runs-on: ubuntu-latest
+ name: Code linting for C++
+ steps:
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: 'true'
+ - uses: actions/setup-python@v5.3.0
+ with:
+ python-version: "3.10"
+ architecture: 'x64'
+ - name: Install Python packages
+ run: |
+ python -m pip install wheel setuptools cmakelint cpplint==1.6.1 pylint
+ - name: Run lint
+ run: |
+ python3 ops/script/lint_cpp.py
+ bash ops/script/lint_cmake.sh
+
+ lintr:
+ runs-on: ubuntu-latest
+ name: Run R linters on Ubuntu
+ env:
+ R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
+ steps:
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: 'true'
+
+ - uses: r-lib/actions/setup-r@v2.11.0
+ with:
+ r-version: "release"
+
+ - name: Cache R packages
+ uses: actions/cache@v4.1.2
+ with:
+ path: ${{ env.R_LIBS_USER }}
+ key: ${{ runner.os }}-r-release-7-${{ hashFiles('R-package/DESCRIPTION') }}
+ restore-keys: ${{ runner.os }}-r-release-7-${{ hashFiles('R-package/DESCRIPTION') }}
+
+ - name: Install dependencies
+ shell: Rscript {0}
+ run: |
+ source("./R-package/tests/helper_scripts/install_deps.R")
+
+ - name: Run lintr
+ run: |
+ MAKEFLAGS="-j$(nproc)" R CMD INSTALL R-package/
+ Rscript ops/script/lint_r.R $(pwd)
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 3c0a67b4f463..15822c55f0d5 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -1,193 +1,297 @@
-# This is a basic workflow to help you get started with Actions
-name: XGBoost-CI
-
-# Controls when the action will run. Triggers the workflow on push or pull request
-# events but only for the master branch
+name: XGBoost CI
on: [push, pull_request]
permissions:
- contents: read # to fetch code (actions/checkout)
+ contents: read # to fetch code (actions/checkout)
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
-# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+env:
+ BRANCH_NAME: >-
+ ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }}
+ USE_DOCKER_CACHE: 1
+
jobs:
- gtest-cpu:
- name: Test Google C++ test (CPU)
- runs-on: ${{ matrix.os }}
+ build-containers:
+ name: Build CI containers (${{ matrix.container_id }})
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=${{ matrix.runner }}
strategy:
- fail-fast: false
+ max-parallel: 2
matrix:
- os: [macos-12]
+ container_id:
+ - xgb-ci.gpu_build_rockylinux8
+ - xgb-ci.gpu_build_r_rockylinux8
+ - xgb-ci.gpu
+ - xgb-ci.gpu_dev_ver
+ - xgb-ci.cpu
+ - xgb-ci.manylinux_2_28_x86_64
+ - xgb-ci.manylinux2014_x86_64
+ runner: [linux-amd64-cpu]
+ include:
+ - container_id: xgb-ci.manylinux2014_aarch64
+ runner: linux-arm64-cpu
+ - container_id: xgb-ci.aarch64
+ runner: linux-arm64-cpu
+ steps:
+ # Restart Docker daemon so that it recognizes the ephemeral disks
+ - run: sudo systemctl restart docker
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: "true"
+ - name: Build ${{ matrix.container_id }}
+ run: bash ops/docker_build.sh
+ env:
+ CONTAINER_ID: ${{ matrix.container_id }}
+
+ build-cpu:
+ name: Build CPU
+ needs: build-containers
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=linux-amd64-cpu
+ steps:
+ # Restart Docker daemon so that it recognizes the ephemeral disks
+ - run: sudo systemctl restart docker
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: "true"
+ - name: Fetch container from cache
+ run: bash ops/docker_build.sh
+ env:
+ CONTAINER_ID: xgb-ci.cpu
+ - run: bash ops/pipeline/build-cpu.sh
+ - name: Stash CLI executable
+ run: bash ops/stash_artifacts.sh ./xgboost
+ env:
+ COMMAND: upload
+ KEY: build-cpu
+
+ build-cpu-arm64:
+ name: Build CPU ARM64 + manylinux_2_28_aarch64 wheel
+ needs: build-containers
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=linux-arm64-cpu
+ steps:
+ # Restart Docker daemon so that it recognizes the ephemeral disks
+ - run: sudo systemctl restart docker
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: "true"
+ - name: Fetch container from cache
+ run: bash ops/docker_build.sh
+ env:
+ CONTAINER_ID: xgb-ci.aarch64
+ - run: bash ops/pipeline/build-cpu-arm64.sh
+ - name: Stash files
+ run: bash ops/stash_artifacts.sh ./xgboost python-package/dist/*.whl
+ env:
+ COMMAND: upload
+ KEY: build-cpu-arm64
+
+ build-cuda:
+ name: Build CUDA + manylinux_2_28_x86_64 wheel
+ needs: build-containers
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=linux-amd64-cpu
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- with:
- submodules: 'true'
- - name: Install system packages
- run: |
- brew install ninja libomp
- - name: Build gtest binary
- run: |
- mkdir build
- cd build
- cmake .. -DGOOGLE_TEST=ON -DUSE_OPENMP=ON -DUSE_DMLC_GTEST=ON -GNinja -DBUILD_DEPRECATED_CLI=ON -DUSE_SANITIZER=ON -DENABLED_SANITIZERS=address -DCMAKE_BUILD_TYPE=RelWithDebInfo
- ninja -v
- - name: Run gtest binary
- run: |
- cd build
- ./testxgboost
- ctest -R TestXGBoostCLI --extra-verbose
-  gtest-cpu-nonomp:
-    name: Test Google C++ unittest (CPU Non-OMP)
-    runs-on: ${{ matrix.os }}
+      # Restart Docker daemon so that it recognizes the ephemeral disks
+      - run: sudo systemctl restart docker
+      - uses: actions/checkout@v4.2.2
+        with:
+          submodules: "true"
+      - name: Fetch container from cache
+        run: bash ops/docker_build.sh
+        env:
+          CONTAINER_ID: xgb-ci.gpu_build_rockylinux8
+      - name: Fetch container from cache
+        run: bash ops/docker_build.sh
+        env:
+          CONTAINER_ID: xgb-ci.manylinux_2_28_x86_64
+      - run: bash ops/pipeline/build-cuda.sh
+      - name: Stash files
+        run: |
+          bash ops/stash_artifacts.sh \
+            build/testxgboost ./xgboost python-package/dist/*.whl
+        env:
+          COMMAND: upload
+          KEY: build-cuda
+ build-cuda-with-rmm:
+ name: Build CUDA with RMM
+ needs: build-containers
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=linux-amd64-cpu
+ steps:
+ # Restart Docker daemon so that it recognizes the ephemeral disks
+ - run: sudo systemctl restart docker
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: "true"
+ - name: Fetch container from cache
+ run: bash ops/docker_build.sh
+ env:
+ CONTAINER_ID: xgb-ci.gpu_build_rockylinux8
+ - name: Fetch container from cache
+ run: bash ops/docker_build.sh
+ env:
+ CONTAINER_ID: xgb-ci.manylinux_2_28_x86_64
+ - run: bash ops/pipeline/build-cuda-with-rmm.sh
+ - name: Stash files
+ run: bash ops/stash_artifacts.sh build/testxgboost
+ env:
+ COMMAND: upload
+ KEY: build-cuda-with-rmm
+
+ build-manylinux2014:
+ name: Build manylinux2014_${{ matrix.arch }} wheel
+ needs: build-containers
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=${{ matrix.runner }}
strategy:
fail-fast: false
matrix:
- os: [ubuntu-latest]
+ include:
+ - arch: aarch64
+ runner: linux-arm64-cpu
+ - arch: x86_64
+ runner: linux-amd64-cpu
+ steps:
+ # Restart Docker daemon so that it recognizes the ephemeral disks
+ - run: sudo systemctl restart docker
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: "true"
+ - name: Fetch container from cache
+ run: bash ops/docker_build.sh
+ env:
+ CONTAINER_ID: xgb-ci.manylinux2014_${{ matrix.arch }}
+ - run: bash ops/pipeline/build-manylinux2014.sh ${{ matrix.arch }}
+
+ build-gpu-rpkg:
+ name: Build GPU-enabled R package
+ needs: build-containers
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=linux-amd64-cpu
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- with:
- submodules: 'true'
- - name: Install system packages
- run: |
- sudo apt-get install -y --no-install-recommends ninja-build
- - name: Build and install XGBoost
- shell: bash -l {0}
- run: |
- mkdir build
- cd build
- cmake .. -GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DUSE_OPENMP=OFF -DBUILD_DEPRECATED_CLI=ON
- ninja -v
- - name: Run gtest binary
- run: |
- cd build
- ctest --extra-verbose
-  gtest-cpu-sycl:
-    name: Test Google C++ unittest (CPU SYCL)
-    runs-on: ${{ matrix.os }}
+      # Restart Docker daemon so that it recognizes the ephemeral disks
+      - run: sudo systemctl restart docker
+      - uses: actions/checkout@v4.2.2
+        with:
+          submodules: "true"
+      - name: Fetch container from cache
+        run: bash ops/docker_build.sh
+        env:
+          CONTAINER_ID: xgb-ci.gpu_build_r_rockylinux8
+      - run: bash ops/pipeline/build-gpu-rpkg.sh
+ test-cpp-gpu:
+ name: >-
+ Run Google Tests with GPUs
+ (Suite ${{ matrix.suite }}, Runner ${{ matrix.runner }})
+ needs: [build-cuda, build-cuda-with-rmm]
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=${{ matrix.runner }}
strategy:
fail-fast: false
+ max-parallel: 2
matrix:
- os: [ubuntu-latest]
- python-version: ["3.10"]
+ include:
+ - suite: gpu
+ runner: linux-amd64-gpu
+ artifact_from: build-cuda
+ - suite: gpu-rmm
+ runner: linux-amd64-gpu
+ artifact_from: build-cuda-with-rmm
+ - suite: mgpu
+ runner: linux-amd64-mgpu
+ artifact_from: build-cuda
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- with:
- submodules: 'true'
- - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
- with:
- miniforge-variant: Miniforge3
- miniforge-version: latest
- activate-environment: linux_sycl_test
- environment-file: tests/ci_build/conda_env/linux_sycl_test.yml
- use-mamba: true
- - name: Display Conda env
- run: |
- conda info
- conda list
- - name: Build and install XGBoost
- shell: bash -l {0}
- run: |
- mkdir build
- cd build
- cmake .. -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DPLUGIN_SYCL=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX
- make -j$(nproc)
- - name: Run gtest binary for SYCL
- run: |
- cd build
- ./testxgboost --gtest_filter=Sycl*
- - name: Run gtest binary for non SYCL
- run: |
- cd build
- ./testxgboost --gtest_filter=-Sycl*
-  c-api-demo:
-    name: Test installing XGBoost lib + building the C API demo
-    runs-on: ${{ matrix.os }}
-    defaults:
-      run:
-        shell: bash -l {0}
+      # Restart Docker daemon so that it recognizes the ephemeral disks
+      - run: sudo systemctl restart docker
+      - uses: actions/checkout@v4.2.2
+        with:
+          submodules: "true"
+      - name: Fetch container from cache
+        run: bash ops/docker_build.sh
+        env:
+          CONTAINER_ID: xgb-ci.gpu
+      - name: Unstash gtest
+        run: |
+          bash ops/stash_artifacts.sh build/testxgboost
+          chmod +x build/testxgboost
+        env:
+          COMMAND: download
+          KEY: ${{ matrix.artifact_from }}
+      - run: bash ops/pipeline/test-cpp-gpu.sh ${{ matrix.suite }}
+ test-python:
+ name: Run Python tests (${{ matrix.description }})
+ needs: [build-cuda, build-cpu-arm64]
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=${{ matrix.runner }}
strategy:
fail-fast: false
+ max-parallel: 2
matrix:
- os: ["ubuntu-latest"]
- python-version: ["3.10"]
- steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- with:
- submodules: 'true'
- - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
- with:
- miniforge-variant: Miniforge3
- miniforge-version: latest
- activate-environment: cpp_test
- environment-file: tests/ci_build/conda_env/cpp_test.yml
- use-mamba: true
- - name: Display Conda env
- run: |
- conda info
- conda list
-
- - name: Build and install XGBoost static library
- run: |
- mkdir build
- cd build
- cmake .. -DBUILD_STATIC_LIB=ON -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -GNinja
- ninja -v install
- cd -
- - name: Build and run C API demo with static
- run: |
- pushd .
- cd demo/c-api/
- mkdir build
- cd build
- cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
- ninja -v
- ctest
- cd ..
- rm -rf ./build
- popd
-
- - name: Build and install XGBoost shared library
- run: |
- cd build
- cmake .. -DBUILD_STATIC_LIB=OFF -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -GNinja -DPLUGIN_FEDERATED=ON -DGOOGLE_TEST=ON
- ninja -v install
- ./testxgboost
- cd -
- - name: Build and run C API demo with shared
- run: |
- pushd .
- cd demo/c-api/
- mkdir build
- cd build
- cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
- ninja -v
- ctest
- popd
- ./tests/ci_build/verify_link.sh ./demo/c-api/build/basic/api-demo
- ./tests/ci_build/verify_link.sh ./demo/c-api/build/external-memory/external-memory-demo
-
- cpp-lint:
- runs-on: ubuntu-latest
- name: Code linting for C++
+ include:
+ - description: "single GPU"
+ container: xgb-ci.gpu
+ suite: gpu
+ runner: linux-amd64-gpu
+ artifact_from: build-cuda
+ - description: "single GPU, nightly deps"
+ container: xgb-ci.gpu_dev_ver
+ suite: gpu
+ runner: linux-amd64-gpu
+ artifact_from: build-cuda
+ - description: "multiple GPUs"
+ container: xgb-ci.gpu
+ suite: mgpu
+ runner: linux-amd64-mgpu
+ artifact_from: build-cuda
+ - description: "multiple GPUs, nightly deps"
+ container: xgb-ci.gpu_dev_ver
+ suite: mgpu
+ runner: linux-amd64-mgpu
+ artifact_from: build-cuda
+ - description: "CPU"
+ container: xgb-ci.cpu
+ suite: cpu
+ runner: linux-amd64-cpu
+ artifact_from: build-cuda
+ - description: "CPU ARM64"
+ container: xgb-ci.aarch64
+ suite: cpu-arm64
+ runner: linux-arm64-cpu
+ artifact_from: build-cpu-arm64
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- with:
- submodules: 'true'
- - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
- with:
- python-version: "3.10"
- architecture: 'x64'
- - name: Install Python packages
- run: |
- python -m pip install wheel setuptools cmakelint cpplint==1.6.1 pylint
- - name: Run lint
- run: |
- python3 tests/ci_build/lint_cpp.py
- sh ./tests/ci_build/lint_cmake.sh
+ # Restart Docker daemon so that it recognizes the ephemeral disks
+ - run: sudo systemctl restart docker
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: "true"
+ - name: Fetch container from cache
+ run: bash ops/docker_build.sh
+ env:
+ CONTAINER_ID: ${{ matrix.container }}
+ - name: Unstash Python wheel
+ run: |
+ bash ops/stash_artifacts.sh python-package/dist/*.whl ./xgboost
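+          # The stash round-trip doesn't preserve file permissions, so restore the
+          # execute bit.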
+ chmod +x ./xgboost
+ env:
+ COMMAND: download
+ KEY: ${{ matrix.artifact_from }}
+ - name: Run Python tests, ${{ matrix.description }}
+ run: bash ops/pipeline/test-python.sh ${{ matrix.suite }} ${{ matrix.container }}
diff --git a/.github/workflows/misc.yml b/.github/workflows/misc.yml
new file mode 100644
index 000000000000..1e6df46615d5
--- /dev/null
+++ b/.github/workflows/misc.yml
@@ -0,0 +1,120 @@
+name: XGBoost CI (misc)
+
+on: [push, pull_request]
+
+permissions:
+ contents: read # to fetch code (actions/checkout)
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: true
+
+env:
+ BRANCH_NAME: >-
+ ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }}
+
+jobs:
+ gtest-cpu:
+ name: Test Google C++ test (CPU)
+ runs-on: macos-13
+ steps:
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: 'true'
+ - name: Install system packages
+ run: |
+ brew install ninja libomp
+ - name: Build gtest binary
+ run: |
+ mkdir build
+ cd build
+ cmake .. -DGOOGLE_TEST=ON -DUSE_OPENMP=ON -DUSE_DMLC_GTEST=ON -GNinja -DBUILD_DEPRECATED_CLI=ON -DUSE_SANITIZER=ON -DENABLED_SANITIZERS=address -DCMAKE_BUILD_TYPE=RelWithDebInfo
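+          # USE_SANITIZER=ON with ENABLED_SANITIZERS=address builds the tests under
+          # AddressSanitizer, so the run below also flags memory errors.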
+ ninja -v
+ - name: Run gtest binary
+ run: |
+ cd build
+ ./testxgboost
+ ctest -R TestXGBoostCLI --extra-verbose
+
+ gtest-cpu-nonomp:
+ name: Test Google C++ unittest (CPU Non-OMP)
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: 'true'
+ - name: Install system packages
+ run: |
+ sudo apt-get install -y --no-install-recommends ninja-build
+ - name: Build and install XGBoost
+ shell: bash -l {0}
+ run: |
+ mkdir build
+ cd build
+ cmake .. -GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DUSE_OPENMP=OFF -DBUILD_DEPRECATED_CLI=ON
+ ninja -v
+ - name: Run gtest binary
+ run: |
+ cd build
+ ctest --extra-verbose
+
+ c-api-demo:
+ name: Test installing XGBoost lib + building the C API demo
+ runs-on: ubuntu-latest
+ defaults:
+ run:
+ shell: bash -l {0}
+ steps:
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: 'true'
+ - uses: conda-incubator/setup-miniconda@v3.1.0
+ with:
+ miniforge-variant: Miniforge3
+ miniforge-version: latest
+ activate-environment: cpp_test
+ environment-file: ops/conda_env/cpp_test.yml
+ use-mamba: true
+ - name: Display Conda env
+ run: |
+ conda info
+ conda list
+ - name: Build and install XGBoost static library
+ run: |
+ mkdir build
+ cd build
+ cmake .. -DBUILD_STATIC_LIB=ON -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -GNinja
+ ninja -v install
+ cd -
+ - name: Build and run C API demo with static
+ run: |
+ pushd .
+ cd demo/c-api/
+ mkdir build
+ cd build
+ cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
+ ninja -v
+ ctest
+ cd ..
+ rm -rf ./build
+ popd
+
+ - name: Build and install XGBoost shared library
+ run: |
+ cd build
+ cmake .. -DBUILD_STATIC_LIB=OFF -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -GNinja -DPLUGIN_FEDERATED=ON -DGOOGLE_TEST=ON
+ ninja -v install
+ ./testxgboost
+ cd -
+ - name: Build and run C API demo with shared
+ run: |
+ pushd .
+ cd demo/c-api/
+ mkdir build
+ cd build
+ cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
+ ninja -v
+ ctest
+ popd
+ ./ops/script/verify_link.sh ./demo/c-api/build/basic/api-demo
+ ./ops/script/verify_link.sh ./demo/c-api/build/external-memory/external-memory-demo
diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml
index 907cf98e1011..bcc0f5b8ba81 100644
--- a/.github/workflows/python_tests.yml
+++ b/.github/workflows/python_tests.yml
@@ -1,4 +1,4 @@
-name: XGBoost-Python-Tests
+name: XGBoost CI (Python tests)
on: [push, pull_request]
@@ -14,67 +14,32 @@ concurrency:
cancel-in-progress: true
jobs:
- python-mypy-lint:
- runs-on: ubuntu-latest
- name: Type and format checks for the Python package
- strategy:
- matrix:
- os: [ubuntu-latest]
- steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- with:
- submodules: 'true'
- - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
- with:
- miniforge-variant: Miniforge3
- miniforge-version: latest
- activate-environment: python_lint
- environment-file: tests/ci_build/conda_env/python_lint.yml
- use-mamba: true
- - name: Display Conda env
- run: |
- conda info
- conda list
- - name: Run mypy
- run: |
- python tests/ci_build/lint_python.py --format=0 --type-check=1 --pylint=0
- - name: Run formatter
- run: |
- python tests/ci_build/lint_python.py --format=1 --type-check=0 --pylint=0
- - name: Run pylint
- run: |
- python tests/ci_build/lint_python.py --format=0 --type-check=0 --pylint=1
-
python-sdist-test-on-Linux:
- # Mismatched glibcxx version between system and conda forge.
- runs-on: ${{ matrix.os }}
- name: Test installing XGBoost Python source package on ${{ matrix.os }}
- strategy:
- matrix:
- os: [ubuntu-latest]
+ runs-on: ubuntu-latest
+ name: Test installing XGBoost Python source package
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- with:
- submodules: 'true'
- - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
- with:
- miniforge-variant: Miniforge3
- miniforge-version: latest
- activate-environment: sdist_test
- environment-file: tests/ci_build/conda_env/sdist_test.yml
- use-mamba: true
- - name: Display Conda env
- run: |
- conda info
- conda list
- - name: Build and install XGBoost
- run: |
- cd python-package
- python --version
- python -m build --sdist
- pip install -v ./dist/xgboost-*.tar.gz --config-settings use_openmp=False
- cd ..
- python -c 'import xgboost'
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: 'true'
+ - uses: conda-incubator/setup-miniconda@v3.1.0
+ with:
+ miniforge-variant: Miniforge3
+ miniforge-version: latest
+ activate-environment: sdist_test
+ environment-file: ops/conda_env/sdist_test.yml
+ use-mamba: true
+ - name: Display Conda env
+ run: |
+ conda info
+ conda list
+ - name: Build and install XGBoost
+ run: |
+ cd python-package
+ python --version
+ python -m build --sdist
+ pip install -v ./dist/xgboost-*.tar.gz --config-settings use_openmp=False
+ cd ..
+ python -c 'import xgboost'
python-sdist-test:
# Use system toolchain instead of conda toolchain for macos and windows.
@@ -82,244 +47,97 @@ jobs:
runs-on: ${{ matrix.os }}
name: Test installing XGBoost Python source package on ${{ matrix.os }}
strategy:
+ fail-fast: false
matrix:
os: [macos-13, windows-latest]
python-version: ["3.10"]
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- with:
- submodules: 'true'
- - name: Install osx system dependencies
- if: matrix.os == 'macos-13'
- run: |
- brew install ninja libomp
- - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
- with:
- auto-update-conda: true
- python-version: ${{ matrix.python-version }}
- activate-environment: test
- - name: Install build
- run: |
- conda install -c conda-forge python-build
- - name: Display Conda env
- run: |
- conda info
- conda list
- - name: Build and install XGBoost
- run: |
- cd python-package
- python --version
- python -m build --sdist
- pip install -v ./dist/xgboost-*.tar.gz
- cd ..
- python -c 'import xgboost'
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: 'true'
+ - name: Install osx system dependencies
+ if: matrix.os == 'macos-13'
+ run: |
+ brew install ninja libomp
+ - uses: conda-incubator/setup-miniconda@v3.1.0
+ with:
+ auto-update-conda: true
+ python-version: ${{ matrix.python-version }}
+ activate-environment: test
+ - name: Install build
+ run: |
+ conda install -c conda-forge python-build
+ - name: Display Conda env
+ run: |
+ conda info
+ conda list
+ - name: Build and install XGBoost
+ run: |
+ cd python-package
+ python --version
+ python -m build --sdist
+ pip install -v ./dist/xgboost-*.tar.gz
+ cd ..
+ python -c 'import xgboost'
python-tests-on-macos:
- name: Test XGBoost Python package on ${{ matrix.config.os }}
- runs-on: ${{ matrix.config.os }}
+ name: Test XGBoost Python package on macos-13
+ runs-on: macos-13
timeout-minutes: 60
- strategy:
- matrix:
- config:
- - {os: macos-13}
-
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- with:
- submodules: 'true'
-
- - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
- with:
- miniforge-variant: Miniforge3
- miniforge-version: latest
- activate-environment: macos_cpu_test
- environment-file: tests/ci_build/conda_env/macos_cpu_test.yml
- use-mamba: true
-
- - name: Display Conda env
- run: |
- conda info
- conda list
-
- - name: Build XGBoost on macos
- run: |
- brew install ninja
-
- mkdir build
- cd build
- # Set prefix, to use OpenMP library from Conda env
- # See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228
- # to learn why we don't use libomp from Homebrew.
- cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX -DBUILD_DEPRECATED_CLI=ON
- ninja
-
- - name: Install Python package
- run: |
- cd python-package
- python --version
- pip install -v .
-
- - name: Test Python package
- run: |
- pytest -s -v -rxXs --durations=0 ./tests/python
-
- - name: Test Dask Interface
- run: |
- pytest -s -v -rxXs --durations=0 ./tests/test_distributed/test_with_dask
-
- python-tests-on-win:
- name: Test XGBoost Python package on ${{ matrix.config.os }}
- runs-on: ${{ matrix.config.os }}
- timeout-minutes: 60
- strategy:
- matrix:
- config:
- - {os: windows-latest, python-version: '3.10'}
-
- steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- with:
- submodules: 'true'
-
- - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
- with:
- auto-update-conda: true
- python-version: ${{ matrix.config.python-version }}
- activate-environment: win64_env
- environment-file: tests/ci_build/conda_env/win64_cpu_test.yml
-
- - name: Display Conda env
- run: |
- conda info
- conda list
-
- - name: Build XGBoost on Windows
- run: |
- mkdir build_msvc
- cd build_msvc
- cmake .. -G"Visual Studio 17 2022" -DCMAKE_CONFIGURATION_TYPES="Release" -A x64 -DBUILD_DEPRECATED_CLI=ON
- cmake --build . --config Release --parallel $(nproc)
-
- - name: Install Python package
- run: |
- cd python-package
- python --version
- pip wheel -v . --wheel-dir dist/
- pip install ./dist/*.whl
-
- - name: Test Python package
- run: |
- pytest -s -v -rxXs --durations=0 ./tests/python
-
- python-tests-on-ubuntu:
- name: Test XGBoost Python package on ${{ matrix.config.os }}
- runs-on: ${{ matrix.config.os }}
- timeout-minutes: 90
- strategy:
- matrix:
- config:
- - {os: ubuntu-latest, python-version: "3.10"}
-
- steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- with:
- submodules: 'true'
-
- - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
- with:
- miniforge-variant: Miniforge3
- miniforge-version: latest
- activate-environment: linux_cpu_test
- environment-file: tests/ci_build/conda_env/linux_cpu_test.yml
- use-mamba: true
-
- - name: Display Conda env
- run: |
- conda info
- conda list
-
- - name: Build XGBoost on Ubuntu
- run: |
- mkdir build
- cd build
- cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX -DBUILD_DEPRECATED_CLI=ON
- ninja
-
- - name: Install Python package
- run: |
- cd python-package
- python --version
- pip install -v .
-
- - name: Test Python package
- run: |
- pytest -s -v -rxXs --durations=0 ./tests/python
-      - name: Test Dask Interface
-        run: |
-          pytest -s -v -rxXs --durations=0 ./tests/test_distributed/test_with_dask
-      - name: Test PySpark Interface
-        shell: bash -l {0}
-        run: |
-          pytest -s -v -rxXs --durations=0 ./tests/test_distributed/test_with_spark
-  python-sycl-tests-on-ubuntu:
-    name: Test XGBoost Python package with SYCL on ${{ matrix.config.os }}
-    runs-on: ${{ matrix.config.os }}
-    timeout-minutes: 90
-    strategy:
-      matrix:
-        config:
-          - {os: ubuntu-latest, python-version: "3.10"}
-    steps:
-      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
-        with:
-          submodules: 'true'
-      - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
-        with:
-          miniforge-variant: Miniforge3
-          miniforge-version: latest
-          activate-environment: linux_sycl_test
-          environment-file: tests/ci_build/conda_env/linux_sycl_test.yml
-          use-mamba: true
-      - name: Display Conda env
-        run: |
-          conda info
-          conda list
-      - name: Build XGBoost on Ubuntu
-        run: |
-          mkdir build
-          cd build
-          cmake .. -DPLUGIN_SYCL=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
-          make -j$(nproc)
-      - name: Install Python package
-        run: |
-          cd python-package
-          python --version
-          pip install -v .
-      - name: Test Python package
-        run: |
-          pytest -s -v -rxXs --durations=0 ./tests/python-sycl/
+      - uses: actions/checkout@v4.2.2
+        with:
+          submodules: 'true'
+      - uses: conda-incubator/setup-miniconda@v3.1.0
+        with:
+          miniforge-variant: Miniforge3
+          miniforge-version: latest
+          activate-environment: macos_cpu_test
+          environment-file: ops/conda_env/macos_cpu_test.yml
+          use-mamba: true
+      - name: Display Conda env
+        run: |
+          conda info
+          conda list
+      - name: Build XGBoost on macos
+        run: |
+          brew install ninja
+          mkdir build
+          cd build
+          # Set prefix, to use OpenMP library from Conda env
+          # See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228
+          # to learn why we don't use libomp from Homebrew.
+          cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX -DBUILD_DEPRECATED_CLI=ON
+          ninja
+      - name: Install Python package
+        run: |
+          cd python-package
+          python --version
+          pip install -v .
+      - name: Test Python package
+        run: |
+          pytest -s -v -rxXs --durations=0 ./tests/python
+      - name: Test Dask Interface
+        run: |
+          pytest -s -v -rxXs --durations=0 ./tests/test_distributed/test_with_dask
python-system-installation-on-ubuntu:
- name: Test XGBoost Python package System Installation on ${{ matrix.os }}
- runs-on: ${{ matrix.os }}
- strategy:
- matrix:
- os: [ubuntu-latest]
-
+ name: Test XGBoost Python package System Installation on Ubuntu
+ runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+ - uses: actions/checkout@v4.2.2
with:
submodules: 'true'
- name: Set up Python 3.10
- uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
+ uses: actions/setup-python@v5.3.0
with:
python-version: "3.10"
diff --git a/.github/workflows/python_wheels.yml b/.github/workflows/python_wheels.yml
deleted file mode 100644
index 3b7a8072c109..000000000000
--- a/.github/workflows/python_wheels.yml
+++ /dev/null
@@ -1,55 +0,0 @@
-name: XGBoost-Python-Wheels
-
-on: [push, pull_request]
-
-permissions:
- contents: read # to fetch code (actions/checkout)
-
-defaults:
- run:
- shell: bash -l {0}
-
-concurrency:
- group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
- cancel-in-progress: true
-
-jobs:
- python-wheels:
- name: Build wheel for ${{ matrix.platform_id }}
- runs-on: ${{ matrix.os }}
- strategy:
- matrix:
- include:
- - os: macos-13
- platform_id: macosx_x86_64
- - os: macos-14
- platform_id: macosx_arm64
- steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- with:
- submodules: 'true'
- - name: Set up homebrew
- uses: Homebrew/actions/setup-homebrew@68fa6aeb1ccb0596d311f2b34ec74ec21ee68e54
- - name: Install libomp
- run: brew install libomp
- - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
- with:
- miniforge-variant: Miniforge3
- miniforge-version: latest
- python-version: "3.10"
- use-mamba: true
- - name: Build wheels
- run: bash tests/ci_build/build_python_wheels.sh ${{ matrix.platform_id }} ${{ github.sha }}
- - name: Extract branch name
- run: |
- echo "branch=${GITHUB_REF#refs/heads/}" >> "$GITHUB_OUTPUT"
- id: extract_branch
- if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
- - name: Upload Python wheel
- if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
- run: |
- python -m pip install awscli
- python -m awscli s3 cp wheelhouse/*.whl s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/ --acl public-read --region us-west-2
- env:
- AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
- AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
diff --git a/.github/workflows/python_wheels_macos.yml b/.github/workflows/python_wheels_macos.yml
new file mode 100644
index 000000000000..02f21593c220
--- /dev/null
+++ b/.github/workflows/python_wheels_macos.yml
@@ -0,0 +1,55 @@
+name: Build Python wheels targeting MacOS
+
+on: [push, pull_request]
+
+permissions:
+ contents: read # to fetch code (actions/checkout)
+
+defaults:
+ run:
+ shell: bash -l {0}
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: true
+
+env:
+ BRANCH_NAME: >-
+ ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }}
+
+jobs:
+ python-wheels-macos:
+ name: Build wheel for ${{ matrix.platform_id }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - os: macos-13
+ platform_id: macosx_x86_64
+ - os: macos-14
+ platform_id: macosx_arm64
+ steps:
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: 'true'
+ - name: Set up homebrew
+ uses: Homebrew/actions/setup-homebrew@13341b4d5e459a98bbe0b122b12c11bf90518cc8
+ - name: Install libomp
+ run: brew install libomp
+ - uses: conda-incubator/setup-miniconda@v3.1.0
+ with:
+ miniforge-variant: Miniforge3
+ miniforge-version: latest
+ python-version: "3.10"
+ use-mamba: true
+ - name: Build wheels
+ run: bash ops/pipeline/build-python-wheels-macos.sh ${{ matrix.platform_id }} ${{ github.sha }}
+ - name: Upload Python wheel
+ if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
+ run: |
+ python -m pip install awscli
+ python -m awscli s3 cp wheelhouse/*.whl s3://xgboost-nightly-builds/${{ env.BRANCH_NAME }}/ --acl public-read --region us-west-2
+ env:
+ AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
+ AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
diff --git a/.github/workflows/r_nold.yml b/.github/workflows/r_nold.yml
index 4b506927e06c..6ff4aa079e95 100644
--- a/.github/workflows/r_nold.yml
+++ b/.github/workflows/r_nold.yml
@@ -22,23 +22,23 @@ jobs:
container:
image: rhub/debian-gcc-devel-nold
steps:
- - name: Install git and system packages
- shell: bash
- run: |
- apt update && apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev git -y
-
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- with:
- submodules: 'true'
-
- - name: Install dependencies
- shell: bash -l {0}
- run: |
- /tmp/R-devel/bin/Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')"
-
- - name: Run R tests
- shell: bash
- run: |
- cd R-package && \
- /tmp/R-devel/bin/R CMD INSTALL . && \
- /tmp/R-devel/bin/R -q -e "library(testthat); setwd('tests'); source('testthat.R')"
+ - name: Install git and system packages
+ shell: bash
+ run: |
+ apt update && apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev git -y
+
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+ with:
+ submodules: 'true'
+
+ - name: Install dependencies
+ shell: bash -l {0}
+ run: |
+ /tmp/R-devel/bin/Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')"
+
+ - name: Run R tests
+ shell: bash
+ run: |
+ cd R-package && \
+ /tmp/R-devel/bin/R CMD INSTALL . && \
+ /tmp/R-devel/bin/R -q -e "library(testthat); setwd('tests'); source('testthat.R')"
diff --git a/.github/workflows/r_tests.yml b/.github/workflows/r_tests.yml
index c56d1f8ef943..f5e5152fa29a 100644
--- a/.github/workflows/r_tests.yml
+++ b/.github/workflows/r_tests.yml
@@ -13,98 +13,66 @@ concurrency:
cancel-in-progress: true
jobs:
- lintr:
- runs-on: ${{ matrix.config.os }}
- name: Run R linters on OS ${{ matrix.config.os }}, R ${{ matrix.config.r }}, Compiler ${{ matrix.config.compiler }}, Build ${{ matrix.config.build }}
- strategy:
- matrix:
- config:
- - {os: ubuntu-latest, r: 'release'}
- env:
- R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
- RSPM: ${{ matrix.config.rspm }}
-
- steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- with:
- submodules: 'true'
-
- - uses: r-lib/actions/setup-r@929c772977a3a13c8733b363bf5a2f685c25dd91 # v2.9.0
- with:
- r-version: ${{ matrix.config.r }}
-
- - name: Cache R packages
- uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2
- with:
- path: ${{ env.R_LIBS_USER }}
- key: ${{ runner.os }}-r-${{ matrix.config.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }}
- restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }}
-
- - name: Install dependencies
- shell: Rscript {0}
- run: |
- source("./R-package/tests/helper_scripts/install_deps.R")
-
- - name: Run lintr
- run: |
- MAKEFLAGS="-j$(nproc)" R CMD INSTALL R-package/
- Rscript tests/ci_build/lint_r.R $(pwd)
-
test-Rpkg:
- runs-on: ${{ matrix.config.os }}
- name: Test R on OS ${{ matrix.config.os }}, R ${{ matrix.config.r }}, Compiler ${{ matrix.config.compiler }}, Build ${{ matrix.config.build }}
+ runs-on: ${{ matrix.os }}
+ name: Test R on OS ${{ matrix.os }}, R ${{ matrix.r }}, Compiler ${{ matrix.compiler }}, Build ${{ matrix.build }}
strategy:
fail-fast: false
matrix:
- config:
- - {os: windows-latest, r: 'release', compiler: 'mingw', build: 'autotools'}
- - {os: ubuntu-latest, r: 'release', compiler: 'none', build: 'cmake'}
+ include:
+ - os: windows-latest
+ r: release
+ compiler: mingw
+ build: autotools
+ - os: ubuntu-latest
+ r: release
+ compiler: none
+ build: cmake
env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
- RSPM: ${{ matrix.config.rspm }}
steps:
- - name: Install system dependencies
- run: |
- sudo apt update
- sudo apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev
- if: matrix.config.os == 'ubuntu-latest'
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- with:
- submodules: 'true'
-
- - uses: r-lib/actions/setup-r@929c772977a3a13c8733b363bf5a2f685c25dd91 # v2.9.0
- with:
- r-version: ${{ matrix.config.r }}
-
- - name: Cache R packages
- uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2
- with:
- path: ${{ env.R_LIBS_USER }}
- key: ${{ runner.os }}-r-${{ matrix.config.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }}
- restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }}
-
- - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
- with:
- python-version: "3.10"
- architecture: 'x64'
-
- - uses: r-lib/actions/setup-tinytex@v2
-
- - name: Install dependencies
- shell: Rscript {0}
- run: |
- source("./R-package/tests/helper_scripts/install_deps.R")
-
- - name: Test R
- run: |
- python tests/ci_build/test_r_package.py --compiler='${{ matrix.config.compiler }}' --build-tool="${{ matrix.config.build }}" --task=check
- if: matrix.config.compiler != 'none'
-
- - name: Test R
- run: |
- python tests/ci_build/test_r_package.py --build-tool="${{ matrix.config.build }}" --task=check
- if: matrix.config.compiler == 'none'
+ - name: Install system dependencies
+ run: |
+ sudo apt update
+ sudo apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev
+ if: matrix.os == 'ubuntu-latest'
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: 'true'
+
+ - uses: r-lib/actions/setup-r@v2.11.0
+ with:
+ r-version: ${{ matrix.r }}
+
+ - name: Cache R packages
+ uses: actions/cache@v4.1.2
+ with:
+ path: ${{ env.R_LIBS_USER }}
+ key: ${{ runner.os }}-r-${{ matrix.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }}
+ restore-keys: ${{ runner.os }}-r-${{ matrix.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }}
+
+ - uses: actions/setup-python@v5.3.0
+ with:
+ python-version: "3.10"
+ architecture: 'x64'
+
+ - uses: r-lib/actions/setup-tinytex@v2
+
+ - name: Install dependencies
+ shell: Rscript {0}
+ run: |
+ source("./R-package/tests/helper_scripts/install_deps.R")
+
+ - name: Test R
+ run: |
+ python ops/script/test_r_package.py --compiler='${{ matrix.compiler }}' --build-tool="${{ matrix.build }}" --task=check
+ if: matrix.compiler != 'none'
+
+ - name: Test R
+ run: |
+ python ops/script/test_r_package.py --build-tool="${{ matrix.build }}" --task=check
+ if: matrix.compiler == 'none'
test-R-on-Debian:
name: Test R package on Debian
@@ -113,38 +81,38 @@ jobs:
image: rhub/debian-gcc-release
steps:
- - name: Install system dependencies
- run: |
- # Must run before checkout to have the latest git installed.
- # No need to add pandoc, the container has it figured out.
- apt update && apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev git -y
-
- - name: Trust git cloning project sources
- run: |
- git config --global --add safe.directory "${GITHUB_WORKSPACE}"
-
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- with:
- submodules: 'true'
-
- - name: Install dependencies
- shell: bash -l {0}
- run: |
- Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')"
-
- - name: Test R
- shell: bash -l {0}
- run: |
- python3 tests/ci_build/test_r_package.py --r=/usr/bin/R --build-tool=autotools --task=check
-
- - uses: dorny/paths-filter@v3
- id: changes
- with:
- filters: |
- r_package:
- - 'R-package/**'
-
- - name: Run document check
- if: steps.changes.outputs.r_package == 'true'
- run: |
- python3 tests/ci_build/test_r_package.py --r=/usr/bin/R --task=doc
+ - name: Install system dependencies
+ run: |
+ # Must run before checkout to have the latest git installed.
+          # No need to install pandoc; the container already provides it.
+ apt update && apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev git -y
+
+ - name: Trust git cloning project sources
+ run: |
+ git config --global --add safe.directory "${GITHUB_WORKSPACE}"
+
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: 'true'
+
+ - name: Install dependencies
+ shell: bash -l {0}
+ run: |
+ Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')"
+
+ - name: Test R
+ shell: bash -l {0}
+ run: |
+ python3 ops/script/test_r_package.py --r=/usr/bin/R --build-tool=autotools --task=check
+
+ - uses: dorny/paths-filter@v3
+ id: changes
+ with:
+ filters: |
+ r_package:
+ - 'R-package/**'
+
+ - name: Run document check
+ if: steps.changes.outputs.r_package == 'true'
+ run: |
+ python3 ops/script/test_r_package.py --r=/usr/bin/R --task=doc
diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml
index 85a9abb57e1b..8ab77ec4c382 100644
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@@ -22,7 +22,7 @@ jobs:
steps:
- name: "Checkout code"
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+ uses: actions/checkout@v4.2.2
with:
persist-credentials: false
diff --git a/.github/workflows/sycl_tests.yml b/.github/workflows/sycl_tests.yml
new file mode 100644
index 000000000000..7f6214016c00
--- /dev/null
+++ b/.github/workflows/sycl_tests.yml
@@ -0,0 +1,86 @@
+name: XGBoost CI (oneAPI)
+
+on: [push, pull_request]
+
+permissions:
+ contents: read # to fetch code (actions/checkout)
+
+defaults:
+ run:
+ shell: bash -l {0}
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: true
+
+env:
+ BRANCH_NAME: >-
+ ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }}
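+  # Evaluates to "PR-<number>" for pull requests and to the branch name otherwise.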
+
+jobs:
+ gtest-cpu-sycl:
+ name: Test Google C++ unittest (CPU SYCL)
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: 'true'
+ - uses: conda-incubator/setup-miniconda@v3.1.0
+ with:
+ miniforge-variant: Miniforge3
+ miniforge-version: latest
+ activate-environment: linux_sycl_test
+ environment-file: ops/conda_env/linux_sycl_test.yml
+ use-mamba: true
+ - name: Display Conda env
+ run: |
+ conda info
+ conda list
+ - name: Build and install XGBoost
+ run: |
+ mkdir build
+ cd build
+ cmake .. -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DPLUGIN_SYCL=ON -DCMAKE_CXX_COMPILER=g++ \
+ -DCMAKE_C_COMPILER=gcc -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -GNinja
+ ninja
+ - name: Run gtest
+ run: |
+ cd build
+ ./testxgboost
+
+ python-sycl-tests-on-ubuntu:
+ name: Test XGBoost Python package with SYCL
+ runs-on: ubuntu-latest
+ timeout-minutes: 90
+ steps:
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: 'true'
+
+ - uses: conda-incubator/setup-miniconda@v3.1.0
+ with:
+ miniforge-variant: Miniforge3
+ miniforge-version: latest
+ activate-environment: linux_sycl_test
+ environment-file: ops/conda_env/linux_sycl_test.yml
+ use-mamba: true
+
+ - name: Display Conda env
+ run: |
+ conda info
+ conda list
+ - name: Build XGBoost on Ubuntu
+ run: |
+ mkdir build
+ cd build
+ cmake .. -DPLUGIN_SYCL=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc \
+ -DCMAKE_PREFIX_PATH=$CONDA_PREFIX -GNinja
+ ninja
+ - name: Install Python package
+ run: |
+ cd python-package
+ python --version
+ pip install -v .
+ - name: Test Python package
+ run: |
+ pytest -s -v -rxXs --durations=0 ./tests/python-sycl/
diff --git a/.github/workflows/update_rapids.yml b/.github/workflows/update_rapids.yml
index 5e229db4c050..d6be99d00851 100644
--- a/.github/workflows/update_rapids.yml
+++ b/.github/workflows/update_rapids.yml
@@ -25,20 +25,20 @@ jobs:
name: Check latest RAPIDS
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- with:
- submodules: 'true'
- - name: Check latest RAPIDS and update conftest.sh
- run: |
- bash tests/buildkite/update-rapids.sh
- - name: Create Pull Request
- uses: peter-evans/create-pull-request@v7
- if: github.ref == 'refs/heads/master'
- with:
- add-paths: |
- tests/buildkite
- branch: create-pull-request/update-rapids
- base: master
- title: "[CI] Update RAPIDS to latest stable"
- commit-message: "[CI] Update RAPIDS to latest stable"
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: 'true'
+ - name: Check latest RAPIDS and update conftest.sh
+ run: |
+ bash ops/script/update_rapids.sh
+ - name: Create Pull Request
+ uses: peter-evans/create-pull-request@v7
+ if: github.ref == 'refs/heads/master'
+ with:
+ add-paths: |
+ tests/buildkite
+ branch: create-pull-request/update-rapids
+ base: master
+ title: "[CI] Update RAPIDS to latest stable"
+ commit-message: "[CI] Update RAPIDS to latest stable"
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
new file mode 100644
index 000000000000..afd9e65192ba
--- /dev/null
+++ b/.github/workflows/windows.yml
@@ -0,0 +1,60 @@
+name: XGBoost CI (Windows)
+
+on: [push, pull_request]
+
+permissions:
+ contents: read # to fetch code (actions/checkout)
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: true
+
+defaults:
+ run:
+ shell: powershell
+
+env:
+ BRANCH_NAME: >-
+ ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }}
+ ARTIFACT_STASH_PREFIX: cache/${{ github.repository }}/stash/${{ github.run_id }}
+ # TODO(hcho3): Remove
+ RUNS_ON_S3_BUCKET_CACHE: runs-on-s3bucketcache-dv5n3gmnaius
+
+jobs:
+ build-win64-gpu:
+ name: Build XGBoost for Windows with CUDA
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=windows-cpu
+ steps:
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: "true"
+ - run: powershell ops/pipeline/build-win64-gpu.ps1
+ - name: Stash files
+ run: |
+ powershell ops/stash_artifacts.ps1 `
+ build/testxgboost.exe xgboost.exe `
+ (Get-ChildItem python-package/dist/*.whl | Select-Object -Expand FullName)
+ env:
+ COMMAND: upload
+ KEY: build-win64-gpu
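+        # stash_artifacts.ps1 (added in this PR) reads COMMAND (upload/download)
+        # and KEY from the environment; with `upload` it is assumed to stage the
+        # listed files under ARTIFACT_STASH_PREFIX in the S3 bucket cache.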
+
+ test-win64-gpu:
+ name: Test XGBoost on Windows
+ needs: build-win64-gpu
+ runs-on:
+ - runs-on=${{ github.run_id }}
+ - runner=windows-gpu
+ steps:
+ - uses: actions/checkout@v4.2.2
+ with:
+ submodules: "true"
+ - name: Unstash files
+ run: |
+ powershell ops/stash_artifacts.ps1 `
+ build/testxgboost.exe xgboost.exe python-package/dist/*.whl
+ env:
+ COMMAND: download
+ KEY: build-win64-gpu
+ - run: powershell ops/pipeline/test-win64-gpu.ps1
diff --git a/dev/prepare_jvm_release.py b/dev/prepare_jvm_release.py
index 0b4594e2d2c0..c5a72724f707 100644
--- a/dev/prepare_jvm_release.py
+++ b/dev/prepare_jvm_release.py
@@ -203,7 +203,7 @@ def main():
)
print(
"5. Remove the Scala 2.12 artifacts and build Scala 2.13 artifacts:\n"
- " python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts\n"
+ " python ops/script/change_scala_version.py --scala-version 2.13 --purge-artifacts\n"
" GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests -Dskip.native.build=true"
)
print(
diff --git a/doc/jvm/api.rst b/doc/jvm/api.rst
index b9e7821aa6fa..3d56cb2c9aa4 100644
--- a/doc/jvm/api.rst
+++ b/doc/jvm/api.rst
@@ -5,4 +5,5 @@ API Docs for the JVM packages
* `XGBoost4J Java API <../jvm_docs/javadocs/index.html>`_
* `XGBoost4J Scala API <../jvm_docs/scaladocs/xgboost4j/index.html>`_
* `XGBoost4J-Spark Scala API <../jvm_docs/scaladocs/xgboost4j-spark/index.html>`_
+* `XGBoost4J-Spark-GPU Scala API <../jvm_docs/scaladocs/xgboost4j-spark-gpu/index.html>`_
* `XGBoost4J-Flink Scala API <../jvm_docs/scaladocs/xgboost4j-flink/index.html>`_
diff --git a/jvm-packages/create_jni.py b/jvm-packages/create_jni.py
index 6be7b451ce14..fbd9b4ce5672 100755
--- a/jvm-packages/create_jni.py
+++ b/jvm-packages/create_jni.py
@@ -32,7 +32,7 @@ def cd(path):
path = normpath(path)
cwd = os.getcwd()
os.chdir(path)
- print("cd " + path)
+ print("cd " + path, flush=True)
try:
yield path
finally:
@@ -41,7 +41,7 @@ def cd(path):
def maybe_makedirs(path):
path = normpath(path)
- print("mkdir -p " + path)
+ print("mkdir -p " + path, flush=True)
try:
os.makedirs(path)
except OSError as e:
@@ -50,14 +50,14 @@ def maybe_makedirs(path):
def run(command, **kwargs):
- print(command)
+ print(command, flush=True)
subprocess.run(command, shell=True, check=True, env=os.environ, **kwargs)
def cp(source, target):
source = normpath(source)
target = normpath(target)
- print("cp {0} {1}".format(source, target))
+ print("cp {0} {1}".format(source, target), flush=True)
shutil.copy(source, target)
@@ -78,7 +78,7 @@ def native_build(args):
subprocess.check_output("/usr/libexec/java_home").strip().decode()
)
- print("building Java wrapper")
+ print("building Java wrapper", flush=True)
with cd(".."):
build_dir = "build-gpu" if cli_args.use_cuda == "ON" else "build"
maybe_makedirs(build_dir)
@@ -123,7 +123,7 @@ def native_build(args):
run("cmake .. " + " ".join(args + [generator]))
break
except subprocess.CalledProcessError as e:
- print(f"Failed to build with generator: {generator}", e)
+ print(f"Failed to build with generator: {generator}", e, flush=True)
with cd(os.path.pardir):
shutil.rmtree(build_dir)
maybe_makedirs(build_dir)
@@ -132,7 +132,7 @@ def native_build(args):
run("cmake --build . --config Release" + maybe_parallel_build)
- print("copying native library")
+ print("copying native library", flush=True)
library_name, os_folder = {
"Windows": ("xgboost4j.dll", "windows"),
"Darwin": ("libxgboost4j.dylib", "macos"),
@@ -153,7 +153,7 @@ def native_build(args):
maybe_makedirs(output_folder)
cp("../lib/" + library_name, output_folder)
- print("copying train/test files")
+ print("copying train/test files", flush=True)
# for xgboost4j
maybe_makedirs("xgboost4j/src/test/resources")
diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml
index ad992464a2bc..4f2be9cee080 100644
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -116,6 +116,22 @@
+    <profile>
+      <id>docs</id>
+      <properties>
+        <use.cuda>ON</use.cuda>
+        <skip.native.build>true</skip.native.build>
+        <maven.test.skip>true</maven.test.skip>
+        <checkstyle.skip>true</checkstyle.skip>
+      </properties>
+      <modules>
+        <module>xgboost4j</module>
+        <module>xgboost4j-spark</module>
+        <module>xgboost4j-spark-gpu</module>
+        <module>xgboost4j-flink</module>
+      </modules>
+    </profile>
+
       <id>release</id>
diff --git a/tests/ci_build/conda_env/aarch64_test.yml b/ops/conda_env/aarch64_test.yml
similarity index 100%
rename from tests/ci_build/conda_env/aarch64_test.yml
rename to ops/conda_env/aarch64_test.yml
diff --git a/tests/ci_build/conda_env/cpp_test.yml b/ops/conda_env/cpp_test.yml
similarity index 100%
rename from tests/ci_build/conda_env/cpp_test.yml
rename to ops/conda_env/cpp_test.yml
diff --git a/tests/ci_build/conda_env/jvm_tests.yml b/ops/conda_env/jvm_tests.yml
similarity index 100%
rename from tests/ci_build/conda_env/jvm_tests.yml
rename to ops/conda_env/jvm_tests.yml
diff --git a/tests/ci_build/conda_env/linux_cpu_test.yml b/ops/conda_env/linux_cpu_test.yml
similarity index 100%
rename from tests/ci_build/conda_env/linux_cpu_test.yml
rename to ops/conda_env/linux_cpu_test.yml
diff --git a/tests/ci_build/conda_env/linux_sycl_test.yml b/ops/conda_env/linux_sycl_test.yml
similarity index 97%
rename from tests/ci_build/conda_env/linux_sycl_test.yml
rename to ops/conda_env/linux_sycl_test.yml
index 5b3a15f7e3b1..f1ce49492d42 100644
--- a/tests/ci_build/conda_env/linux_sycl_test.yml
+++ b/ops/conda_env/linux_sycl_test.yml
@@ -18,6 +18,7 @@ dependencies:
- pytest-timeout
- pytest-cov
- dask
+- ninja
- dpcpp_linux-64
- onedpl-devel
- intel-openmp
diff --git a/tests/ci_build/conda_env/macos_cpu_test.yml b/ops/conda_env/macos_cpu_test.yml
similarity index 100%
rename from tests/ci_build/conda_env/macos_cpu_test.yml
rename to ops/conda_env/macos_cpu_test.yml
diff --git a/tests/ci_build/conda_env/python_lint.yml b/ops/conda_env/python_lint.yml
similarity index 100%
rename from tests/ci_build/conda_env/python_lint.yml
rename to ops/conda_env/python_lint.yml
diff --git a/tests/ci_build/conda_env/sdist_test.yml b/ops/conda_env/sdist_test.yml
similarity index 100%
rename from tests/ci_build/conda_env/sdist_test.yml
rename to ops/conda_env/sdist_test.yml
diff --git a/tests/ci_build/conda_env/win64_test.yml b/ops/conda_env/win64_test.yml
similarity index 100%
rename from tests/ci_build/conda_env/win64_test.yml
rename to ops/conda_env/win64_test.yml
diff --git a/ops/docker/ci_container.yml b/ops/docker/ci_container.yml
new file mode 100644
index 000000000000..f5eb7eb982df
--- /dev/null
+++ b/ops/docker/ci_container.yml
@@ -0,0 +1,65 @@
+## List of CI containers with definitions and build arguments
+
+# Each container will be built using the definition from
+# ops/docker/dockerfile/Dockerfile.CONTAINER_DEF
+
+rapids_versions:
+ stable: &rapids_version "24.10"
+ dev: &dev_rapids_version "24.12"
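+# The &anchors above are consumed below via the *rapids_version and
+# *dev_rapids_version aliases, keeping RAPIDS versions consistent across containers.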
+
+xgb-ci.gpu_build_rockylinux8:
+ container_def: gpu_build_rockylinux8
+ build_args:
+ CUDA_VERSION_ARG: "12.4.1"
+ NCCL_VERSION_ARG: "2.23.4-1"
+ RAPIDS_VERSION_ARG: *rapids_version
+
+xgb-ci.gpu_build_r_rockylinux8:
+ container_def: gpu_build_r_rockylinux8
+ build_args:
+ CUDA_VERSION_ARG: "12.4.1"
+ R_VERSION_ARG: "4.3.2"
+
+xgb-ci.gpu:
+ container_def: gpu
+ build_args:
+ CUDA_VERSION_ARG: "12.4.1"
+ NCCL_VERSION_ARG: "2.23.4-1"
+ RAPIDS_VERSION_ARG: *rapids_version
+
+xgb-ci.gpu_dev_ver:
+ container_def: gpu
+ build_args:
+ CUDA_VERSION_ARG: "12.4.1"
+ NCCL_VERSION_ARG: "2.23.4-1"
+ RAPIDS_VERSION_ARG: *dev_rapids_version
+ RAPIDSAI_CONDA_CHANNEL_ARG: "rapidsai-nightly"
+
+xgb-ci.clang_tidy:
+ container_def: clang_tidy
+ build_args:
+ CUDA_VERSION_ARG: "12.4.1"
+
+xgb-ci.cpu:
+ container_def: cpu
+
+xgb-ci.aarch64:
+ container_def: aarch64
+
+xgb-ci.manylinux_2_28_x86_64:
+ container_def: manylinux_2_28_x86_64
+
+xgb-ci.manylinux2014_x86_64:
+ container_def: manylinux2014_x86_64
+
+xgb-ci.manylinux2014_aarch64:
+ container_def: manylinux2014_aarch64
+
+xgb-ci.jvm:
+ container_def: jvm
+
+xgb-ci.jvm_gpu_build:
+ container_def: jvm_gpu_build
+ build_args:
+ CUDA_VERSION_ARG: "12.4.1"
+ NCCL_VERSION_ARG: "2.23.4-1"
diff --git a/ops/docker/docker_cache_ecr.yml b/ops/docker/docker_cache_ecr.yml
new file mode 100644
index 000000000000..e20f35fc8020
--- /dev/null
+++ b/ops/docker/docker_cache_ecr.yml
@@ -0,0 +1,4 @@
+## Constants for AWS ECR (Elastic Container Registry), used for the Docker cache
+
+DOCKER_CACHE_ECR_ID: "492475357299"
+DOCKER_CACHE_ECR_REGION: "us-west-2"
diff --git a/tests/ci_build/Dockerfile.aarch64 b/ops/docker/dockerfile/Dockerfile.aarch64
similarity index 97%
rename from tests/ci_build/Dockerfile.aarch64
rename to ops/docker/dockerfile/Dockerfile.aarch64
index 8d6cfaca39fa..9dff2a05230b 100644
--- a/tests/ci_build/Dockerfile.aarch64
+++ b/ops/docker/dockerfile/Dockerfile.aarch64
@@ -32,7 +32,7 @@ RUN set -ex; \
# Default entry-point to use if running locally
# It will preserve attributes of created files
-COPY entrypoint.sh /scripts/
+COPY docker/entrypoint.sh /scripts/
WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]
diff --git a/tests/ci_build/Dockerfile.clang_tidy b/ops/docker/dockerfile/Dockerfile.clang_tidy
similarity index 96%
rename from tests/ci_build/Dockerfile.clang_tidy
rename to ops/docker/dockerfile/Dockerfile.clang_tidy
index 2e7751a20185..de7d9bd3f254 100644
--- a/tests/ci_build/Dockerfile.clang_tidy
+++ b/ops/docker/dockerfile/Dockerfile.clang_tidy
@@ -1,4 +1,4 @@
-ARG CUDA_VERSION_ARG
+ARG CUDA_VERSION_ARG=notset
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu22.04
ARG CUDA_VERSION_ARG
@@ -44,7 +44,7 @@ RUN set -ex; \
# Default entry-point to use if running locally
# It will preserve attributes of created files
-COPY entrypoint.sh /scripts/
+COPY docker/entrypoint.sh /scripts/
WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]
diff --git a/tests/ci_build/Dockerfile.cpu b/ops/docker/dockerfile/Dockerfile.cpu
similarity index 92%
rename from tests/ci_build/Dockerfile.cpu
rename to ops/docker/dockerfile/Dockerfile.cpu
index 22db93572207..a426ce5da30c 100644
--- a/tests/ci_build/Dockerfile.cpu
+++ b/ops/docker/dockerfile/Dockerfile.cpu
@@ -41,8 +41,7 @@ RUN git clone -b v1.65.4 https://github.com/grpc/grpc.git \
COPY conda_env/linux_cpu_test.yml /scripts/
RUN mamba create -n linux_cpu_test && \
mamba env update -n linux_cpu_test --file=/scripts/linux_cpu_test.yml && \
- mamba clean --all --yes && \
- conda run --no-capture-output -n linux_cpu_test pip install buildkite-test-collector
+ mamba clean --all --yes
# Install lightweight sudo (not bound to TTY)
RUN set -ex; \
@@ -52,7 +51,7 @@ RUN set -ex; \
# Default entry-point to use if running locally
# It will preserve attributes of created files
-COPY entrypoint.sh /scripts/
+COPY docker/entrypoint.sh /scripts/
WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]
diff --git a/tests/ci_build/Dockerfile.gpu b/ops/docker/dockerfile/Dockerfile.gpu
similarity index 76%
rename from tests/ci_build/Dockerfile.gpu
rename to ops/docker/dockerfile/Dockerfile.gpu
index 501726e9ffba..96a532fc2ff1 100644
--- a/tests/ci_build/Dockerfile.gpu
+++ b/ops/docker/dockerfile/Dockerfile.gpu
@@ -1,8 +1,10 @@
-ARG CUDA_VERSION_ARG
+ARG CUDA_VERSION_ARG=notset
FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu22.04
ARG CUDA_VERSION_ARG
ARG RAPIDS_VERSION_ARG
+ # Should be the major.minor version only (e.g. 24.06)
ARG NCCL_VERSION_ARG
+ARG RAPIDSAI_CONDA_CHANNEL_ARG="rapidsai"
# Environment
ENV DEBIAN_FRONTEND=noninteractive
@@ -24,16 +26,16 @@ ENV PATH=/opt/miniforge/bin:$PATH
RUN \
export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \
export CUDA_SHORT_VER=$(echo "$CUDA_VERSION_ARG" | grep -o -E '[0-9]+\.[0-9]') && \
- mamba create -y -n gpu_test -c rapidsai -c conda-forge -c nvidia \
- python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cuda-version=$CUDA_SHORT_VER \
+ mamba create -y -n gpu_test -c ${RAPIDSAI_CONDA_CHANNEL_ARG} -c conda-forge -c nvidia \
+ python=3.10 "cudf=$RAPIDS_VERSION_ARG.*" "rmm=$RAPIDS_VERSION_ARG.*" cuda-version=$CUDA_SHORT_VER \
"nccl>=${NCCL_SHORT_VER}" \
- dask \
- dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
+ "dask<=2024.10.0" \
+ "distributed<=2024.10.0" \
+ "dask-cuda=$RAPIDS_VERSION_ARG.*" "dask-cudf=$RAPIDS_VERSION_ARG.*" cupy \
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel \
python-kubernetes urllib3 graphviz hypothesis loky \
"pyspark>=3.4.0" cloudpickle cuda-python && \
- mamba clean --all --yes && \
- conda run --no-capture-output -n gpu_test pip install buildkite-test-collector
+ mamba clean --all --yes
ENV GOSU_VERSION=1.10
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
@@ -46,7 +48,7 @@ RUN set -ex; \
# Default entry-point to use if running locally
# It will preserve attributes of created files
-COPY entrypoint.sh /scripts/
+COPY docker/entrypoint.sh /scripts/
WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]
diff --git a/tests/ci_build/Dockerfile.gpu_build_r_rockylinux8 b/ops/docker/dockerfile/Dockerfile.gpu_build_r_rockylinux8
similarity index 97%
rename from tests/ci_build/Dockerfile.gpu_build_r_rockylinux8
rename to ops/docker/dockerfile/Dockerfile.gpu_build_r_rockylinux8
index 159e5d776c16..2d18b1eeb315 100644
--- a/tests/ci_build/Dockerfile.gpu_build_r_rockylinux8
+++ b/ops/docker/dockerfile/Dockerfile.gpu_build_r_rockylinux8
@@ -1,4 +1,4 @@
-ARG CUDA_VERSION_ARG
+ARG CUDA_VERSION_ARG=notset
FROM nvcr.io/nvidia/cuda:$CUDA_VERSION_ARG-devel-rockylinux8
ARG CUDA_VERSION_ARG
ARG R_VERSION_ARG
@@ -52,7 +52,7 @@ RUN set -ex; \
# Default entry-point to use if running locally
# It will preserve attributes of created files
-COPY entrypoint.sh /scripts/
+COPY docker/entrypoint.sh /scripts/
WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]
diff --git a/tests/ci_build/Dockerfile.gpu_build_rockylinux8 b/ops/docker/dockerfile/Dockerfile.gpu_build_rockylinux8
similarity index 98%
rename from tests/ci_build/Dockerfile.gpu_build_rockylinux8
rename to ops/docker/dockerfile/Dockerfile.gpu_build_rockylinux8
index 8869fb468e12..ae79e88b15b3 100644
--- a/tests/ci_build/Dockerfile.gpu_build_rockylinux8
+++ b/ops/docker/dockerfile/Dockerfile.gpu_build_rockylinux8
@@ -1,4 +1,4 @@
-ARG CUDA_VERSION_ARG
+ARG CUDA_VERSION_ARG=notset
FROM nvcr.io/nvidia/cuda:$CUDA_VERSION_ARG-devel-rockylinux8
ARG CUDA_VERSION_ARG
ARG NCCL_VERSION_ARG
@@ -76,7 +76,7 @@ RUN set -ex; \
# Default entry-point to use if running locally
# It will preserve attributes of created files
-COPY entrypoint.sh /scripts/
+COPY docker/entrypoint.sh /scripts/
WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]
diff --git a/tests/ci_build/Dockerfile.i386 b/ops/docker/dockerfile/Dockerfile.i386
similarity index 100%
rename from tests/ci_build/Dockerfile.i386
rename to ops/docker/dockerfile/Dockerfile.i386
diff --git a/tests/ci_build/Dockerfile.jvm b/ops/docker/dockerfile/Dockerfile.jvm
similarity index 97%
rename from tests/ci_build/Dockerfile.jvm
rename to ops/docker/dockerfile/Dockerfile.jvm
index c4584747f5db..9fd62e52de93 100644
--- a/tests/ci_build/Dockerfile.jvm
+++ b/ops/docker/dockerfile/Dockerfile.jvm
@@ -37,7 +37,7 @@ RUN set -ex; \
# Default entry-point to use if running locally
# It will preserve attributes of created files
-COPY entrypoint.sh /scripts/
+COPY docker/entrypoint.sh /scripts/
WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]
diff --git a/tests/ci_build/Dockerfile.jvm_gpu_build b/ops/docker/dockerfile/Dockerfile.jvm_gpu_build
similarity index 97%
rename from tests/ci_build/Dockerfile.jvm_gpu_build
rename to ops/docker/dockerfile/Dockerfile.jvm_gpu_build
index edb5918b8bbc..4983493a6878 100644
--- a/tests/ci_build/Dockerfile.jvm_gpu_build
+++ b/ops/docker/dockerfile/Dockerfile.jvm_gpu_build
@@ -1,4 +1,4 @@
-ARG CUDA_VERSION_ARG
+ARG CUDA_VERSION_ARG=notset
FROM nvcr.io/nvidia/cuda:$CUDA_VERSION_ARG-devel-rockylinux8
ARG CUDA_VERSION_ARG
ARG NCCL_VERSION_ARG
@@ -48,7 +48,7 @@ RUN set -ex; \
# Default entry-point to use if running locally
# It will preserve attributes of created files
-COPY entrypoint.sh /scripts/
+COPY docker/entrypoint.sh /scripts/
WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]
diff --git a/tests/ci_build/Dockerfile.manylinux2014_aarch64 b/ops/docker/dockerfile/Dockerfile.manylinux2014_aarch64
similarity index 82%
rename from tests/ci_build/Dockerfile.manylinux2014_aarch64
rename to ops/docker/dockerfile/Dockerfile.manylinux2014_aarch64
index 9627e15c64a0..7800033f552d 100644
--- a/tests/ci_build/Dockerfile.manylinux2014_aarch64
+++ b/ops/docker/dockerfile/Dockerfile.manylinux2014_aarch64
@@ -1,5 +1,7 @@
FROM quay.io/pypa/manylinux2014_aarch64
+RUN yum update -y && yum install -y java-1.8.0-openjdk-devel
+
# Install lightweight sudo (not bound to TTY)
ENV GOSU_VERSION=1.10
RUN set -ex; \
@@ -9,7 +11,7 @@ RUN set -ex; \
# Default entry-point to use if running locally
# It will preserve attributes of created files
-COPY entrypoint.sh /scripts/
+COPY docker/entrypoint.sh /scripts/
WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]
diff --git a/tests/ci_build/Dockerfile.manylinux2014_x86_64 b/ops/docker/dockerfile/Dockerfile.manylinux2014_x86_64
similarity index 82%
rename from tests/ci_build/Dockerfile.manylinux2014_x86_64
rename to ops/docker/dockerfile/Dockerfile.manylinux2014_x86_64
index 11beb116ee43..8214b598d8d4 100644
--- a/tests/ci_build/Dockerfile.manylinux2014_x86_64
+++ b/ops/docker/dockerfile/Dockerfile.manylinux2014_x86_64
@@ -1,5 +1,7 @@
FROM quay.io/pypa/manylinux2014_x86_64
+RUN yum update -y && yum install -y java-1.8.0-openjdk-devel
+
# Install lightweight sudo (not bound to TTY)
ENV GOSU_VERSION=1.10
RUN set -ex; \
@@ -9,7 +11,7 @@ RUN set -ex; \
# Default entry-point to use if running locally
# It will preserve attributes of created files
-COPY entrypoint.sh /scripts/
+COPY docker/entrypoint.sh /scripts/
WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]
diff --git a/tests/ci_build/Dockerfile.manylinux_2_28_x86_64 b/ops/docker/dockerfile/Dockerfile.manylinux_2_28_x86_64
similarity index 92%
rename from tests/ci_build/Dockerfile.manylinux_2_28_x86_64
rename to ops/docker/dockerfile/Dockerfile.manylinux_2_28_x86_64
index 5e264e2f16e6..f5dac54b9b8f 100644
--- a/tests/ci_build/Dockerfile.manylinux_2_28_x86_64
+++ b/ops/docker/dockerfile/Dockerfile.manylinux_2_28_x86_64
@@ -9,7 +9,7 @@ RUN set -ex; \
# Default entry-point to use if running locally
# It will preserve attributes of created files
-COPY entrypoint.sh /scripts/
+COPY docker/entrypoint.sh /scripts/
WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]
diff --git a/tests/ci_build/entrypoint.sh b/ops/docker/entrypoint.sh
similarity index 70%
rename from tests/ci_build/entrypoint.sh
rename to ops/docker/entrypoint.sh
index a0c5f56bb52d..40135c197c73 100755
--- a/tests/ci_build/entrypoint.sh
+++ b/ops/docker/entrypoint.sh
@@ -1,12 +1,10 @@
#!/usr/bin/env bash
-# This script is a wrapper creating the same user inside container as the one
-# running the ci_build.sh outside the container. It also set the home directory
-# for the user inside container to match the same absolute path as the workspace
-# outside of container. Do not run this manually. It does not make sense. It is
-# intended to be called by ci_build.sh only.
+# This wrapper script propagates the user information from the host
+# to the container. This way, any files generated by processes running
+# in the container will be accessible on the host.
-set -e
+set -euo pipefail
COMMAND=("$@")
@@ -19,7 +17,11 @@ else
rm /this_is_writable_file_system
fi
-if [[ -n $CI_BUILD_UID ]] && [[ -n $CI_BUILD_GID ]]; then
+## Assumption: the host passes correct user information via environment variables
+## CI_BUILD_UID, CI_BUILD_GID, CI_BUILD_USER, CI_BUILD_GROUP
+
+if [[ -n ${CI_BUILD_UID:-} ]] && [[ -n ${CI_BUILD_GID:-} ]]
+then
groupadd -o -g "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" || true
useradd -o -m -g "${CI_BUILD_GID}" -u "${CI_BUILD_UID}" \
"${CI_BUILD_USER}" || true
diff --git a/ops/docker/extract_build_args.jq b/ops/docker/extract_build_args.jq
new file mode 100644
index 000000000000..682b62cb63cb
--- /dev/null
+++ b/ops/docker/extract_build_args.jq
@@ -0,0 +1,8 @@
+def compute_build_args($input; $container_id):
+ $input |
+ .[$container_id] |
+ select(.build_args != null) |
+ .build_args |
+ to_entries |
+ map("--build-arg " + .key + "=" + .value) |
+ join(" ");
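+
+# Illustrative example (hypothetical input mirroring ops/docker/ci_container.yml):
+# given {"xgb-ci.gpu": {"build_args": {"CUDA_VERSION_ARG": "12.4.1"}}},
+# compute_build_args(.; "xgb-ci.gpu") evaluates to
+#   "--build-arg CUDA_VERSION_ARG=12.4.1"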
diff --git a/ops/docker/extract_build_args.sh b/ops/docker/extract_build_args.sh
new file mode 100755
index 000000000000..0fa7b132b760
--- /dev/null
+++ b/ops/docker/extract_build_args.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+## Extract container definition and build args from ops/docker/ci_container.yml,
+## given the container ID.
+
+if [ "$#" -ne 1 ]; then
+ echo "Usage: $0 [container_id]"
+ exit 1
+fi
+
+CONTAINER_ID="$1"
+CONTAINER_DEF=$(
+ yq -o json ops/docker/ci_container.yml |
+ jq -r --arg container_id "${CONTAINER_ID}" '.[$container_id].container_def'
+)
+BUILD_ARGS=$(
+ yq -o json ops/docker/ci_container.yml |
+ jq -r --arg container_id "${CONTAINER_ID}" \
+ 'include "ops/docker/extract_build_args";
+ compute_build_args(.; $container_id)'
+)
+echo "CONTAINER_DEF='${CONTAINER_DEF}' BUILD_ARGS='${BUILD_ARGS}'"
diff --git a/ops/docker_build.py b/ops/docker_build.py
new file mode 100644
index 000000000000..1fed975ce223
--- /dev/null
+++ b/ops/docker_build.py
@@ -0,0 +1,137 @@
+"""
+Wrapper script to build a Docker container with layer caching
+"""
+
+import argparse
+import itertools
+import pathlib
+import subprocess
+import sys
+from typing import Optional
+
+from docker_run import OPS_DIR, fancy_print_cli_args
+
+
+def parse_build_args(raw_build_args: list[str]) -> dict[str, str]:
+ parsed_build_args = dict()
+ for arg in raw_build_args:
+ try:
+ key, value = arg.split("=", maxsplit=1)
+ except ValueError as e:
+ raise ValueError(
+ f"Build argument must be of form KEY=VALUE. Got: {arg}"
+ ) from e
+ parsed_build_args[key] = value
+ return parsed_build_args
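+
+# For illustration: parse_build_args(["CUDA_VERSION_ARG=12.4.1"]) returns
+# {"CUDA_VERSION_ARG": "12.4.1"}; maxsplit=1 lets values themselves contain "=".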
+
+
+def docker_build(
+ container_id: str,
+ *,
+ build_args: dict[str, str],
+ dockerfile_path: pathlib.Path,
+ docker_context_path: pathlib.Path,
+ cache_from: Optional[str],
+ cache_to: Optional[str],
+) -> None:
+ ## Set up command-line arguments to be passed to `docker build`
+ # Build args
+ docker_build_cli_args = list(
+ itertools.chain.from_iterable(
+ [["--build-arg", f"{k}={v}"] for k, v in build_args.items()]
+ )
+ )
+ # When building an image using a non-default driver, we need to specify
+ # `--load` to load it to the image store.
+ # See https://docs.docker.com/build/builders/drivers/
+ docker_build_cli_args.append("--load")
+ # Layer caching
+ if cache_from:
+ docker_build_cli_args.extend(["--cache-from", cache_from])
+ if cache_to:
+ docker_build_cli_args.extend(["--cache-to", cache_to])
+ # Remaining CLI args
+ docker_build_cli_args.extend(
+ [
+ "--progress=plain",
+ "--ulimit",
+ "nofile=1024000:1024000",
+ "-t",
+ container_id,
+ "-f",
+ str(dockerfile_path),
+ str(docker_context_path),
+ ]
+ )
+ cli_args = ["docker", "build"] + docker_build_cli_args
+ fancy_print_cli_args(cli_args)
+ subprocess.run(cli_args, check=True, encoding="utf-8")
+
+
+def main(args: argparse.Namespace) -> None:
+ # Dockerfile to be used in docker build
+ dockerfile_path = (
+ OPS_DIR / "docker" / "dockerfile" / f"Dockerfile.{args.container_def}"
+ )
+ docker_context_path = OPS_DIR
+
+ build_args = parse_build_args(args.build_arg)
+
+ docker_build(
+ args.container_id,
+ build_args=build_args,
+ dockerfile_path=dockerfile_path,
+ docker_context_path=docker_context_path,
+ cache_from=args.cache_from,
+ cache_to=args.cache_to,
+ )
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Build a Docker container")
+ parser.add_argument(
+ "--container-def",
+ type=str,
+ required=True,
+ help=(
+ "String uniquely identifying the container definition. The container "
+ "definition will be fetched from "
+            "ops/docker/dockerfile/Dockerfile.CONTAINER_DEF."
+ ),
+ )
+ parser.add_argument(
+ "--container-id",
+ type=str,
+ required=True,
+ help="String ID to assign to the newly built container",
+ )
+ parser.add_argument(
+ "--build-arg",
+ type=str,
+ default=[],
+ action="append",
+ help=(
+ "Build-time variable(s) to be passed to `docker build`. Each variable "
+ "should be specified as a key-value pair in the form KEY=VALUE. "
+ "The variables should match the ARG instructions in the Dockerfile. "
+ "When passing multiple variables, specify --build-arg multiple times. "
+            "Example: --build-arg CUDA_VERSION_ARG=12.5 --build-arg RAPIDS_VERSION_ARG=24.10"
+ ),
+ )
+ parser.add_argument(
+ "--cache-from",
+ type=str,
+ help="Use an external cache source for the Docker build",
+ )
+ parser.add_argument(
+ "--cache-to",
+ type=str,
+ help="Export layers from the container to an external cache destination",
+ )
+
+ if len(sys.argv) == 1:
+ parser.print_help()
+ sys.exit(1)
+
+ parsed_args = parser.parse_args()
+ main(parsed_args)
diff --git a/ops/docker_build.sh b/ops/docker_build.sh
new file mode 100755
index 000000000000..0539f817ba8e
--- /dev/null
+++ b/ops/docker_build.sh
@@ -0,0 +1,141 @@
+#!/bin/bash
+## Build a CI container and cache the layers in AWS ECR (Elastic Container Registry).
+## This script provides a convenient wrapper for ops/docker_build.py.
+## Build-time variables (--build-arg) and container definition are fetched from
+## ops/docker/ci_container.yml.
+##
+## Note. This script takes in all inputs via environment variables.
+
+INPUT_DOC=$(
+cat <<-EOF
+Inputs
+ - CONTAINER_ID: String ID uniquely identifying the container (Required)
+ - BRANCH_NAME: Name of the current git branch or pull request (Required)
+ - USE_DOCKER_CACHE: If set to 1, enable caching
+EOF
+)
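+
+# Hypothetical invocation for local testing:
+#   CONTAINER_ID=xgb-ci.cpu BRANCH_NAME=master USE_DOCKER_CACHE=1 bash ops/docker_build.sh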
+
+ECR_LIFECYCLE_RULE=$(
+cat <<-EOF
+{
+ "rules": [
+ {
+ "rulePriority": 1,
+ "selection": {
+ "tagStatus": "any",
+ "countType": "sinceImagePushed",
+ "countUnit": "days",
+ "countNumber": 30
+ },
+ "action": {
+ "type": "expire"
+ }
+ }
+ ]
+}
+EOF
+)
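+# The policy above expires cached images 30 days after they are pushed,
+# bounding ECR storage costs.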
+
+set -euo pipefail
+
+for arg in "CONTAINER_ID" "BRANCH_NAME"
+do
+ if [[ -z "${!arg:-}" ]]
+ then
+ echo -e "Error: $arg must be set.\n${INPUT_DOC}"
+ exit 1
+ fi
+done
+
+# Fetch CONTAINER_DEF and BUILD_ARGS
+source <(ops/docker/extract_build_args.sh ${CONTAINER_ID} | tee /dev/stderr) 2>&1
+
+if [[ "${USE_DOCKER_CACHE:-}" != "1" ]] # Any value other than 1 is considered false
+then
+ USE_DOCKER_CACHE=0
+fi
+
+if [[ ${USE_DOCKER_CACHE} -eq 0 ]]
+then
+ echo "USE_DOCKER_CACHE not set; caching disabled"
+else
+ DOCKER_CACHE_ECR_ID=$(yq ".DOCKER_CACHE_ECR_ID" ops/docker/docker_cache_ecr.yml)
+ DOCKER_CACHE_ECR_REGION=$(yq ".DOCKER_CACHE_ECR_REGION" ops/docker/docker_cache_ecr.yml)
+ DOCKER_CACHE_REPO="${DOCKER_CACHE_ECR_ID}.dkr.ecr.${DOCKER_CACHE_ECR_REGION}.amazonaws.com"
+ echo "Using AWS ECR; repo URL = ${DOCKER_CACHE_REPO}"
+ # Login for Docker registry
+ echo "aws ecr get-login-password --region ${DOCKER_CACHE_ECR_REGION} |" \
+ "docker login --username AWS --password-stdin ${DOCKER_CACHE_REPO}"
+ aws ecr get-login-password --region ${DOCKER_CACHE_ECR_REGION} \
+ | docker login --username AWS --password-stdin ${DOCKER_CACHE_REPO}
+fi
+
+# Pull pre-built container from the cache
+# First try locating one for the particular branch or pull request
+CACHE_FROM_CMD=""
+IS_CACHED=0
+if [[ ${USE_DOCKER_CACHE} -eq 1 ]]
+then
+ DOCKER_TAG="${BRANCH_NAME//\//-}" # Slashes are not allowed in Docker tag
+ DOCKER_URL="${DOCKER_CACHE_REPO}/${CONTAINER_ID}:${DOCKER_TAG}"
+ echo "docker pull --quiet ${DOCKER_URL}"
+ if time docker pull --quiet "${DOCKER_URL}"
+ then
+ echo "Found a cached container for the branch ${BRANCH_NAME}: ${DOCKER_URL}"
+ IS_CACHED=1
+ else
+ # If there's no pre-built container from the cache,
+ # use the pre-built container from the master branch.
+ DOCKER_URL="${DOCKER_CACHE_REPO}/${CONTAINER_ID}:master"
+ echo "Could not find a cached container for the branch ${BRANCH_NAME}." \
+ "Using a cached container from the master branch: ${DOCKER_URL}"
+ echo "docker pull --quiet ${DOCKER_URL}"
+ if time docker pull --quiet "${DOCKER_URL}"
+ then
+ IS_CACHED=1
+ else
+ echo "Could not find a cached container for the master branch either."
+ IS_CACHED=0
+ fi
+ fi
+ if [[ $IS_CACHED -eq 1 ]]
+ then
+ CACHE_FROM_CMD="--cache-from type=registry,ref=${DOCKER_URL}"
+ fi
+fi
+
+# Run Docker build
+set -x
+python3 ops/docker_build.py \
+ --container-def ${CONTAINER_DEF} \
+ --container-id ${CONTAINER_ID} \
+ ${BUILD_ARGS} \
+ --cache-to type=inline \
+ ${CACHE_FROM_CMD}
+set +x
+
+# Now cache the new container
+if [[ ${USE_DOCKER_CACHE} -eq 1 ]]
+then
+ DOCKER_URL="${DOCKER_CACHE_REPO}/${CONTAINER_ID}:${DOCKER_TAG}"
+ echo "docker tag ${CONTAINER_ID} ${DOCKER_URL}"
+ docker tag "${CONTAINER_ID}" "${DOCKER_URL}"
+
+ # Attempt to create Docker repository; it will fail if the repository already exists
+ echo "aws ecr create-repository --repository-name ${CONTAINER_ID} --region ${DOCKER_CACHE_ECR_REGION}"
+ if aws ecr create-repository --repository-name ${CONTAINER_ID} --region ${DOCKER_CACHE_ECR_REGION}
+ then
+ # Repository was created. Now set expiration policy
+ echo "aws ecr put-lifecycle-policy --repository-name ${CONTAINER_ID}" \
+ "--region ${DOCKER_CACHE_ECR_REGION} --lifecycle-policy-text file:///dev/stdin"
+ echo "${ECR_LIFECYCLE_RULE}" | aws ecr put-lifecycle-policy --repository-name ${CONTAINER_ID} \
+ --region ${DOCKER_CACHE_ECR_REGION} --lifecycle-policy-text file:///dev/stdin
+ fi
+
+ echo "docker push --quiet ${DOCKER_URL}"
+ if ! time docker push --quiet "${DOCKER_URL}"
+ then
+ echo "ERROR: could not update Docker cache ${DOCKER_URL}"
+ exit 1
+ fi
+fi
diff --git a/ops/docker_run.py b/ops/docker_run.py
new file mode 100644
index 000000000000..7e61c5a14f39
--- /dev/null
+++ b/ops/docker_run.py
@@ -0,0 +1,168 @@
+"""
+Wrapper script to run a command inside a Docker container
+"""
+
+import argparse
+import grp
+import itertools
+import os
+import pathlib
+import pwd
+import subprocess
+import sys
+import textwrap
+
+OPS_DIR = pathlib.Path(__file__).expanduser().resolve().parent
+PROJECT_ROOT_DIR = OPS_DIR.parent
+LINEWIDTH = 88
+TEXT_WRAPPER = textwrap.TextWrapper(
+ width=LINEWIDTH,
+ initial_indent="",
+ subsequent_indent=" ",
+ break_long_words=False,
+ break_on_hyphens=False,
+)
+
+
+def parse_run_args(raw_run_args: str) -> list[str]:
+ return [x for x in raw_run_args.split() if x]
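+
+# For illustration: parse_run_args("--cap-add SYS_PTRACE --shm-size=4g") returns
+# ["--cap-add", "SYS_PTRACE", "--shm-size=4g"]; quoted arguments containing
+# spaces are not supported by this simple whitespace split.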
+
+
+def get_user_ids() -> dict[str, str]:
+ uid = os.getuid()
+ gid = os.getgid()
+ return {
+ "CI_BUILD_UID": str(uid),
+ "CI_BUILD_USER": pwd.getpwuid(uid).pw_name,
+ "CI_BUILD_GID": str(gid),
+ "CI_BUILD_GROUP": grp.getgrgid(gid).gr_name,
+ }
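+
+# These variables are consumed by ops/docker/entrypoint.sh, which recreates the
+# host user inside the container so generated files stay accessible on the host.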
+
+
+def fancy_print_cli_args(cli_args: list[str]) -> None:
+ print(
+ "=" * LINEWIDTH
+ + "\n"
+ + " \\\n".join(TEXT_WRAPPER.wrap(" ".join(cli_args)))
+ + "\n"
+ + "=" * LINEWIDTH
+ + "\n",
+ flush=True,
+ )
+
+
+def docker_run(
+ container_id: str,
+ command_args: list[str],
+ *,
+ use_gpus: bool,
+ workdir: pathlib.Path,
+ user_ids: dict[str, str],
+ extra_args: list[str],
+) -> None:
+ # Command-line arguments to be passed to `docker run`
+ docker_run_cli_args = ["--rm", "--pid=host"]
+
+ if use_gpus:
+ docker_run_cli_args.extend(["--gpus", "all"])
+
+ docker_run_cli_args.extend(["-v", f"{workdir}:/workspace", "-w", "/workspace"])
+ docker_run_cli_args.extend(
+ itertools.chain.from_iterable([["-e", f"{k}={v}"] for k, v in user_ids.items()])
+ )
+ docker_run_cli_args.extend(extra_args)
+ docker_run_cli_args.append(container_id)
+ docker_run_cli_args.extend(command_args)
+
+ cli_args = ["docker", "run"] + docker_run_cli_args
+ fancy_print_cli_args(cli_args)
+ subprocess.run(cli_args, check=True, encoding="utf-8")
+
+
+def main(args: argparse.Namespace) -> None:
+ run_args = parse_run_args(args.run_args)
+ user_ids = get_user_ids()
+
+ if args.use_gpus:
+ print("Using NVIDIA GPUs for `docker run`")
+ if args.interactive:
+ print("Using interactive mode for `docker run`")
+ run_args.append("-it")
+
+ docker_run(
+ args.container_id,
+ args.command_args,
+ use_gpus=args.use_gpus,
+ workdir=args.workdir,
+ user_ids=user_ids,
+ extra_args=run_args,
+ )
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ usage=(
+ f"{sys.argv[0]} --container-id CONTAINER_ID [--use-gpus] [--interactive] "
+ "[--workdir WORKDIR] [--run-args RUN_ARGS] -- COMMAND_ARG "
+ "[COMMAND_ARG ...]"
+ ),
+ description="Run tasks inside a Docker container",
+ )
+ parser.add_argument(
+ "--container-id",
+ type=str,
+ required=True,
+ help="String ID of the container to run.",
+ )
+ parser.add_argument(
+ "--use-gpus",
+ action="store_true",
+ help=(
+ "Grant the container access to NVIDIA GPUs; requires the NVIDIA "
+ "Container Toolkit."
+ ),
+ )
+ parser.add_argument(
+ "--interactive",
+ action="store_true",
+ help=(
+            "Run the container in interactive mode; requires an interactive shell "
+            "(TTY). With this flag, you can use Ctrl-C to interrupt a long-running "
+ "command."
+ ),
+ )
+ parser.add_argument(
+ "--workdir",
+ type=lambda p: pathlib.Path(p).expanduser().resolve(),
+ default=PROJECT_ROOT_DIR,
+ help="Path to working directory; if unset, use the project's root",
+ )
+ parser.add_argument(
+ "--run-args",
+ type=str,
+ default="",
+ help=(
+ "Argument(s) to be passed to `docker run`. When passing multiple "
+ "arguments, use single quotes to wrap them. Example: "
+ "--run-args '--cap-add SYS_PTRACE --shm-size=4g'"
+ ),
+ )
+ parser.add_argument(
+ "command_args",
+ metavar="COMMAND_ARG",
+ type=str,
+ nargs="+",
+ help=(
+ "Argument(s) for the command to execute. NOTE. Make sure to specify "
+ "double-dash (--) to clearly distinguish between the command and the "
+ "preceding parameters. Example: --run-args '--cap-add SYS_PTRACE "
+ "--shm-size=4g' -- ./myprog"
+ ),
+ )
+
+ if len(sys.argv) == 1:
+ parser.print_help()
+ sys.exit(1)
+
+ parsed_args = parser.parse_args()
+ main(parsed_args)
diff --git a/ops/packer/linux/bootstrap.sh b/ops/packer/linux/bootstrap.sh
new file mode 100644
index 000000000000..57be6e14b507
--- /dev/null
+++ b/ops/packer/linux/bootstrap.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+set -euo pipefail
+
+## Install Docker
+# Add Docker's official GPG key:
+sudo install -m 0755 -d /etc/apt/keyrings
+sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
+sudo chmod a+r /etc/apt/keyrings/docker.asc
+# Add the repository to Apt sources:
+echo \
+ "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
+ $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
+ sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
+sudo apt-get update
+sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
+# Allow users to use Docker without sudo
+sudo usermod -aG docker ubuntu
+
+# Start Docker daemon
+sudo systemctl is-active --quiet docker.service || sudo systemctl start docker.service
+sudo systemctl is-enabled --quiet docker.service || sudo systemctl enable docker.service
+sleep 10 # Docker daemon takes time to come up after installing
+sudo docker info
+
+## Install NVIDIA Container Toolkit
+curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
+ && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
+ sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
+ sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
+sudo apt-get update
+sudo apt-get install -y nvidia-container-toolkit
+sudo nvidia-ctk runtime configure --runtime=docker
+sudo systemctl restart docker
+
+sleep 10
+sudo docker run --rm --gpus all ubuntu nvidia-smi
+sudo systemctl stop docker
+
+## Install AWS CLI v2
+wget -nv https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip -O awscliv2.zip
+unzip -q awscliv2.zip
+sudo ./aws/install
+rm -rf ./aws/ ./awscliv2.zip
+
+## Install jq and yq
+sudo apt-get update && sudo apt-get install -y jq
+mkdir yq/
+pushd yq/
+wget -nv https://github.com/mikefarah/yq/releases/download/v4.44.3/yq_linux_amd64.tar.gz -O - | \
+ tar xz && sudo mv ./yq_linux_amd64 /usr/bin/yq
+popd
+rm -rf yq/
diff --git a/ops/packer/linux/install_drivers.sh b/ops/packer/linux/install_drivers.sh
new file mode 100644
index 000000000000..07309be836a8
--- /dev/null
+++ b/ops/packer/linux/install_drivers.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -euo pipefail
+
+## Install basic tools
+echo 'debconf debconf/frontend select Noninteractive' | sudo debconf-set-selections
+sudo apt-get update
+sudo apt-get install -y cmake git build-essential wget ca-certificates curl unzip
+
+## Install CUDA Toolkit 12.6 (Driver will be installed later)
+wget -nv https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb
+sudo dpkg -i cuda-keyring_1.1-1_all.deb
+sudo apt-get update
+sudo apt-get -y install cuda-toolkit-12-6 cuda-drivers-565
+rm cuda-keyring_1.1-1_all.deb
diff --git a/ops/packer/linux/linux.pkr.hcl b/ops/packer/linux/linux.pkr.hcl
new file mode 100644
index 000000000000..c6990894764a
--- /dev/null
+++ b/ops/packer/linux/linux.pkr.hcl
@@ -0,0 +1,79 @@
+packer {
+ required_plugins {
+ amazon = {
+ source = "github.com/hashicorp/amazon"
+ version = "~> 1"
+ }
+ }
+}
+
+locals {
+ ami_name_prefix = "xgboost-ci"
+ image_name = "RunsOn worker with Ubuntu 24.04 + CUDA driver"
+ region = "us-west-2"
+ timestamp = regex_replace(timestamp(), "[- TZ:]", "")
+ volume_size = 40
+}
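+
+# For example, a timestamp() of "2024-11-01T12:34:56Z" becomes "20241101123456"
+# after regex_replace, giving each AMI a unique, sortable name suffix.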
+
+data "amazon-ami" "aws-ubuntu-x64" {
+ filters = {
+ name = "ubuntu/images/hvm-ssd-gp3/ubuntu-noble-24.04-amd64-server-*"
+ root-device-type = "ebs"
+ virtualization-type = "hvm"
+ }
+ most_recent = true
+ owners = ["amazon"]
+}
+
+source "amazon-ebs" "runs-on-linux" {
+ source_ami = "${data.amazon-ami.aws-ubuntu-x64.id}"
+ ami_name = "${local.ami_name_prefix}-runs-on-linux-${local.timestamp}"
+ ami_description = "${local.image_name}"
+ ami_regions = ["${local.region}"]
+ ami_virtualization_type = "hvm"
+ associate_public_ip_address = true
+ communicator = "ssh"
+ instance_type = "g4dn.xlarge"
+ region = "${local.region}"
+ ssh_timeout = "10m"
+ ssh_username = "ubuntu"
+ ssh_file_transfer_method = "sftp"
+ user_data_file = "setup_ssh.sh"
+ launch_block_device_mappings {
+ device_name = "/dev/sda1"
+ volume_size = "${local.volume_size}"
+ volume_type = "gp3"
+ delete_on_termination = true
+ }
+ aws_polling { # Wait up to 1 hour until the AMI is ready
+ delay_seconds = 15
+ max_attempts = 240
+ }
+ snapshot_tags = {
+ Name = "${local.image_name}"
+ BuildTime = "${local.timestamp}"
+ }
+ tags = {
+ Name = "${local.image_name}"
+ BuildTime = "${local.timestamp}"
+ }
+}
+
+build {
+ sources = ["source.amazon-ebs.runs-on-linux"]
+
+ provisioner "shell" {
+ script = "install_drivers.sh"
+ pause_after = "30s"
+ }
+
+ provisioner "shell" {
+ expect_disconnect = true
+ inline = ["echo 'Reboot VM'", "sudo reboot"]
+ }
+
+ provisioner "shell" {
+ pause_before = "1m0s"
+ script = "bootstrap.sh"
+ }
+}
diff --git a/ops/packer/linux/setup_ssh.sh b/ops/packer/linux/setup_ssh.sh
new file mode 100644
index 000000000000..501b4da455f5
--- /dev/null
+++ b/ops/packer/linux/setup_ssh.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+systemctl start ssh
diff --git a/ops/packer/windows/bootstrap.ps1 b/ops/packer/windows/bootstrap.ps1
new file mode 100644
index 000000000000..c67f3b73fb9a
--- /dev/null
+++ b/ops/packer/windows/bootstrap.ps1
@@ -0,0 +1,73 @@
+## Install packages from Chocolatey
+
+# jq & yq
+Write-Output "Installing jq and yq..."
+choco install jq --version=1.7.1
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+choco install yq --version=4.40.2
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+
+# AWS CLI
+Write-Output "Installing AWS CLI..."
+choco install awscli --version=2.18.11
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+
+# Git
+Write-Host '>>> Installing Git...'
+choco install git --version=2.47.0
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+
+# CMake
+Write-Host '>>> Installing CMake 3.30.5...'
+choco install cmake --version 3.30.5 --installargs "ADD_CMAKE_TO_PATH=System"
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+
+# Notepad++
+Write-Host '>>> Installing Notepad++...'
+choco install notepadplusplus
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+
+# Miniforge3
+Write-Host '>>> Installing Miniforge3...'
+choco install miniforge3 --params="'/InstallationType:AllUsers /RegisterPython:1 /D:C:\tools\miniforge3'"
+C:\tools\miniforge3\Scripts\conda.exe init --user --system
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+. "C:\Windows\System32\WindowsPowerShell\v1.0\profile.ps1"
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+conda config --set auto_activate_base false
+
+# Java 11
+Write-Host '>>> Installing Java 11...'
+choco install openjdk11
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+
+# Maven
+Write-Host '>>> Installing Maven...'
+choco install maven
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+
+# GraphViz
+Write-Host '>>> Installing GraphViz...'
+choco install graphviz
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+
+# Visual Studio 2022 Community
+Write-Host '>>> Installing Visual Studio 2022 Community...'
+choco install visualstudio2022community `
+ --params "--wait --passive --norestart"
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+choco install visualstudio2022-workload-nativedesktop --params `
+ "--wait --passive --norestart --includeOptional"
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+
+# CUDA 12.5
+Write-Host '>>> Installing CUDA 12.5...'
+choco install cuda --version=12.5.1.555
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+
+# R 4.3
+Write-Host '>>> Installing R...'
+choco install r.project --version=4.3.2
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+choco install rtools --version=4.3.5550
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
diff --git a/ops/packer/windows/install_choco.ps1 b/ops/packer/windows/install_choco.ps1
new file mode 100644
index 000000000000..131e8129feaa
--- /dev/null
+++ b/ops/packer/windows/install_choco.ps1
@@ -0,0 +1,14 @@
+## Adapted from https://github.com/chorrell/packer-aws-windows-openssh/blob/20c40aa60b54469b3d85650a2e2e45e35ed83bc7/files/InstallChoco.ps1
+## Author: Christopher Horrell (https://github.com/chorrell)
+
+$ErrorActionPreference = "Stop"
+
+# Install Chocolatey
+# See https://chocolatey.org/install#individual
+Set-ExecutionPolicy Bypass -Scope Process -Force
+[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072
+Invoke-Expression ((New-Object System.Net.WebClient).DownloadString("https://community.chocolatey.org/install.ps1"))
+
+# Globally Auto confirm every action
+# See: https://docs.chocolatey.org/en-us/faqs#why-do-i-have-to-confirm-packages-now-is-there-a-way-to-remove-this
+choco feature enable -n allowGlobalConfirmation
diff --git a/ops/packer/windows/setup_ssh.ps1 b/ops/packer/windows/setup_ssh.ps1
new file mode 100644
index 000000000000..a7bdee898002
--- /dev/null
+++ b/ops/packer/windows/setup_ssh.ps1
@@ -0,0 +1,58 @@
+
+## Adapted from https://github.com/chorrell/packer-aws-windows-openssh/blob/20c40aa60b54469b3d85650a2e2e45e35ed83bc7/files/SetupSsh.ps1
+## Author: Christopher Horrell (https://github.com/chorrell)
+
+# Don't display progress bars
+# See: https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_preference_variables?view=powershell-7.3#progresspreference
+$ProgressPreference = "SilentlyContinue"
+$ErrorActionPreference = "Stop"
+
+# Install OpenSSH using Add-WindowsCapability
+# See: https://learn.microsoft.com/en-us/windows-server/administration/openssh/openssh_install_firstuse?tabs=powershell#install-openssh-for-windows
+
+Write-Host "Installing and starting ssh-agent"
+Add-WindowsCapability -Online -Name OpenSSH.Client~~~~0.0.1.0
+Set-Service -Name ssh-agent -StartupType Automatic
+Start-Service ssh-agent
+
+Write-Host "Installing and starting sshd"
+Add-WindowsCapability -Online -Name OpenSSH.Server~~~~0.0.1.0
+Set-Service -Name sshd -StartupType Automatic
+Start-Service sshd
+
+# Confirm the firewall rule is configured. It should be created automatically by setup; verify it and create it if missing.
+if (!(Get-NetFirewallRule -Name "OpenSSH-Server-In-TCP" -ErrorAction SilentlyContinue | Select-Object Name, Enabled)) {
+ Write-Output "Firewall Rule 'OpenSSH-Server-In-TCP' does not exist, creating it..."
+ New-NetFirewallRule -Name "OpenSSH-Server-In-TCP" -DisplayName "OpenSSH Server (sshd)" -Enabled True -Direction Inbound -Protocol TCP -Action Allow -LocalPort 22
+} else {
+ Write-Output "Firewall rule 'OpenSSH-Server-In-TCP' has been created and exists."
+}
+
+# Set default shell to Powershell
+New-ItemProperty -Path "HKLM:\SOFTWARE\OpenSSH" -Name DefaultShell -Value "C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe" -PropertyType String -Force
+
+$keyDownloadScript = Join-Path $env:ProgramData "ssh\download-key.ps1"
+
+@'
+# Download the instance's public key to $env:ProgramData\ssh\administrators_authorized_keys
+$openSSHAuthorizedKeys = Join-Path $env:ProgramData "ssh\administrators_authorized_keys"
+
+$keyUrl = "http://169.254.169.254/latest/meta-data/public-keys/0/openssh-key"
+Invoke-WebRequest $keyUrl -OutFile $openSSHAuthorizedKeys
+
+# Ensure ACL for administrators_authorized_keys is correct
+# See https://learn.microsoft.com/en-us/windows-server/administration/openssh/openssh_server_configuration#authorizedkeysfile
+icacls.exe $openSSHAuthorizedKeys /inheritance:r /grant "Administrators:F" /grant "SYSTEM:F"
+'@ | Out-File $keyDownloadScript
+
+# Create Task
+$taskName = "DownloadKey"
+$principal = New-ScheduledTaskPrincipal -UserID "NT AUTHORITY\SYSTEM" -LogonType ServiceAccount -RunLevel Highest
+$action = New-ScheduledTaskAction -Execute "Powershell.exe" -Argument "-NoProfile -File ""$keyDownloadScript"""
+$trigger = New-ScheduledTaskTrigger -AtStartup
+Register-ScheduledTask -Action $action -Trigger $trigger -Principal $principal -TaskName $taskName -Description $taskName
+
+# Fetch key via $keyDownloadScript
+& Powershell.exe -ExecutionPolicy Bypass -File $keyDownloadScript
+
+
diff --git a/ops/packer/windows/sysprep.ps1 b/ops/packer/windows/sysprep.ps1
new file mode 100644
index 000000000000..a0470309f9da
--- /dev/null
+++ b/ops/packer/windows/sysprep.ps1
@@ -0,0 +1,14 @@
+## Adapted from https://github.com/chorrell/packer-aws-windows-openssh/blob/20c40aa60b54469b3d85650a2e2e45e35ed83bc7/files/PrepareImage.ps1
+## Author: Christopher Horrell (https://github.com/chorrell)
+
+$ErrorActionPreference = "Stop"
+
+Write-Output "Cleaning up keys"
+$openSSHAuthorizedKeys = Join-Path $env:ProgramData "ssh\administrators_authorized_keys"
+Remove-Item -Recurse -Force -Path $openSSHAuthorizedKeys
+
+# Make sure task is enabled
+Enable-ScheduledTask "DownloadKey"
+
+Write-Output "Running Sysprep"
+& "$Env:Programfiles\Amazon\EC2Launch\ec2launch.exe" sysprep
diff --git a/ops/packer/windows/windows.pkr.hcl b/ops/packer/windows/windows.pkr.hcl
new file mode 100644
index 000000000000..4c14b7b75806
--- /dev/null
+++ b/ops/packer/windows/windows.pkr.hcl
@@ -0,0 +1,90 @@
+packer {
+ required_plugins {
+ amazon = {
+ source = "github.com/hashicorp/amazon"
+ version = "~> 1"
+ }
+ windows-update = {
+ version = "0.15.0"
+ source = "github.com/rgl/windows-update"
+ }
+ }
+}
+
+locals {
+ ami_name_prefix = "xgboost-ci"
+ image_name = "RunsOn worker with Windows Server 2022 + ssh + CUDA driver"
+ region = "us-west-2"
+ timestamp = regex_replace(timestamp(), "[- TZ:]", "")
+ volume_size = 120
+}
+
+data "amazon-ami" "aws-windows-x64" {
+ filters = {
+ name = "Windows_Server-2022-English-Full-Base-*"
+ root-device-type = "ebs"
+ virtualization-type = "hvm"
+ }
+ most_recent = true
+ owners = ["amazon"]
+}
+
+source "amazon-ebs" "runs-on-windows" {
+ source_ami = "${data.amazon-ami.aws-windows-x64.id}"
+ ami_name = "${local.ami_name_prefix}-runs-on-windows-${local.timestamp}"
+ ami_description = "${local.image_name}"
+ ami_regions = ["${local.region}"]
+ ami_virtualization_type = "hvm"
+ associate_public_ip_address = true
+ communicator = "ssh"
+ instance_type = "g4dn.xlarge"
+ region = "${local.region}"
+ ssh_timeout = "10m"
+ ssh_username = "Administrator"
+ ssh_file_transfer_method = "sftp"
+ user_data_file = "setup_ssh.ps1"
+ launch_block_device_mappings {
+ device_name = "/dev/sda1"
+ volume_size = "${local.volume_size}"
+ volume_type = "gp3"
+ delete_on_termination = true
+ }
+ aws_polling { # Wait up to 2.5 hours until the AMI is ready
+ delay_seconds = 15
+ max_attempts = 600
+ }
+ fast_launch {
+ enable_fast_launch = true
+ target_resource_count = 10
+ }
+ snapshot_tags = {
+ Name = "${local.image_name}"
+ BuildTime = "${local.timestamp}"
+ }
+ tags = {
+ Name = "${local.image_name}"
+ BuildTime = "${local.timestamp}"
+ }
+}
+
+build {
+ sources = ["source.amazon-ebs.runs-on-windows"]
+
+ provisioner "windows-update" {}
+
+ provisioner "powershell" {
+ script = "install_choco.ps1"
+ }
+
+ provisioner "windows-restart" {
+ max_retries = 3
+ }
+
+ provisioner "powershell" {
+ script = "bootstrap.ps1"
+ }
+
+  provisioner "powershell" { # Sysprep should run last
+ script = "sysprep.ps1"
+ }
+}
diff --git a/tests/buildkite/cpu_only_pypkg.patch b/ops/patch/cpu_only_pypkg.patch
similarity index 100%
rename from tests/buildkite/cpu_only_pypkg.patch
rename to ops/patch/cpu_only_pypkg.patch
diff --git a/tests/buildkite/manylinux2014_warning.patch b/ops/patch/manylinux2014_warning.patch
similarity index 100%
rename from tests/buildkite/manylinux2014_warning.patch
rename to ops/patch/manylinux2014_warning.patch
diff --git a/tests/buildkite/remove_nccl_dep.patch b/ops/patch/remove_nccl_dep.patch
similarity index 100%
rename from tests/buildkite/remove_nccl_dep.patch
rename to ops/patch/remove_nccl_dep.patch
diff --git a/tests/buildkite/build-cpu-arm64.sh b/ops/pipeline/build-cpu-arm64.sh
similarity index 53%
rename from tests/buildkite/build-cpu-arm64.sh
rename to ops/pipeline/build-cpu-arm64.sh
index 8b3847ed58b9..4be57557ea36 100755
--- a/tests/buildkite/build-cpu-arm64.sh
+++ b/ops/pipeline/build-cpu-arm64.sh
@@ -1,47 +1,55 @@
#!/bin/bash
-set -euo pipefail
+set -euox pipefail
WHEEL_TAG=manylinux_2_28_aarch64
echo "--- Build CPU code targeting ARM64"
-source tests/buildkite/conftest.sh
-
-command_wrapper="tests/ci_build/ci_build.sh aarch64"
+source ops/pipeline/enforce-ci.sh
echo "--- Build libxgboost from the source"
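+# ops/docker_run.py runs the command that follows "--" inside the named CI
+# container; the same pattern is used throughout the new pipeline scripts.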
-$command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=aarch64_test \
- -DUSE_OPENMP=ON -DHIDE_CXX_SYMBOL=ON
+python3 ops/docker_run.py \
+ --container-id xgb-ci.aarch64 \
+ -- ops/script/build_via_cmake.sh \
+ --conda-env=aarch64_test \
+ -DUSE_OPENMP=ON \
+  -DHIDE_CXX_SYMBOLS=ON
+
echo "--- Run Google Test"
-$command_wrapper bash -c "cd build && ctest --extra-verbose"
+python3 ops/docker_run.py \
+ --container-id xgb-ci.aarch64 \
+ -- bash -c "cd build && ctest --extra-verbose"
echo "--- Build binary wheel"
-$command_wrapper bash -c \
+python3 ops/docker_run.py \
+ --container-id xgb-ci.aarch64 \
+ -- bash -c \
"cd python-package && rm -rf dist/* && pip wheel --no-deps -v . --wheel-dir dist/"
-$command_wrapper python tests/ci_build/rename_whl.py \
+python3 ops/script/rename_whl.py \
--wheel-path python-package/dist/*.whl \
- --commit-hash ${BUILDKITE_COMMIT} \
+ --commit-hash ${GITHUB_SHA} \
--platform-tag ${WHEEL_TAG}
echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard"
-$command_wrapper auditwheel repair --plat ${WHEEL_TAG} python-package/dist/*.whl
-$command_wrapper python tests/ci_build/rename_whl.py \
+python3 ops/docker_run.py \
+ --container-id xgb-ci.aarch64 \
+ -- auditwheel repair --plat ${WHEEL_TAG} python-package/dist/*.whl
+python3 ops/script/rename_whl.py \
--wheel-path wheelhouse/*.whl \
- --commit-hash ${BUILDKITE_COMMIT} \
+ --commit-hash ${GITHUB_SHA} \
--platform-tag ${WHEEL_TAG}
mv -v wheelhouse/*.whl python-package/dist/
+
# Make sure that libgomp.so is vendored in the wheel
-$command_wrapper bash -c \
+python3 ops/docker_run.py \
+ --container-id xgb-ci.aarch64 \
+ -- bash -c \
"unzip -l python-package/dist/*.whl | grep libgomp || exit -1"
echo "--- Upload Python wheel"
-buildkite-agent artifact upload "python-package/dist/*.whl"
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
then
aws s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/${BRANCH_NAME}/ \
--acl public-read --no-progress
fi
-
-echo "--- Stash XGBoost CLI executable"
-buildkite-agent artifact upload ./xgboost
diff --git a/ops/pipeline/build-cpu.sh b/ops/pipeline/build-cpu.sh
new file mode 100755
index 000000000000..22384d056f15
--- /dev/null
+++ b/ops/pipeline/build-cpu.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+set -euox pipefail
+
+source ops/pipeline/enforce-ci.sh
+
+echo "--- Build CPU code"
+
+# This step is not strictly necessary, but we include it to ensure that the
+# DMLC_CORE_USE_CMAKE flag is correctly propagated: the build must use the
+# configured header build/dmlc/build_config.h instead of
+# include/dmlc/build_config_default.h.
+rm -fv dmlc-core/include/dmlc/build_config_default.h
+
+# Sanitizer tests
+echo "--- Run Google Test with sanitizer enabled"
+# Work around https://github.com/google/sanitizers/issues/1614
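+# (Newer kernels default vm.mmap_rnd_bits to 32, which gives ASan more
+# address-space entropy than it can handle; 28 is a known-good value.)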
+sudo sysctl vm.mmap_rnd_bits=28
+python3 ops/docker_run.py \
+ --container-id xgb-ci.cpu \
+ -- ops/script/build_via_cmake.sh \
+ -DUSE_SANITIZER=ON \
+ -DENABLED_SANITIZERS="address;leak;undefined" \
+ -DCMAKE_BUILD_TYPE=Debug \
+ -DSANITIZER_PATH=/usr/lib/x86_64-linux-gnu/
+python3 ops/docker_run.py \
+ --container-id xgb-ci.cpu \
+ --run-args '-e ASAN_SYMBOLIZER_PATH=/usr/bin/llvm-symbolizer
+ -e ASAN_OPTIONS=symbolize=1
+ -e UBSAN_OPTIONS=print_stacktrace=1:log_path=ubsan_error.log
+ --cap-add SYS_PTRACE' \
+ -- bash -c \
+ "cd build && ./testxgboost --gtest_filter=-*DeathTest*"
+
+echo "--- Run Google Test"
+python3 ops/docker_run.py \
+ --container-id xgb-ci.cpu \
+ -- ops/script/build_via_cmake.sh \
+ -DCMAKE_PREFIX_PATH=/opt/grpc \
+ -DPLUGIN_FEDERATED=ON
+python3 ops/docker_run.py \
+ --container-id xgb-ci.cpu \
+ -- bash -c "cd build && ctest --extra-verbose"
diff --git a/ops/pipeline/build-cuda-with-rmm.sh b/ops/pipeline/build-cuda-with-rmm.sh
new file mode 100755
index 000000000000..50bbf8b340f3
--- /dev/null
+++ b/ops/pipeline/build-cuda-with-rmm.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+set -euox pipefail
+
+WHEEL_TAG=manylinux_2_28_x86_64
+
+source ops/pipeline/enforce-ci.sh
+
+echo "--- Build with CUDA with RMM"
+
+if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]]
+then
+ arch_flag="-DGPU_COMPUTE_VER=75"
+else
+ arch_flag=""
+fi
+
+echo "--- Build libxgboost from the source"
+python3 ops/docker_run.py \
+ --container-id xgb-ci.gpu_build_rockylinux8 \
+ -- ops/script/build_via_cmake.sh \
+ -DCMAKE_PREFIX_PATH="/opt/grpc;/opt/rmm;/opt/rmm/lib64/rapids/cmake" \
+ -DUSE_CUDA=ON \
+ -DUSE_OPENMP=ON \
+ -DHIDE_CXX_SYMBOLS=ON \
+ -DPLUGIN_FEDERATED=ON \
+ -DPLUGIN_RMM=ON \
+ -DUSE_NCCL=ON \
+ -DUSE_NCCL_LIB_PATH=ON \
+ -DNCCL_INCLUDE_DIR=/usr/include \
+ -DUSE_DLOPEN_NCCL=ON \
+ ${arch_flag}
+
+echo "--- Build binary wheel"
+python3 ops/docker_run.py \
+ --container-id xgb-ci.gpu_build_rockylinux8 \
+ -- bash -c \
+ "cd python-package && rm -rf dist/* && pip wheel --no-deps -v . --wheel-dir dist/"
+python3 ops/script/rename_whl.py \
+ --wheel-path python-package/dist/*.whl \
+ --commit-hash ${GITHUB_SHA} \
+ --platform-tag ${WHEEL_TAG}
+
+echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard"
+python3 ops/docker_run.py \
+ --container-id xgb-ci.$WHEEL_TAG \
+ -- auditwheel repair \
+ --plat ${WHEEL_TAG} python-package/dist/*.whl
+python3 ops/script/rename_whl.py \
+ --wheel-path wheelhouse/*.whl \
+ --commit-hash ${GITHUB_SHA} \
+ --platform-tag ${WHEEL_TAG}
+mv -v wheelhouse/*.whl python-package/dist/
+# Make sure that libgomp.so is vendored in the wheel
+python3 ops/docker_run.py \
+ --container-id xgb-ci.$WHEEL_TAG \
+ -- bash -c \
+ "unzip -l python-package/dist/*.whl | grep libgomp || exit -1"
+
+echo "--- Upload Python wheel"
+if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
+then
+ aws s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/experimental_build_with_rmm/ \
+ --acl public-read --no-progress
+fi
diff --git a/ops/pipeline/build-cuda.sh b/ops/pipeline/build-cuda.sh
new file mode 100755
index 000000000000..4ed82618da23
--- /dev/null
+++ b/ops/pipeline/build-cuda.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+set -euox pipefail
+
+WHEEL_TAG=manylinux_2_28_x86_64
+
+source ops/pipeline/enforce-ci.sh
+
+echo "--- Build with CUDA"
+
+if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]]
+then
+ arch_flag="-DGPU_COMPUTE_VER=75"
+else
+ arch_flag=""
+fi
+
+echo "--- Build libxgboost from the source"
+set -x
+# Work around https://github.com/NVIDIA/cccl/issues/1956
+# TODO(hcho3): Remove this once new CUDA version ships with CCCL 2.6.0+
+git clone https://github.com/NVIDIA/cccl.git -b v2.6.1 --quiet
+python3 ops/docker_run.py \
+ --container-id xgb-ci.gpu_build_rockylinux8 \
+ -- ops/script/build_via_cmake.sh \
+ -DCMAKE_PREFIX_PATH="/opt/grpc;/workspace/cccl" \
+ -DUSE_CUDA=ON \
+ -DUSE_OPENMP=ON \
+ -DHIDE_CXX_SYMBOLS=ON \
+ -DPLUGIN_FEDERATED=ON \
+ -DUSE_NCCL=ON \
+ -DUSE_NCCL_LIB_PATH=ON \
+ -DNCCL_INCLUDE_DIR=/usr/include \
+ -DUSE_DLOPEN_NCCL=ON \
+ ${arch_flag}
+
+echo "--- Build binary wheel"
+python3 ops/docker_run.py \
+ --container-id xgb-ci.gpu_build_rockylinux8 \
+ -- bash -c \
+ "cd python-package && rm -rf dist/* && pip wheel --no-deps -v . --wheel-dir dist/"
+python3 ops/script/rename_whl.py \
+ --wheel-path python-package/dist/*.whl \
+ --commit-hash ${GITHUB_SHA} \
+ --platform-tag ${WHEEL_TAG}
+
+echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard"
+python3 ops/docker_run.py \
+ --container-id xgb-ci.manylinux_2_28_x86_64 \
+ -- auditwheel repair \
+ --plat ${WHEEL_TAG} python-package/dist/*.whl
+python3 ops/script/rename_whl.py \
+ --wheel-path wheelhouse/*.whl \
+ --commit-hash ${GITHUB_SHA} \
+ --platform-tag ${WHEEL_TAG}
+mv -v wheelhouse/*.whl python-package/dist/
+# Make sure that libgomp.so is vendored in the wheel
+python3 ops/docker_run.py \
+ --container-id xgb-ci.manylinux_2_28_x86_64 \
+ -- bash -c "unzip -l python-package/dist/*.whl | grep libgomp || exit -1"
+
+echo "--- Upload Python wheel"
+if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
+then
+ aws s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/${BRANCH_NAME}/ \
+ --acl public-read --no-progress
+
+ # Generate the meta info which includes xgboost version and the commit info
+ python3 ops/docker_run.py \
+ --container-id xgb-ci.gpu_build_rockylinux8 \
+ -- python ops/script/format_wheel_meta.py \
+ --wheel-path python-package/dist/*.whl \
+ --commit-hash ${GITHUB_SHA} \
+ --platform-tag ${WHEEL_TAG} \
+ --meta-path python-package/dist/
+ aws s3 cp python-package/dist/meta.json s3://xgboost-nightly-builds/${BRANCH_NAME}/ \
+ --acl public-read --no-progress
+fi
+echo "-- Stash C++ test executable (testxgboost)"
diff --git a/tests/ci_build/build_r_pkg_with_cuda.sh b/ops/pipeline/build-gpu-rpkg-impl.sh
similarity index 73%
rename from tests/ci_build/build_r_pkg_with_cuda.sh
rename to ops/pipeline/build-gpu-rpkg-impl.sh
index 78a2afc1cdf7..2815b8f448f1 100755
--- a/tests/ci_build/build_r_pkg_with_cuda.sh
+++ b/ops/pipeline/build-gpu-rpkg-impl.sh
@@ -1,8 +1,12 @@
#!/bin/bash
-set -e
-set -x
-if [ "$#" -ne 1 ]
+## Build XGBoost R package with GPU support and package it in a tarball.
+## Users will be able to install it without having the CUDA Toolkit (CTK) installed
+## (only a compatible NVIDIA driver is needed).
+
+set -euo pipefail
+
+if [[ "$#" -ne 1 ]]
then
echo "Build the R package tarball with CUDA code. Usage: $0 [commit hash]"
exit 1
@@ -10,7 +14,7 @@ fi
commit_hash="$1"
-python tests/ci_build/test_r_package.py --task=pack
+python3 ops/script/test_r_package.py --task=pack
mv xgboost/ xgboost_rpack/
mkdir build
diff --git a/tests/buildkite/build-gpu-rpkg.sh b/ops/pipeline/build-gpu-rpkg.sh
similarity index 50%
rename from tests/buildkite/build-gpu-rpkg.sh
rename to ops/pipeline/build-gpu-rpkg.sh
index 83bcd9eb9c7b..e85826f36a26 100755
--- a/tests/buildkite/build-gpu-rpkg.sh
+++ b/ops/pipeline/build-gpu-rpkg.sh
@@ -1,16 +1,14 @@
#!/bin/bash
-set -euo pipefail
+set -euox pipefail
-source tests/buildkite/conftest.sh
+source ops/pipeline/enforce-ci.sh
echo "--- Build XGBoost R package with CUDA"
-
-tests/ci_build/ci_build.sh gpu_build_r_rockylinux8 \
- --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
- --build-arg R_VERSION_ARG=${R_VERSION} \
- tests/ci_build/build_r_pkg_with_cuda.sh \
- ${BUILDKITE_COMMIT}
+python3 ops/docker_run.py \
+ --container-id xgb-ci.gpu_build_r_rockylinux8 \
+ -- ops/pipeline/build-gpu-rpkg-impl.sh \
+ ${GITHUB_SHA}
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
then
diff --git a/ops/pipeline/build-jvm-doc-impl.sh b/ops/pipeline/build-jvm-doc-impl.sh
new file mode 100755
index 000000000000..4e95f284e25c
--- /dev/null
+++ b/ops/pipeline/build-jvm-doc-impl.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+## Build docs for the JVM packages and package them in a tarball.
+## Note: this script assumes that the user has already built libxgboost4j.so
+## and placed it in the lib/ directory.
+
+if [[ $# -ne 1 ]]
+then
+ echo "Usage: $0 [branch name]"
+ exit 1
+fi
+
+set -euo pipefail
+
+branch_name=$1
+
+# Copy in libxgboost4j.so
+mkdir -p jvm-packages/xgboost4j/src/main/resources/lib/linux/x86_64/
+cp -v lib/libxgboost4j.so jvm-packages/xgboost4j/src/main/resources/lib/linux/x86_64/
+
+cd jvm-packages/
+# Install JVM packages in local Maven repository
+mvn --no-transfer-progress install -Pdocs
+# Build Scaladocs
+mvn --no-transfer-progress scala:doc -Pdocs
+# Build Javadocs
+mvn --no-transfer-progress javadoc:javadoc -Pdocs
+
+# Package JVM docs in a tarball
+mkdir -p tmp/scaladocs
+cp -rv xgboost4j/target/reports/apidocs/ ./tmp/javadocs/
+cp -rv xgboost4j/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j/
+cp -rv xgboost4j-spark/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j-spark/
+cp -rv xgboost4j-spark-gpu/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j-spark-gpu/
+cp -rv xgboost4j-flink/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j-flink/
+
+cd tmp
+tar cvjf ${branch_name}.tar.bz2 javadocs/ scaladocs/
+mv ${branch_name}.tar.bz2 ..
+cd ..
+rm -rfv tmp/
+
diff --git a/ops/pipeline/build-jvm-doc.sh b/ops/pipeline/build-jvm-doc.sh
new file mode 100755
index 000000000000..0c1afe46e212
--- /dev/null
+++ b/ops/pipeline/build-jvm-doc.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+## Build docs for the JVM packages and package them in a tarball.
+## Note: this script assumes that the user has already built libxgboost4j.so
+## and placed it in the lib/ directory.
+
+set -euox pipefail
+
+source ops/pipeline/enforce-ci.sh
+
+echo "--- Build JVM packages doc"
+python3 ops/docker_run.py \
+ --container-id xgb-ci.jvm_gpu_build \
+ -- ops/pipeline/build-jvm-doc-impl.sh ${BRANCH_NAME}
+if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
+then
+ echo "--- Upload JVM packages doc"
+ aws s3 cp jvm-packages/${BRANCH_NAME}.tar.bz2 \
+ s3://xgboost-docs/${BRANCH_NAME}.tar.bz2 --acl public-read --no-progress
+fi
diff --git a/ops/pipeline/build-jvm-gpu.sh b/ops/pipeline/build-jvm-gpu.sh
new file mode 100755
index 000000000000..6bcd2a327553
--- /dev/null
+++ b/ops/pipeline/build-jvm-gpu.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+## Build libxgboost4j.so with CUDA
+
+set -euo pipefail
+
+source ops/pipeline/enforce-ci.sh
+
+echo "--- Build libxgboost4j.so with CUDA"
+
+if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]]
+then
+ arch_flag="-DGPU_COMPUTE_VER=75"
+else
+ arch_flag=""
+fi
+
+COMMAND=$(
+cat <<-EOF
+cd build-gpu/ && \
+cmake .. -DCMAKE_PREFIX_PATH=/workspace/cccl -GNinja -DUSE_CUDA=ON -DUSE_NCCL=ON \
+ -DJVM_BINDINGS=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ${arch_flag} && \
+ ninja
+EOF
+)
+
+set -x
+mkdir -p build-gpu/
+# Work around https://github.com/NVIDIA/cccl/issues/1956
+# TODO(hcho3): Remove this once new CUDA version ships with CCCL 2.6.0+
+git clone https://github.com/NVIDIA/cccl.git -b v2.6.1 --quiet --depth 1
+python3 ops/docker_run.py \
+ --container-id xgb-ci.jvm_gpu_build \
+ -- bash -c "${COMMAND}"
diff --git a/ops/pipeline/build-jvm-macos-apple-silicon.sh b/ops/pipeline/build-jvm-macos-apple-silicon.sh
new file mode 100755
index 000000000000..99ca20d7e1e3
--- /dev/null
+++ b/ops/pipeline/build-jvm-macos-apple-silicon.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+## Build libxgboost4j.dylib targeting macOS (Apple Silicon)
+
+set -euox pipefail
+
+source ops/pipeline/enforce-ci.sh
+
+# Display system info
+echo "--- Display system information"
+set -x
+system_profiler SPSoftwareDataType
+sysctl -n machdep.cpu.brand_string
+uname -m
+set +x
+
+brew install ninja libomp
+
+# Build XGBoost4J binary
+echo "--- Build libxgboost4j.dylib"
+set -x
+mkdir build
+pushd build
+export JAVA_HOME=$(/usr/libexec/java_home)
+cmake .. -GNinja -DJVM_BINDINGS=ON -DUSE_OPENMP=ON -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15
+ninja -v
+popd
+rm -rf build
+otool -L lib/libxgboost.dylib
+set +x
+
+echo "--- Upload libxgboost4j.dylib"
+set -x
+pushd lib
+libname=libxgboost4j_m1_${GITHUB_SHA}.dylib
+mv -v libxgboost4j.dylib ${libname}
+
+if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
+then
+ aws s3 cp ${libname} \
+ s3://xgboost-nightly-builds/${BRANCH_NAME}/libxgboost4j/ \
+ --acl public-read --no-progress
+fi
+popd
+set +x
diff --git a/tests/buildkite/build-jvm-macos-m1.sh b/ops/pipeline/build-jvm-macos-intel.sh
old mode 100644
new mode 100755
similarity index 80%
rename from tests/buildkite/build-jvm-macos-m1.sh
rename to ops/pipeline/build-jvm-macos-intel.sh
index 1d2e5e8703bc..ecf480d3c063
--- a/tests/buildkite/build-jvm-macos-m1.sh
+++ b/ops/pipeline/build-jvm-macos-intel.sh
@@ -1,8 +1,9 @@
#!/bin/bash
+## Build libxgboost4j.dylib targeting macOS (Intel)
-set -euo pipefail
+set -euox pipefail
-source tests/buildkite/conftest.sh
+source ops/pipeline/enforce-ci.sh
# Display system info
echo "--- Display system information"
@@ -12,6 +13,8 @@ sysctl -n machdep.cpu.brand_string
uname -m
set +x
+brew install ninja libomp
+
# Build XGBoost4J binary
echo "--- Build libxgboost4j.dylib"
set -x
@@ -28,9 +31,9 @@ set +x
echo "--- Upload libxgboost4j.dylib"
set -x
pushd lib
-libname=libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib
+libname=libxgboost4j_intel_${GITHUB_SHA}.dylib
mv -v libxgboost4j.dylib ${libname}
-buildkite-agent artifact upload ${libname}
+
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
then
aws s3 cp ${libname} \
diff --git a/tests/buildkite/build-jvm-linux-arm64-manylinux2014.sh b/ops/pipeline/build-jvm-manylinux2014.sh
old mode 100644
new mode 100755
similarity index 62%
rename from tests/buildkite/build-jvm-linux-arm64-manylinux2014.sh
rename to ops/pipeline/build-jvm-manylinux2014.sh
index e7fec780b956..93fa03d2eb0b
--- a/tests/buildkite/build-jvm-linux-arm64-manylinux2014.sh
+++ b/ops/pipeline/build-jvm-manylinux2014.sh
@@ -1,25 +1,34 @@
#!/bin/bash
+## Build libxgboost4j.so targeting glibc 2.17 systems
-set -euo pipefail
+set -euox pipefail
-source tests/buildkite/conftest.sh
+source ops/pipeline/enforce-ci.sh
-command_wrapper="tests/ci_build/ci_build.sh jvm_manylinux2014_aarch64"
+if [ $# -ne 1 ]; then
+ echo "Usage: $0 {x86_64,aarch64}"
+ exit 1
+fi
+
+arch=$1
+
+image="xgb-ci.manylinux2014_${arch}"
# Build XGBoost4J binary
echo "--- Build libxgboost4j.so (targeting glibc 2.17)"
set -x
mkdir build
-$command_wrapper bash -c \
+python3 ops/docker_run.py \
+ --container-id ${image} \
+ -- bash -c \
"cd build && cmake .. -DJVM_BINDINGS=ON -DUSE_OPENMP=ON && make -j$(nproc)"
ldd lib/libxgboost4j.so
objdump -T lib/libxgboost4j.so | grep GLIBC_ | sed 's/.*GLIBC_\([.0-9]*\).*/\1/g' | sort -Vu
echo "--- Upload libxgboost4j.so"
pushd lib
-libname=libxgboost4j_linux_arm64_${BUILDKITE_COMMIT}.so
+libname=libxgboost4j_linux_${arch}_${GITHUB_SHA}.so
mv -v libxgboost4j.so ${libname}
-buildkite-agent artifact upload ${libname}
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
then
aws s3 cp ${libname} \
diff --git a/tests/buildkite/build-manylinux2014.sh b/ops/pipeline/build-manylinux2014.sh
similarity index 59%
rename from tests/buildkite/build-manylinux2014.sh
rename to ops/pipeline/build-manylinux2014.sh
index 426d32b5c361..7802fa555187 100755
--- a/tests/buildkite/build-manylinux2014.sh
+++ b/ops/pipeline/build-manylinux2014.sh
@@ -1,6 +1,8 @@
#!/bin/bash
-set -euo pipefail
+set -euox pipefail
+
+source ops/pipeline/enforce-ci.sh
if [ $# -ne 1 ]; then
echo "Usage: $0 {x86_64,aarch64}"
@@ -9,24 +11,28 @@ fi
arch=$1
-source tests/buildkite/conftest.sh
-
WHEEL_TAG="manylinux2014_${arch}"
-command_wrapper="tests/ci_build/ci_build.sh ${WHEEL_TAG}"
+image="xgb-ci.$WHEEL_TAG"
+
python_bin="/opt/python/cp310-cp310/bin/python"
echo "--- Build binary wheel for ${WHEEL_TAG}"
# Patch to add warning about manylinux2014 variant
-patch -p0 < tests/buildkite/remove_nccl_dep.patch
-patch -p0 < tests/buildkite/manylinux2014_warning.patch
-$command_wrapper bash -c \
+patch -p0 < ops/patch/remove_nccl_dep.patch
+patch -p0 < ops/patch/manylinux2014_warning.patch
+python3 ops/docker_run.py \
+ --container-id ${image} \
+ -- bash -c \
"cd python-package && ${python_bin} -m pip wheel --no-deps -v . --wheel-dir dist/"
-git checkout python-package/pyproject.toml python-package/xgboost/core.py # discard the patch
+git checkout python-package/pyproject.toml python-package/xgboost/core.py  # discard the patch
-$command_wrapper auditwheel repair --plat ${WHEEL_TAG} python-package/dist/*.whl
-$command_wrapper ${python_bin} tests/ci_build/rename_whl.py \
+python3 ops/docker_run.py \
+ --container-id ${image} \
+ -- auditwheel repair --plat ${WHEEL_TAG} python-package/dist/*.whl
+python3 ops/script/rename_whl.py \
--wheel-path wheelhouse/*.whl \
- --commit-hash ${BUILDKITE_COMMIT} \
+ --commit-hash ${GITHUB_SHA} \
--platform-tag ${WHEEL_TAG}
rm -rf python-package/dist/
mkdir python-package/dist/
@@ -34,25 +40,25 @@ mv -v wheelhouse/*.whl python-package/dist/
echo "--- Build binary wheel for ${WHEEL_TAG} (CPU only)"
# Patch to rename pkg to xgboost-cpu
-patch -p0 < tests/buildkite/remove_nccl_dep.patch
-patch -p0 < tests/buildkite/cpu_only_pypkg.patch
-$command_wrapper bash -c \
+patch -p0 < ops/patch/remove_nccl_dep.patch
+patch -p0 < ops/patch/cpu_only_pypkg.patch
+python3 ops/docker_run.py \
+ --container-id ${image} \
+ -- bash -c \
"cd python-package && ${python_bin} -m pip wheel --no-deps -v . --wheel-dir dist/"
git checkout python-package/pyproject.toml # discard the patch
-$command_wrapper auditwheel repair --plat ${WHEEL_TAG} python-package/dist/xgboost_cpu-*.whl
-$command_wrapper ${python_bin} tests/ci_build/rename_whl.py \
+python3 ops/docker_run.py \
+ --container-id ${image} \
+ -- auditwheel repair --plat ${WHEEL_TAG} python-package/dist/xgboost_cpu-*.whl
+python3 ops/script/rename_whl.py \
--wheel-path wheelhouse/xgboost_cpu-*.whl \
- --commit-hash ${BUILDKITE_COMMIT} \
+ --commit-hash ${GITHUB_SHA} \
--platform-tag ${WHEEL_TAG}
rm -v python-package/dist/xgboost_cpu-*.whl
mv -v wheelhouse/xgboost_cpu-*.whl python-package/dist/
echo "--- Upload Python wheel"
-for wheel in python-package/dist/*.whl
-do
- buildkite-agent artifact upload "${wheel}"
-done
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
then
for wheel in python-package/dist/*.whl
diff --git a/tests/ci_build/build_python_wheels.sh b/ops/pipeline/build-python-wheels-macos.sh
old mode 100644
new mode 100755
similarity index 94%
rename from tests/ci_build/build_python_wheels.sh
rename to ops/pipeline/build-python-wheels-macos.sh
index d9927905cf83..697514c0c3ad
--- a/tests/ci_build/build_python_wheels.sh
+++ b/ops/pipeline/build-python-wheels-macos.sh
@@ -1,7 +1,6 @@
#!/bin/bash
-set -e
-set -x
+set -euox pipefail
if [[ $# -ne 2 ]]; then
echo "Usage: $0 [platform_id] [commit ID]"
@@ -31,7 +30,6 @@ if [[ "$platform_id" == macosx_* ]]; then
# Set up environment variables to configure cibuildwheel
export CIBW_BUILD=cp${cpython_ver}-${platform_id}
export CIBW_ARCHS=${cibw_archs}
- export CIBW_ENVIRONMENT=${setup_env_var}
export CIBW_TEST_SKIP='*-macosx_arm64'
export CIBW_BUILD_VERBOSITY=3
else
@@ -44,7 +42,7 @@ export CIBW_REPAIR_WHEEL_COMMAND_MACOS="delocate-wheel --require-archs {delocate
python -m pip install cibuildwheel
python -m cibuildwheel python-package --output-dir wheelhouse
-python tests/ci_build/rename_whl.py \
+python ops/script/rename_whl.py \
--wheel-path wheelhouse/*.whl \
--commit-hash ${commit_id} \
--platform-tag ${wheel_tag}
diff --git a/ops/pipeline/build-test-jvm-packages-impl.sh b/ops/pipeline/build-test-jvm-packages-impl.sh
new file mode 100755
index 000000000000..3290bf0f17c9
--- /dev/null
+++ b/ops/pipeline/build-test-jvm-packages-impl.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+## Build and test JVM packages.
+##
+## Note. This script takes in all inputs via environment variables.
+
+INPUT_DOC=$(
+cat <<-EOF
+Inputs
+ - SCALA_VERSION: Scala version, either 2.12 or 2.13 (Required)
+ - USE_CUDA: Set to 1 to enable CUDA
+ - SKIP_NATIVE_BUILD: Set to 1 to have the JVM packages use an externally provided
+ libxgboost4j.so. (Usually Maven will invoke create_jni.py to
+ build it from scratch.) When using this option, make sure to
+                       place libxgboost4j.so in the lib/ directory.
+EOF
+)
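+
+## Example invocation (hypothetical values; normally run inside the CI
+## container):
+##   SCALA_VERSION=2.13 USE_CUDA=1 SKIP_NATIVE_BUILD=1 \
+##     bash ops/pipeline/build-test-jvm-packages-impl.sh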
+
+set -euo pipefail
+
+for arg in "SCALA_VERSION"
+do
+ if [[ -z "${!arg:-}" ]]
+ then
+ echo -e "Error: $arg must be set.\n${INPUT_DOC}"
+ exit 1
+ fi
+done
+
+set -x
+
+# Set Scala version
+if [[ "${SCALA_VERSION}" == "2.12" || "${SCALA_VERSION}" == "2.13" ]]
+then
+ python ops/script/change_scala_version.py --scala-version ${SCALA_VERSION} --purge-artifacts
+else
+ echo "Error: SCALA_VERSION must be either 2.12 or 2.13"
+ exit 2
+fi
+
+# If SKIP_NATIVE_BUILD is set, copy in libxgboost4j.so from lib/
+# Also copy in other files needed for testing. (Usually create_jni.py would perform this
+# step, but we need to do it manually here.)
+if [[ "${SKIP_NATIVE_BUILD:-}" == "1" ]]
+then
+ echo "Using externally provided libxgboost4j.so. Locating one from lib/..."
+ mkdir -p jvm-packages/xgboost4j/src/main/resources/lib/linux/x86_64/
+ cp -v lib/libxgboost4j.so jvm-packages/xgboost4j/src/main/resources/lib/linux/x86_64/
+ mkdir -p jvm-packages/xgboost4j/src/test/resources
+ mkdir -p jvm-packages/xgboost4j-spark/src/test/resources
+ mkdir -p jvm-packages/xgboost4j-spark-gpu/src/test/resources
+
+ # Generate machine.txt.* files from the CLI regression demo
+ # TODO(hcho3): Remove once CLI is removed
+ pushd demo/CLI/regression
+ python3 mapfeat.py
+ python3 mknfold.py machine.txt 1
+ popd
+
+ cp -v demo/data/agaricus.* \
+ jvm-packages/xgboost4j/src/test/resources
+ cp -v demo/CLI/regression/machine.txt.t* demo/data/agaricus.* \
+ jvm-packages/xgboost4j-spark/src/test/resources
+ cp -v demo/data/veterans_lung_cancer.csv \
+ jvm-packages/xgboost4j-spark/src/test/resources/rank.train.csv \
+ jvm-packages/xgboost4j-spark-gpu/src/test/resources
+fi
+
+cd jvm-packages/
+
+# Ensure that XGBoost4J-Spark is compatible with multiple versions of Spark
+if [[ "${USE_CUDA:-}" != "1" && "${SCALA_VERSION}" == "2.12" ]]
+then
+ for spark_version in 3.1.3 3.2.4 3.3.4 3.4.3
+ do
+ mvn --no-transfer-progress clean package -Dspark.version=${spark_version} \
+ -pl xgboost4j,xgboost4j-spark
+ done
+fi
+
+set +x
+mvn_options=""
+if [[ "${USE_CUDA:-}" == "1" ]]
+then
+ mvn_options="${mvn_options} -Pgpu"
+fi
+if [[ "${SKIP_NATIVE_BUILD:-}" == "1" ]]
+then
+ mvn_options="${mvn_options} -Dskip.native.build=true"
+fi
+set -x
+
+mvn --no-transfer-progress clean install ${mvn_options}
+
+# Integration tests
+if [[ "${USE_CUDA:-}" != "1" ]]
+then
+ mvn --no-transfer-progress test -pl xgboost4j-example
+fi
diff --git a/ops/pipeline/build-test-jvm-packages.sh b/ops/pipeline/build-test-jvm-packages.sh
new file mode 100755
index 000000000000..1feddf2bff98
--- /dev/null
+++ b/ops/pipeline/build-test-jvm-packages.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+## Build and test JVM packages.
+##
+## Note. This script takes in all inputs via environment variables.
+
+INPUT_DOC=$(
+cat <<-EOF
+Inputs
+ - SCALA_VERSION: Scala version, either 2.12 or 2.13 (Required)
+EOF
+)
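+
+## Example invocation (hypothetical value):
+##   SCALA_VERSION=2.12 bash ops/pipeline/build-test-jvm-packages.sh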
+
+set -euo pipefail
+
+source ops/pipeline/enforce-ci.sh
+
+for arg in "SCALA_VERSION"
+do
+ if [[ -z "${!arg:-}" ]]
+ then
+ echo -e "Error: $arg must be set.\n${INPUT_DOC}"
+ exit 1
+ fi
+done
+
+set -x
+
+python3 ops/docker_run.py --container-id xgb-ci.jvm \
+ --run-args "-e SCALA_VERSION=${SCALA_VERSION}" \
+ -- ops/pipeline/build-test-jvm-packages-impl.sh
diff --git a/ops/pipeline/build-win64-gpu.ps1 b/ops/pipeline/build-win64-gpu.ps1
new file mode 100644
index 000000000000..76cc955059b8
--- /dev/null
+++ b/ops/pipeline/build-win64-gpu.ps1
@@ -0,0 +1,46 @@
+$ErrorActionPreference = "Stop"
+
+. ops/pipeline/enforce-ci.ps1
+
+Write-Host "--- Build libxgboost on Windows with CUDA"
+
+nvcc --version
+if ( $is_release_branch -eq 0 ) {
+ $arch_flag = "-DGPU_COMPUTE_VER=75"
+} else {
+ $arch_flag = ""
+}
+
+# Work around https://github.com/NVIDIA/cccl/issues/1956
+# TODO(hcho3): Remove this once new CUDA version ships with CCCL 2.6.0+
+git clone https://github.com/NVIDIA/cccl.git -b v2.6.1 --quiet
+mkdir build
+cd build
+cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON `
+ -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DBUILD_DEPRECATED_CLI=ON `
+ -DCMAKE_PREFIX_PATH="$(Get-Location)/../cccl" ${arch_flag}
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+cmake --build . --config Release -- /m /nodeReuse:false `
+ "/consoleloggerparameters:ShowCommandLine;Verbosity=minimal"
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+
+Write-Host "--- Build binary wheel"
+cd ../python-package
+conda activate
+pip install --user -v "pip>=23"
+pip --version
+pip wheel --no-deps -v . --wheel-dir dist/
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+python ../ops/script/rename_whl.py `
+ --wheel-path (Get-ChildItem dist/*.whl | Select-Object -Expand FullName) `
+ --commit-hash $Env:GITHUB_SHA `
+ --platform-tag win_amd64
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+
+Write-Host "--- Upload Python wheel"
+cd ..
+if ( $is_release_branch -eq 1 ) {
+ aws s3 cp (Get-ChildItem python-package/dist/*.whl | Select-Object -Expand FullName) `
+ s3://xgboost-nightly-builds/$Env:BRANCH_NAME/ --acl public-read --no-progress
+ if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+}
diff --git a/ops/pipeline/deploy-jvm-packages-impl.sh b/ops/pipeline/deploy-jvm-packages-impl.sh
new file mode 100755
index 000000000000..36fd23a583d6
--- /dev/null
+++ b/ops/pipeline/deploy-jvm-packages-impl.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+## Deploy JVM packages to xgboost-maven-repo S3 bucket
+
+set -euox pipefail
+
+if [[ "$#" -lt 1 ]]
+then
+ echo "Usage: $0 {cpu,gpu}"
+ exit 1
+fi
+
+variant="$1"
+
+maven_options="-DskipTests -Dmaven.test.skip=true -Dskip.native.build=true"
+case "$variant" in
+ cpu)
+ # CPU variant
+ for scala_version in 2.12 2.13
+ do
+ python ops/script/change_scala_version.py --scala-version ${scala_version} --purge-artifacts
+ pushd jvm-packages
+ mvn --no-transfer-progress deploy -Pdefault,release-to-s3 ${maven_options}
+ mvn clean
+ mvn clean -Pdefault,release-to-s3
+ popd
+ done
+ ;;
+ gpu)
+ # GPU variant
+ for scala_version in 2.12 2.13
+ do
+ python ops/script/change_scala_version.py --scala-version ${scala_version} --purge-artifacts
+ pushd jvm-packages
+ mvn --no-transfer-progress install -Pgpu ${maven_options}
+ mvn --no-transfer-progress deploy -Pgpu,release-to-s3 -pl xgboost4j-spark-gpu ${maven_options}
+ mvn clean
+ mvn clean -Pgpu,release-to-s3
+ popd
+ done
+ ;;
+ *)
+ echo "Unrecognized argument: $variant"
+ exit 2
+ ;;
+esac
diff --git a/ops/pipeline/deploy-jvm-packages.sh b/ops/pipeline/deploy-jvm-packages.sh
new file mode 100755
index 000000000000..866b6dded393
--- /dev/null
+++ b/ops/pipeline/deploy-jvm-packages.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+set -euox pipefail
+
+source ops/pipeline/enforce-ci.sh
+
+if [[ "$#" -lt 2 ]]
+then
+ echo "Usage: $0 {cpu,gpu} {container_id}"
+ exit 1
+fi
+
+variant="$1"
+container_id="$2"
+
+# if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
+# then
+ echo "--- Deploy JVM packages to xgboost-maven-repo S3 repo"
+ python3 ops/docker_run.py --container-id "${container_id}" \
+ -- ops/pipeline/deploy-jvm-packages-impl.sh "${variant}"
+# fi
diff --git a/ops/pipeline/enforce-ci.ps1 b/ops/pipeline/enforce-ci.ps1
new file mode 100644
index 000000000000..0528472be6cb
--- /dev/null
+++ b/ops/pipeline/enforce-ci.ps1
@@ -0,0 +1,28 @@
+## Ensure that a script is running inside the CI.
+## Usage: . ops/pipeline/enforce-ci.ps1
+
+if ( -Not $Env:GITHUB_ACTION ) {
+ $script_name = (Split-Path -Path $PSCommandPath -Leaf)
+ Write-Host "$script_name is not meant to run locally; it should run inside GitHub Actions."
+ Write-Host "Please inspect the content of $script_name and locate the desired command manually."
+ exit 1
+}
+
+if ( -Not $Env:BRANCH_NAME ) {
+ Write-Host "Make sure to define environment variable BRANCH_NAME."
+ exit 2
+}
+
+if ( $Env:GITHUB_BASE_REF ) {
+ $is_pull_request = 1
+} else {
+ $is_pull_request = 0
+}
+
+if ( ($Env:BRANCH_NAME -eq "master") -or ($Env:BRANCH_NAME -match "release_.+") ) {
+ $is_release_branch = 1
+ $enforce_daily_budget = 0
+} else {
+ $is_release_branch = 0
+ $enforce_daily_budget = 1
+}
diff --git a/ops/pipeline/enforce-ci.sh b/ops/pipeline/enforce-ci.sh
new file mode 100755
index 000000000000..eefb6450b98d
--- /dev/null
+++ b/ops/pipeline/enforce-ci.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+## Ensure that a script is running inside the CI.
+## Usage: source ops/pipeline/enforce-ci.sh
+
+set -euo pipefail
+
+if [[ -z ${GITHUB_ACTION:-} ]]
+then
+ echo "$0 is not meant to run locally; it should run inside GitHub Actions."
+ echo "Please inspect the content of $0 and locate the desired command manually."
+ exit 1
+fi
+
+if [[ -z ${BRANCH_NAME:-} ]]
+then
+ echo "Make sure to define environment variable BRANCH_NAME."
+ exit 2
+fi
+
+if [[ -n ${GITHUB_BASE_REF:-} ]]
+then
+ is_pull_request=1
+else
+ is_pull_request=0
+fi
+
+if [[ $BRANCH_NAME == "master" || $BRANCH_NAME == "release_"* || $BRANCH_NAME == "federated-secure" ]]
+then
+ is_release_branch=1
+ enforce_daily_budget=0
+else
+ is_release_branch=0
+ enforce_daily_budget=1
+fi
+
+if [[ -n ${DISABLE_RELEASE:-} ]]
+then
+ is_release_branch=0
+fi
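+
+## After sourcing this script, the following shell variables are available:
+##   is_pull_request, is_release_branch, enforce_daily_budget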
diff --git a/ops/pipeline/run-clang-tidy.sh b/ops/pipeline/run-clang-tidy.sh
new file mode 100755
index 000000000000..a9ff039ee4ca
--- /dev/null
+++ b/ops/pipeline/run-clang-tidy.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+set -euox pipefail
+
+echo "--- Run clang-tidy"
+
+source ops/pipeline/enforce-ci.sh
+
+python3 ops/docker_run.py \
+ --container-id xgb-ci.clang_tidy \
+ -- python3 ops/script/run_clang_tidy.py --cuda-archs 75
diff --git a/ops/pipeline/test-cpp-gpu.sh b/ops/pipeline/test-cpp-gpu.sh
new file mode 100755
index 000000000000..b66162d66a50
--- /dev/null
+++ b/ops/pipeline/test-cpp-gpu.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+set -euox pipefail
+
+source ops/pipeline/enforce-ci.sh
+
+if [[ "$#" -lt 1 ]]
+then
+ echo "Usage: $0 {gpu,gpu-rmm,mgpu}"
+ exit 1
+fi
+arg=$1
+
+case "${arg}" in
+ gpu)
+ echo "--- Run Google Tests, using a single GPU"
+ python3 ops/docker_run.py --container-id xgb-ci.gpu --use-gpus \
+ -- nvidia-smi
+ python3 ops/docker_run.py --container-id xgb-ci.gpu --use-gpus \
+ -- build/testxgboost
+ ;;
+
+ gpu-rmm)
+ echo "--- Run Google Tests, using a single GPU, RMM enabled"
+ python3 ops/docker_run.py --container-id xgb-ci.gpu --use-gpus \
+ -- nvidia-smi
+ python3 ops/docker_run.py --container-id xgb-ci.gpu --use-gpus \
+ -- build/testxgboost --use-rmm-pool
+ ;;
+
+ mgpu)
+ echo "--- Run Google Tests, using multiple GPUs"
+ python3 ops/docker_run.py --container-id xgb-ci.gpu --use-gpus \
+ -- nvidia-smi
+ python3 ops/docker_run.py --container-id xgb-ci.gpu --use-gpus \
+ --run-args='--shm-size=4g' \
+ -- build/testxgboost --gtest_filter=*MGPU*
+ ;;
+
+ *)
+ echo "Unrecognized arg: ${arg}"
+ exit 2
+ ;;
+esac
diff --git a/ops/pipeline/test-jvm-gpu.sh b/ops/pipeline/test-jvm-gpu.sh
new file mode 100755
index 000000000000..272b55ad0d1a
--- /dev/null
+++ b/ops/pipeline/test-jvm-gpu.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+## Test JVM packages with CUDA. Note: this script assumes that
+## the user has already built libxgboost4j.so with CUDA support
+## and placed it in the lib/ directory.
+
+set -euo pipefail
+
+# source ops/pipeline/enforce-ci.sh
+
+SCALA_VERSION=2.12
+
+set -x
+
+python3 ops/docker_run.py --container-id xgb-ci.jvm_gpu_build --use-gpus \
+ -- nvidia-smi
+python3 ops/docker_run.py --container-id xgb-ci.jvm_gpu_build --use-gpus \
+ --run-args "-e SCALA_VERSION=${SCALA_VERSION} -e USE_CUDA=1 -e SKIP_NATIVE_BUILD=1" \
+ -- ops/pipeline/build-test-jvm-packages-impl.sh
diff --git a/ops/pipeline/test-python-impl.sh b/ops/pipeline/test-python-impl.sh
new file mode 100755
index 000000000000..be1cb410c96c
--- /dev/null
+++ b/ops/pipeline/test-python-impl.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+set -eo pipefail
+
+if [[ "$#" -lt 1 ]]
+then
+ echo "Usage: $0 {gpu|mgpu|cpu|cpu-arm64}"
+ exit 1
+fi
+
+suite="$1"
+
+# Cannot set -u before Conda env activation
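+# (conda's activation scripts reference variables that may be unset, which
+# would trip nounset if it were enabled earlier.)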
+case "$suite" in
+ gpu|mgpu)
+ source activate gpu_test
+ ;;
+ cpu)
+ source activate linux_cpu_test
+ ;;
+ cpu-arm64)
+ source activate aarch64_test
+ ;;
+ *)
+ echo "Unrecognized argument: $suite"
+ exit 1
+ ;;
+esac
+
+set -xu
+
+export PYSPARK_DRIVER_PYTHON=$(which python)
+export PYSPARK_PYTHON=$(which python)
+export SPARK_TESTING=1
+
+pip install -v ./python-package/dist/*.whl
+
+case "$suite" in
+ gpu)
+ echo "-- Run Python tests, using a single GPU"
+ python -c 'from cupy.cuda import jitify; jitify._init_module()'
+ pytest -v -s -rxXs --fulltrace --durations=0 -m 'not mgpu' tests/python-gpu
+ ;;
+ mgpu)
+ echo "-- Run Python tests, using multiple GPUs"
+ python -c 'from cupy.cuda import jitify; jitify._init_module()'
+ pytest -v -s -rxXs --fulltrace --durations=0 -m 'mgpu' tests/python-gpu
+ pytest -v -s -rxXs --fulltrace --durations=0 -m 'mgpu' \
+ tests/test_distributed/test_gpu_with_dask
+ pytest -v -s -rxXs --fulltrace --durations=0 -m 'mgpu' \
+ tests/test_distributed/test_gpu_with_spark
+ pytest -v -s -rxXs --fulltrace --durations=0 -m 'mgpu' \
+ tests/test_distributed/test_gpu_federated
+ ;;
+ cpu)
+ echo "-- Run Python tests (CPU)"
+ export RAY_OBJECT_STORE_ALLOW_SLOW_STORAGE=1
+ pytest -v -s -rxXs --fulltrace --durations=0 tests/python
+ pytest -v -s -rxXs --fulltrace --durations=0 tests/test_distributed/test_with_dask
+ pytest -v -s -rxXs --fulltrace --durations=0 tests/test_distributed/test_with_spark
+ pytest -v -s -rxXs --fulltrace --durations=0 tests/test_distributed/test_federated
+ ;;
+ cpu-arm64)
+ echo "-- Run Python tests (CPU, ARM64)"
+ pytest -v -s -rxXs --fulltrace --durations=0 \
+ tests/python/test_basic.py tests/python/test_basic_models.py \
+ tests/python/test_model_compatibility.py
+ ;;
+ *)
+ echo "Unrecognized argument: $suite"
+ exit 1
+ ;;
+esac
diff --git a/ops/pipeline/test-python.sh b/ops/pipeline/test-python.sh
new file mode 100755
index 000000000000..507deb37d9c0
--- /dev/null
+++ b/ops/pipeline/test-python.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+set -euo pipefail
+
+source ops/pipeline/enforce-ci.sh
+
+if [[ "$#" -lt 2 ]]
+then
+ echo "Usage: $0 {gpu|mgpu|cpu|cpu-arm64} {container_id}"
+ exit 1
+fi
+
+suite="$1"
+container_id="$2"
+
+if [[ "$suite" == "gpu" || "$suite" == "mgpu" ]]
+then
+ gpu_option="--use-gpus"
+else
+ gpu_option=""
+fi
+
+python3 ops/docker_run.py --container-id "${container_id}" ${gpu_option} \
+ --run-args='--shm-size=4g' \
+ -- bash ops/pipeline/test-python-impl.sh "${suite}"
diff --git a/ops/pipeline/test-win64-gpu.ps1 b/ops/pipeline/test-win64-gpu.ps1
new file mode 100644
index 000000000000..2416d53b3f85
--- /dev/null
+++ b/ops/pipeline/test-win64-gpu.ps1
@@ -0,0 +1,28 @@
+$ErrorActionPreference = "Stop"
+
+. ops/pipeline/enforce-ci.ps1
+
+Write-Host "--- Test XGBoost on Windows with CUDA"
+
+nvcc --version
+
+Write-Host "--- Run Google Tests"
+build/testxgboost.exe
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+
+Write-Host "--- Set up Python env"
+conda activate
+$env_name = -join("win64_", (New-Guid).ToString().replace("-", ""))
+mamba env create -n ${env_name} --file=ops/conda_env/win64_test.yml
+conda activate ${env_name}
+python -m pip install `
+ (Get-ChildItem python-package/dist/*.whl | Select-Object -Expand FullName)
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+
+Write-Host "--- Run Python tests"
+python -X faulthandler -m pytest -v -s -rxXs --fulltrace tests/python
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
+Write-Host "--- Run Python tests with GPU"
+python -X faulthandler -m pytest -v -s -rxXs --fulltrace -m "(not slow) and (not mgpu)" `
+ tests/python-gpu
+if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
diff --git a/tests/ci_build/build_via_cmake.sh b/ops/script/build_via_cmake.sh
similarity index 56%
rename from tests/ci_build/build_via_cmake.sh
rename to ops/script/build_via_cmake.sh
index 3238c41e1bcb..86e3677f4392 100755
--- a/tests/ci_build/build_via_cmake.sh
+++ b/ops/script/build_via_cmake.sh
@@ -1,9 +1,17 @@
-#!/usr/bin/env bash
-set -e
+#!/bin/bash
-if [[ "$1" == --conda-env=* ]]
+set -euo pipefail
+
+if [[ "$#" -lt 1 ]]
+then
+ conda_env=""
+else
+ conda_env="$1"
+fi
+
+if [[ "${conda_env}" == --conda-env=* ]]
then
- conda_env=$(echo "$1" | sed 's/^--conda-env=//g' -)
+ conda_env=$(echo "${conda_env}" | sed 's/^--conda-env=//g' -)
echo "Activating Conda environment ${conda_env}"
shift 1
cmake_args="$@"
@@ -26,7 +34,17 @@ mkdir build
cd build
# Disable CMAKE_COMPILE_WARNING_AS_ERROR option temporarily until
# https://github.com/dmlc/xgboost/issues/10400 is fixed
-cmake .. ${cmake_args} -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_VERBOSE_MAKEFILE=ON -DENABLE_ALL_WARNINGS=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF -GNinja ${cmake_prefix_flag} -DHIDE_CXX_SYMBOLS=ON -DBUILD_DEPRECATED_CLI=ON
+set -x
+cmake .. ${cmake_args} \
+ -DGOOGLE_TEST=ON \
+ -DUSE_DMLC_GTEST=ON \
+ -DENABLE_ALL_WARNINGS=ON \
+ -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \
+ -GNinja \
+ ${cmake_prefix_flag} \
+ -DHIDE_CXX_SYMBOLS=ON \
+ -DBUILD_DEPRECATED_CLI=ON
ninja clean
time ninja -v
cd ..
+set +x
diff --git a/dev/change_scala_version.py b/ops/script/change_scala_version.py
similarity index 93%
rename from dev/change_scala_version.py
rename to ops/script/change_scala_version.py
index c8a9b54ccf91..ed475a1f9582 100644
--- a/dev/change_scala_version.py
+++ b/ops/script/change_scala_version.py
@@ -4,7 +4,7 @@
import shutil
-def main(args):
+def main(args: argparse.Namespace) -> None:
if args.scala_version == "2.12":
scala_ver = "2.12"
scala_patchver = "2.12.18"
@@ -20,6 +20,9 @@ def main(args):
if target.is_dir():
print(f"Removing {target}...")
shutil.rmtree(target)
+ for target in pathlib.Path("jvm-packages/").glob("**/*.so"):
+ print(f"Removing {target}...")
+ target.unlink()
# Update pom.xml
for pom in pathlib.Path("jvm-packages/").glob("**/pom.xml"):
diff --git a/tests/ci_build/change_version.py b/ops/script/change_version.py
similarity index 100%
rename from tests/ci_build/change_version.py
rename to ops/script/change_version.py
diff --git a/tests/ci_build/format_wheel_meta.py b/ops/script/format_wheel_meta.py
similarity index 92%
rename from tests/ci_build/format_wheel_meta.py
rename to ops/script/format_wheel_meta.py
index 9e7bad907687..a7def879905e 100644
--- a/tests/ci_build/format_wheel_meta.py
+++ b/ops/script/format_wheel_meta.py
@@ -2,18 +2,19 @@
Script to generate meta.json to store metadata for a nightly build of
XGBoost Python package.
"""
+
+import argparse
import json
import pathlib
-from argparse import ArgumentParser
-def main(args):
+def main(args: argparse.Namespace) -> None:
wheel_path = pathlib.Path(args.wheel_path).expanduser().resolve()
if not wheel_path.exists():
raise ValueError(f"Wheel cannot be found at path {wheel_path}")
if not wheel_path.is_file():
raise ValueError(f"Path {wheel_path} is not a valid file")
- wheel_dir, wheel_name = wheel_path.parent, wheel_path.name
+ wheel_name = wheel_path.name
meta_path = pathlib.Path(args.meta_path)
if not meta_path.exists():
@@ -36,7 +37,7 @@ def main(args):
if __name__ == "__main__":
- parser = ArgumentParser(
+ parser = argparse.ArgumentParser(
description="Format meta.json encoding the latest nightly version of the Python wheel"
)
parser.add_argument(
diff --git a/tests/ci_build/lint_cmake.sh b/ops/script/lint_cmake.sh
old mode 100644
new mode 100755
similarity index 94%
rename from tests/ci_build/lint_cmake.sh
rename to ops/script/lint_cmake.sh
index d67ecd0844ed..55aeb20e8fb2
--- a/tests/ci_build/lint_cmake.sh
+++ b/ops/script/lint_cmake.sh
@@ -1,6 +1,6 @@
#!/bin/bash
-set -e
+set -euo pipefail
cmake_files=$(
find . -name CMakeLists.txt -o -path "./cmake/*.cmake" \
diff --git a/tests/ci_build/lint_cpp.py b/ops/script/lint_cpp.py
similarity index 86%
rename from tests/ci_build/lint_cpp.py
rename to ops/script/lint_cpp.py
index d4775d6b6b3e..2d00b219ceab 100644
--- a/tests/ci_build/lint_cpp.py
+++ b/ops/script/lint_cpp.py
@@ -2,6 +2,7 @@
import os
import re
import sys
+from typing import TextIO
import cpplint
from cpplint import _cpplint_state
@@ -9,7 +10,7 @@
CXX_SUFFIX = set(["cc", "c", "cpp", "h", "cu", "hpp"])
-def filepath_enumerate(paths):
+def filepath_enumerate(paths: list[str]) -> list[str]:
"""Enumerate the file paths of all subfiles of the list of paths"""
out = []
for path in paths:
@@ -22,7 +23,7 @@ def filepath_enumerate(paths):
return out
-def get_header_guard_dmlc(filename):
+def get_header_guard_dmlc(filename: str) -> str:
"""Get Header Guard Convention for DMLC Projects.
For headers in include, directly use the path
@@ -54,11 +55,10 @@ def get_header_guard_dmlc(filename):
class Lint:
- def __init__(self):
+ def __init__(self) -> None:
self.project_name = "xgboost"
- self.cpp_header_map = {}
- self.cpp_src_map = {}
- self.python_map = {}
+ self.cpp_header_map: dict[str, dict[str, int]] = {}
+ self.cpp_src_map: dict[str, dict[str, int]] = {}
self.pylint_cats = set(["error", "warning", "convention", "refactor"])
# setup cpp lint
@@ -78,7 +78,7 @@ def __init__(self):
cpplint._SetCountingStyle("toplevel")
cpplint._line_length = 100
- def process_cpp(self, path, suffix):
+ def process_cpp(self, path: str, suffix: str) -> None:
"""Process a cpp file."""
_cpplint_state.ResetErrorCounts()
cpplint.ProcessFile(str(path), _cpplint_state.verbose_level)
@@ -91,7 +91,9 @@ def process_cpp(self, path, suffix):
self.cpp_src_map[str(path)] = errors
@staticmethod
- def _print_summary_map(strm, result_map, ftype):
+ def _print_summary_map(
+ strm: TextIO, result_map: dict[str, dict[str, int]], ftype: str
+ ) -> int:
"""Print summary of certain result map."""
if len(result_map) == 0:
return 0
@@ -105,7 +107,7 @@ def _print_summary_map(strm, result_map, ftype):
)
return len(result_map) - npass
- def print_summary(self, strm):
+ def print_summary(self, strm: TextIO) -> int:
"""Print summary of lint."""
nerr = 0
nerr += Lint._print_summary_map(strm, self.cpp_header_map, "cpp-header")
@@ -122,7 +124,7 @@ def print_summary(self, strm):
cpplint.GetHeaderGuardCPPVariable = get_header_guard_dmlc
-def process(fname, allow_type):
+def process(fname: str, allow_type: list[str]) -> None:
"""Process a file."""
fname = str(fname)
arr = fname.rsplit(".", 1)
@@ -132,13 +134,19 @@ def process(fname, allow_type):
_HELPER.process_cpp(fname, arr[-1])
-def main():
+def main() -> None:
parser = argparse.ArgumentParser(description="run cpp lint")
parser.add_argument(
"path",
nargs="*",
help="Path to traverse",
- default=["src", "include", os.path.join("R-package", "src"), "python-package", "plugin/sycl"],
+ default=[
+ "src",
+ "include",
+ os.path.join("R-package", "src"),
+ "python-package",
+ "plugin/sycl",
+ ],
)
parser.add_argument(
"--exclude_path",
@@ -149,7 +157,7 @@ def main():
args = parser.parse_args()
excluded_paths = filepath_enumerate(args.exclude_path)
- allow_type = []
+ allow_type: list[str] = []
allow_type += CXX_SUFFIX
for path in args.path:
diff --git a/tests/ci_build/lint_python.py b/ops/script/lint_python.py
similarity index 95%
rename from tests/ci_build/lint_python.py
rename to ops/script/lint_python.py
index e97b13f2c465..67343cc430ac 100644
--- a/tests/ci_build/lint_python.py
+++ b/ops/script/lint_python.py
@@ -16,8 +16,6 @@ class LintersPaths:
BLACK = (
# core
"python-package/",
- # CI
- "tests/ci_build/tidy.py",
# tests
"tests/python/test_config.py",
"tests/python/test_callback.py",
@@ -70,10 +68,7 @@ class LintersPaths:
"demo/guide-python/update_process.py",
"demo/aft_survival/aft_survival_viz_demo.py",
# CI
- "tests/ci_build/lint_python.py",
- "tests/ci_build/test_r_package.py",
- "tests/ci_build/test_utils.py",
- "tests/ci_build/change_version.py",
+ "ops/",
)
ISORT = (
@@ -83,12 +78,13 @@ class LintersPaths:
"tests/test_distributed/",
"tests/python/",
"tests/python-gpu/",
- "tests/ci_build/",
# demo
"demo/",
# misc
"dev/",
"doc/",
+ # CI
+ "ops/",
)
MYPY = (
@@ -130,11 +126,7 @@ class LintersPaths:
"demo/guide-python/learning_to_rank.py",
"demo/aft_survival/aft_survival_viz_demo.py",
# CI
- "tests/ci_build/tidy.py",
- "tests/ci_build/lint_python.py",
- "tests/ci_build/test_r_package.py",
- "tests/ci_build/test_utils.py",
- "tests/ci_build/change_version.py",
+ "ops/",
)
diff --git a/tests/ci_build/lint_r.R b/ops/script/lint_r.R
similarity index 100%
rename from tests/ci_build/lint_r.R
rename to ops/script/lint_r.R
diff --git a/tests/ci_build/rename_whl.py b/ops/script/rename_whl.py
similarity index 95%
rename from tests/ci_build/rename_whl.py
rename to ops/script/rename_whl.py
index 500196190b3d..d4467720c738 100644
--- a/tests/ci_build/rename_whl.py
+++ b/ops/script/rename_whl.py
@@ -1,8 +1,8 @@
+import argparse
import pathlib
-from argparse import ArgumentParser
-def main(args):
+def main(args: argparse.Namespace) -> None:
wheel_path = pathlib.Path(args.wheel_path).expanduser().resolve()
if not wheel_path.exists():
raise ValueError(f"Wheel cannot be found at path {wheel_path}")
@@ -43,7 +43,7 @@ def main(args):
if __name__ == "__main__":
- parser = ArgumentParser(
+ parser = argparse.ArgumentParser(
description="Format a Python wheel's name using the git commit hash and platform tag"
)
parser.add_argument(
diff --git a/tests/ci_build/tidy.py b/ops/script/run_clang_tidy.py
similarity index 97%
rename from tests/ci_build/tidy.py
rename to ops/script/run_clang_tidy.py
index 13bbedc0b4b5..dca5d1069598 100755
--- a/tests/ci_build/tidy.py
+++ b/ops/script/run_clang_tidy.py
@@ -19,7 +19,9 @@ def call(args: list[str]) -> tuple[int, int, str, list[str]]:
# `workspace` is a name used in the CI container. Normally we should keep the dir
# as `xgboost`.
matched = re.search(
- "(workspace|xgboost)/.*(src|tests|include)/.*warning:", error_msg, re.MULTILINE
+ "(workspace|xgboost)/.*(ops|src|tests|include)/.*warning:",
+ error_msg,
+ re.MULTILINE,
)
if matched is None:
@@ -265,7 +267,7 @@ def test_tidy(args: argparse.Namespace) -> None:
"""
root_path = os.path.abspath(os.path.curdir)
tidy_file = os.path.join(root_path, ".clang-tidy")
- test_file_path = os.path.join(root_path, "tests", "ci_build", "test_tidy.cc")
+ test_file_path = os.path.join(root_path, "ops", "script", "test_tidy.cc")
tidy_config = "--config-file=" + tidy_file
if not args.tidy_version:
@@ -274,8 +276,8 @@ def test_tidy(args: argparse.Namespace) -> None:
tidy = "clang-tidy-" + str(args.tidy_version)
cmd = [tidy, tidy_config, test_file_path]
(proc_code, tidy_status, error_msg, _) = call(cmd)
- assert proc_code == 0
- assert tidy_status == 1
+ if proc_code != 0 or tidy_status != 1:
+ raise RuntimeError(error_msg)
print("clang-tidy is working.")
diff --git a/tests/ci_build/test_r_package.py b/ops/script/test_r_package.py
similarity index 99%
rename from tests/ci_build/test_r_package.py
rename to ops/script/test_r_package.py
index 5ca7fa69b21a..3ce886c1bc41 100644
--- a/tests/ci_build/test_r_package.py
+++ b/ops/script/test_r_package.py
@@ -42,7 +42,7 @@ def pkgroot(path: str) -> None:
else:
would_remove = output.stdout.decode("utf-8").strip().split("\n")
- if would_remove and not all(f.find("tests/ci_build") != -1 for f in would_remove):
+ if would_remove and not all(f.find("ops") != -1 for f in would_remove):
raise ValueError(
"\n".join(would_remove) + "\nPlease cleanup the working git repository."
)
diff --git a/tests/ci_build/test_tidy.cc b/ops/script/test_tidy.cc
similarity index 100%
rename from tests/ci_build/test_tidy.cc
rename to ops/script/test_tidy.cc
diff --git a/tests/ci_build/test_utils.py b/ops/script/test_utils.py
similarity index 100%
rename from tests/ci_build/test_utils.py
rename to ops/script/test_utils.py
diff --git a/tests/buildkite/update-rapids.sh b/ops/script/update_rapids.sh
similarity index 50%
rename from tests/buildkite/update-rapids.sh
rename to ops/script/update_rapids.sh
index f6a2675bdfa9..d7958ce70d86 100755
--- a/tests/buildkite/update-rapids.sh
+++ b/ops/script/update_rapids.sh
@@ -7,7 +7,10 @@ echo "LATEST_RAPIDS_VERSION = $LATEST_RAPIDS_VERSION"
DEV_RAPIDS_VERSION=$(date +%Y-%m-%d -d "20${LATEST_RAPIDS_VERSION//./-}-01 + 2 month" | cut -c3-7 | tr - .)
echo "DEV_RAPIDS_VERSION = $DEV_RAPIDS_VERSION"
-PARENT_PATH=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P )
+OPS_PATH=$( cd "$(dirname "${BASH_SOURCE[0]}")/.." ; pwd -P )
+CONTAINER_YAML="$OPS_PATH/docker/ci_container.yml"
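+
+# ci_container.yml records versions as YAML anchors, e.g.
+# `rapids_version: &rapids_version "24.10"` (hypothetical value); the
+# substitutions below update them in place.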
-sed -i "s/^RAPIDS_VERSION=[[:digit:]]\+\.[[:digit:]]\+/RAPIDS_VERSION=${LATEST_RAPIDS_VERSION}/" $PARENT_PATH/conftest.sh
-sed -i "s/^DEV_RAPIDS_VERSION=[[:digit:]]\+\.[[:digit:]]\+/DEV_RAPIDS_VERSION=${DEV_RAPIDS_VERSION}/" $PARENT_PATH/conftest.sh
+sed -i "s/\&rapids_version \"[[:digit:]]\+\.[[:digit:]]\+\"/\&rapids_version \"${LATEST_RAPIDS_VERSION}\"/" \
+ "$CONTAINER_YAML"
+sed -i "s/\&dev_rapids_version \"[[:digit:]]\+\.[[:digit:]]\+\"/\&dev_rapids_version \"${DEV_RAPIDS_VERSION}\"/" \
+ "$CONTAINER_YAML"
diff --git a/tests/ci_build/verify_link.sh b/ops/script/verify_link.sh
similarity index 100%
rename from tests/ci_build/verify_link.sh
rename to ops/script/verify_link.sh
diff --git a/ops/stash_artifacts.ps1 b/ops/stash_artifacts.ps1
new file mode 100644
index 000000000000..57a58d884226
--- /dev/null
+++ b/ops/stash_artifacts.ps1
@@ -0,0 +1,47 @@
+[CmdletBinding()]
+Param(
+ [Parameter(
+ Mandatory=$true,
+ Position=0,
+ ValueFromRemainingArguments=$true
+ )][string[]]$artifacts
+)
+
+## Convenience wrapper for ops/stash_artifacts.py
+## Meant to be used inside GitHub Actions
+
+$ENV_VAR_DOC = @'
+Inputs
+ - COMMAND: Either "upload" or "download"
+ - KEY: Unique string to identify a group of artifacts
+'@
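+
+## Example invocation (hypothetical values):
+##   $Env:COMMAND = "upload"; $Env:KEY = "build-win64-gpu"
+##   & ./ops/stash_artifacts.ps1 build/testxgboost.exe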
+
+$ErrorActionPreference = "Stop"
+
+. ops/pipeline/enforce-ci.ps1
+
+foreach ($env in "COMMAND", "KEY", "GITHUB_REPOSITORY", "GITHUB_RUN_ID",
+ "RUNS_ON_S3_BUCKET_CACHE") {
+ $val = [Environment]::GetEnvironmentVariable($env)
+ if ($val -eq $null) {
+ Write-Host "Error: $env must be set.`n${ENV_VAR_DOC}"
+ exit 1
+ }
+}
+
+$artifact_stash_prefix = "cache/${Env:GITHUB_REPOSITORY}/stash/${Env:GITHUB_RUN_ID}"
+
+conda activate
+
+Write-Host @"
+python ops/stash_artifacts.py `
+ --command "${Env:COMMAND}" `
+ --s3-bucket "${Env:RUNS_ON_S3_BUCKET_CACHE}" `
+ --prefix "${artifact_stash_prefix}/${Env:KEY}" `
+ -- $artifacts
+"@
+python ops/stash_artifacts.py `
+ --command "${Env:COMMAND}" `
+ --s3-bucket "${Env:RUNS_ON_S3_BUCKET_CACHE}" `
+ --prefix "${artifact_stash_prefix}/${Env:KEY}" `
+ -- $artifacts
diff --git a/ops/stash_artifacts.py b/ops/stash_artifacts.py
new file mode 100644
index 000000000000..827e448ac49e
--- /dev/null
+++ b/ops/stash_artifacts.py
@@ -0,0 +1,144 @@
+"""
+Stash an artifact in an S3 bucket for later use
+
+Note. This script takes in all inputs via environment variables
+ except the path to the artifact(s).
+"""
+
+import argparse
+import os
+import subprocess
+from pathlib import Path
+from urllib.parse import SplitResult, urlsplit, urlunsplit
+
+
+def resolve(x: Path) -> Path:
+ return x.expanduser().resolve()
+
+
+def path_equals(a: Path, b: Path) -> bool:
+ return resolve(a) == resolve(b)
+
+
+def compute_s3_url(s3_bucket: str, prefix: str, artifact: Path) -> str:
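+    # Mirror the artifact's path (relative to the current working directory)
+    # under the given prefix. E.g., with hypothetical values s3_bucket="bkt",
+    # prefix="cache/1", artifact=Path("python-package/dist/x.whl"), the
+    # result is "s3://bkt/cache/1/python-package/dist/x.whl".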
+ filename = artifact.name
+ relative_path = resolve(artifact).relative_to(Path.cwd())
+ if resolve(artifact.parent) == resolve(Path.cwd()):
+ full_prefix = prefix
+ else:
+ full_prefix = f"{prefix}/{str(relative_path.parent)}"
+ return f"s3://{s3_bucket}/{full_prefix}/{filename}"
+
+
+def aws_s3_upload(src: Path, dest: str) -> None:
+ cli_args = ["aws", "s3", "cp", "--no-progress", str(src), dest]
+ print(" ".join(cli_args))
+ subprocess.run(
+ cli_args,
+ check=True,
+ encoding="utf-8",
+ )
+
+
+def aws_s3_download(src: str, dest: Path) -> None:
+ cli_args = ["aws", "s3", "cp", "--no-progress", src, str(dest)]
+ print(" ".join(cli_args))
+ subprocess.run(
+ cli_args,
+ check=True,
+ encoding="utf-8",
+ )
+
+
+def aws_s3_download_with_wildcard(src: str, dest: Path) -> None:
+ parsed_src = urlsplit(src)
+ src_dir = urlunsplit(
+ SplitResult(
+ scheme="s3",
+ netloc=parsed_src.netloc,
+ path=os.path.dirname(parsed_src.path),
+ query="",
+ fragment="",
+ )
+ )
+ dest_dir = dest.parent
+ src_glob = os.path.basename(parsed_src.path)
+ cli_args = [
+ "aws",
+ "s3",
+ "cp",
+ "--recursive",
+ "--no-progress",
+ "--exclude",
+ "'*'",
+ "--include",
+ src_glob,
+ src_dir,
+ str(dest_dir),
+ ]
+ print(" ".join(cli_args))
+ subprocess.run(
+ cli_args,
+ check=True,
+ encoding="utf-8",
+ )
+
+
+def upload(args: argparse.Namespace) -> None:
+ print(f"Uploading artifacts with prefix {args.prefix}...")
+ for artifact in args.artifacts:
+ artifact_path = Path(artifact)
+ s3_url = compute_s3_url(args.s3_bucket, args.prefix, artifact_path)
+ aws_s3_upload(artifact_path, s3_url)
+
+
+def download(args: argparse.Namespace) -> None:
+ print(f"Downloading artifacts with prefix {args.prefix}...")
+ for artifact in args.artifacts:
+ artifact_path = Path(artifact)
+ print(f"mkdir -p {str(artifact_path.parent)}")
+ artifact_path.parent.mkdir(parents=True, exist_ok=True)
+ s3_url = compute_s3_url(args.s3_bucket, args.prefix, artifact_path)
+ if "*" in artifact:
+ aws_s3_download_with_wildcard(s3_url, artifact_path)
+ else:
+ aws_s3_download(s3_url, artifact_path)
+
+
+if __name__ == "__main__":
+ # Ensure that the current working directory is the project root
+ if not (Path.cwd() / "ops").is_dir() or not path_equals(
+ Path(__file__).parent, Path.cwd() / "ops"
+ ):
+ x = Path(__file__).name
+ raise RuntimeError(f"Script {x} must be run at the project's root directory")
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--command",
+ type=str,
+ choices=["upload", "download"],
+ required=True,
+ help="Whether to upload or download the artifact (upload/download)",
+ )
+ parser.add_argument(
+ "--s3-bucket",
+ type=str,
+ required=True,
+ help="Name of the S3 bucket to store the artifact",
+ )
+ parser.add_argument(
+ "--prefix",
+ type=str,
+ required=True,
+ help=(
+ "Where the artifact would be stored. The artifact will be stored in "
+ "s3://[s3-bucket]/[prefix]."
+ ),
+ )
+ parser.add_argument("artifacts", type=str, nargs="+", metavar="artifact")
+ parsed_args = parser.parse_args()
+ if parsed_args.command == "upload":
+ upload(parsed_args)
+ elif parsed_args.command == "download":
+ download(parsed_args)
diff --git a/ops/stash_artifacts.sh b/ops/stash_artifacts.sh
new file mode 100755
index 000000000000..c2a16f42a26c
--- /dev/null
+++ b/ops/stash_artifacts.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+## Convenience wrapper for ops/stash_artifacts.py
+## Meant to be used inside GitHub Actions
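+##
+## Example (hypothetical KEY; GITHUB_* and RUNS_ON_* variables are
+## predefined on the CI runners):
+##   COMMAND=upload KEY=build-artifacts ops/stash_artifacts.sh ./build/testxgboost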
+
+ENV_VAR_DOC=$(
+cat <<-EOF
+Inputs
+ - COMMAND: Either "upload" or "download"
+ - KEY: Unique string to identify a group of artifacts
+EOF
+)
+
+set -euo pipefail
+
+source ops/pipeline/enforce-ci.sh
+
+if [ "$#" -lt 1 ]; then
+ echo "Usage: $0 [artifact] [artifact ...]"
+ exit 1
+fi
+
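+# All inputs except the artifact paths arrive via environment variables;
+# fail fast if any required variable is missing.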
+for arg in "COMMAND" "KEY" "GITHUB_REPOSITORY" "GITHUB_RUN_ID" "RUNS_ON_S3_BUCKET_CACHE"
+do
+ if [[ -z "${!arg:-}" ]]
+ then
+ echo -e "Error: $arg must be set.\n${ENV_VAR_DOC}"
+ exit 1
+ fi
+done
+
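+# Scope stashed artifacts to this repository and workflow run so that
+# concurrent runs cannot overwrite each other's artifacts.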
+artifact_stash_prefix="cache/${GITHUB_REPOSITORY}/stash/${GITHUB_RUN_ID}"
+
+set -x
+python3 ops/stash_artifacts.py \
+ --command "${COMMAND}" \
+ --s3-bucket "${RUNS_ON_S3_BUCKET_CACHE}" \
+ --prefix "${artifact_stash_prefix}/${KEY}" \
+ -- "$@"
diff --git a/tests/buildkite/build-containers.sh b/tests/buildkite/build-containers.sh
deleted file mode 100755
index aa8f572483a3..000000000000
--- a/tests/buildkite/build-containers.sh
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-set -x
-
-if [ "$#" -lt 1 ]
-then
- echo "Usage: $0 [container to build]"
- exit 1
-fi
-container=$1
-
-source tests/buildkite/conftest.sh
-
-echo "--- Build container ${container}"
-
-BUILD_ARGS=""
-
-case "${container}" in
- cpu)
- ;;
-
- gpu|gpu_build_rockylinux8)
- BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
- BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
- BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
- ;;
-
- gpu_dev_ver)
- BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
- BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
- BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$DEV_RAPIDS_VERSION"
- ;;
-
- jvm_gpu_build)
- BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
- BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
- ;;
-
- *)
- echo "Unrecognized container ID: ${container}"
- exit 2
- ;;
-esac
-
-# Run a no-op command. This will simply build the container and push it to the private registry
-tests/ci_build/ci_build.sh ${container} ${BUILD_ARGS} bash
diff --git a/tests/buildkite/build-cpu.sh b/tests/buildkite/build-cpu.sh
deleted file mode 100755
index 11679d644de1..000000000000
--- a/tests/buildkite/build-cpu.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-echo "--- Build CPU code"
-
-source tests/buildkite/conftest.sh
-
-command_wrapper="tests/ci_build/ci_build.sh cpu"
-
-$command_wrapper rm -fv dmlc-core/include/dmlc/build_config_default.h
-  # This step is not strictly necessary, but we include it to ensure that the
-  # DMLC_CORE_USE_CMAKE flag is correctly propagated, i.e. that we use the
-  # configured header build/dmlc/build_config.h instead of
-  # include/dmlc/build_config_default.h.
-echo "--- Build libxgboost from the source"
-$command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH=/opt/grpc \
- -DPLUGIN_FEDERATED=ON
-echo "--- Run Google Test"
-$command_wrapper bash -c "cd build && ctest --extra-verbose"
-echo "--- Stash XGBoost CLI executable"
-buildkite-agent artifact upload ./xgboost
-
-# Sanitizer test
-echo "--- Run Google Test with sanitizer enabled"
-$command_wrapper tests/ci_build/build_via_cmake.sh -DUSE_SANITIZER=ON \
- -DENABLED_SANITIZERS="address;leak;undefined" -DCMAKE_BUILD_TYPE=Debug \
- -DSANITIZER_PATH=/usr/lib/x86_64-linux-gnu/
-CI_DOCKER_EXTRA_PARAMS_INIT="-e ASAN_SYMBOLIZER_PATH=/usr/bin/llvm-symbolizer "`
- `"-e ASAN_OPTIONS=symbolize=1 "`
- `"-e UBSAN_OPTIONS=print_stacktrace=1:log_path=ubsan_error.log "`
- `"--cap-add SYS_PTRACE" \
- $command_wrapper bash -c "cd build && ctest --exclude-regex AllTestsInDMLCUnitTests "`
- `"--extra-verbose"
diff --git a/tests/buildkite/build-cuda-with-rmm.sh b/tests/buildkite/build-cuda-with-rmm.sh
deleted file mode 100755
index 189c67cba449..000000000000
--- a/tests/buildkite/build-cuda-with-rmm.sh
+++ /dev/null
@@ -1,91 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-if [ "$#" -lt 1 ]
-then
-  mode=stable
-else
- mode=$1
-fi
-
-WHEEL_TAG=manylinux_2_28_x86_64
-
-source tests/buildkite/conftest.sh
-
-
-case "${mode}" in
- stable)
- container_tag='gpu_build_rockylinux8'
- rapids_version=$RAPIDS_VERSION
- ;;
-
- dev)
- container_tag='gpu_dev_ver'
- rapids_version=$DEV_RAPIDS_VERSION
- ;;
-
- *)
- echo "Unrecognized mode ID: ${mode}"
- exit 2
- ;;
-esac
-
-echo "--- Build with CUDA ${CUDA_VERSION} with RMM"
-
-if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]]
-then
- arch_flag="-DGPU_COMPUTE_VER=75"
-else
- arch_flag=""
-fi
-
-command_wrapper="tests/ci_build/ci_build.sh $container_tag --build-arg "`
- `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
- `"NCCL_VERSION_ARG=$NCCL_VERSION --build-arg "`
- `"RAPIDS_VERSION_ARG=$rapids_version"
-
-echo "--- Build libxgboost from the source"
-$command_wrapper tests/ci_build/build_via_cmake.sh \
- -DCMAKE_PREFIX_PATH="/opt/grpc;/opt/rmm;/opt/rmm/lib64/rapids/cmake" \
- -DUSE_CUDA=ON \
- -DUSE_OPENMP=ON \
- -DHIDE_CXX_SYMBOLS=ON \
- -DPLUGIN_FEDERATED=ON \
- -DPLUGIN_RMM=ON \
- -DUSE_NCCL=ON \
- -DUSE_NCCL_LIB_PATH=ON \
- -DNCCL_INCLUDE_DIR=/usr/include \
- -DUSE_DLOPEN_NCCL=ON \
- ${arch_flag}
-echo "--- Build binary wheel"
-$command_wrapper bash -c \
- "cd python-package && rm -rf dist/* && pip wheel --no-deps -v . --wheel-dir dist/"
-$command_wrapper python tests/ci_build/rename_whl.py \
- --wheel-path python-package/dist/*.whl \
- --commit-hash ${BUILDKITE_COMMIT} \
- --platform-tag ${WHEEL_TAG}
-
-echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard"
-tests/ci_build/ci_build.sh manylinux_2_28_x86_64 auditwheel repair \
- --plat ${WHEEL_TAG} python-package/dist/*.whl
-$command_wrapper python tests/ci_build/rename_whl.py \
- --wheel-path wheelhouse/*.whl \
- --commit-hash ${BUILDKITE_COMMIT} \
- --platform-tag ${WHEEL_TAG}
-mv -v wheelhouse/*.whl python-package/dist/
-# Make sure that libgomp.so is vendored in the wheel
-tests/ci_build/ci_build.sh manylinux_2_28_x86_64 bash -c \
- "unzip -l python-package/dist/*.whl | grep libgomp || exit -1"
-
-echo "--- Upload Python wheel"
-buildkite-agent artifact upload python-package/dist/*.whl
-if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
-then
- aws s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/experimental_build_with_rmm/ \
- --acl public-read --no-progress
-fi
-
-echo "-- Stash C++ test executable (testxgboost)"
-buildkite-agent artifact upload build/testxgboost
diff --git a/tests/buildkite/build-cuda.sh b/tests/buildkite/build-cuda.sh
deleted file mode 100755
index 03d2cc8a6a24..000000000000
--- a/tests/buildkite/build-cuda.sh
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-WHEEL_TAG=manylinux_2_28_x86_64
-
-source tests/buildkite/conftest.sh
-
-echo "--- Build with CUDA ${CUDA_VERSION}"
-
-if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]]
-then
- arch_flag="-DGPU_COMPUTE_VER=75"
-else
- arch_flag=""
-fi
-
-command_wrapper="tests/ci_build/ci_build.sh gpu_build_rockylinux8 --build-arg "`
- `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
- `"NCCL_VERSION_ARG=$NCCL_VERSION --build-arg "`
- `"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
-
-echo "--- Build libxgboost from the source"
-$command_wrapper tests/ci_build/build_via_cmake.sh \
- -DCMAKE_PREFIX_PATH="/opt/grpc" \
- -DUSE_CUDA=ON \
- -DUSE_OPENMP=ON \
- -DHIDE_CXX_SYMBOLS=ON \
- -DPLUGIN_FEDERATED=ON \
- -DUSE_NCCL=ON \
- -DUSE_NCCL_LIB_PATH=ON \
- -DNCCL_INCLUDE_DIR=/usr/include \
- -DUSE_DLOPEN_NCCL=ON \
- ${arch_flag}
-echo "--- Build binary wheel"
-$command_wrapper bash -c \
- "cd python-package && rm -rf dist/* && pip wheel --no-deps -v . --wheel-dir dist/"
-$command_wrapper python tests/ci_build/rename_whl.py \
- --wheel-path python-package/dist/*.whl \
- --commit-hash ${BUILDKITE_COMMIT} \
- --platform-tag ${WHEEL_TAG}
-
-echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard"
-tests/ci_build/ci_build.sh manylinux_2_28_x86_64 auditwheel repair \
- --plat ${WHEEL_TAG} python-package/dist/*.whl
-$command_wrapper python tests/ci_build/rename_whl.py \
- --wheel-path wheelhouse/*.whl \
- --commit-hash ${BUILDKITE_COMMIT} \
- --platform-tag ${WHEEL_TAG}
-mv -v wheelhouse/*.whl python-package/dist/
-# Make sure that libgomp.so is vendored in the wheel
-tests/ci_build/ci_build.sh manylinux_2_28_x86_64 bash -c \
- "unzip -l python-package/dist/*.whl | grep libgomp || exit -1"
-
-echo "--- Upload Python wheel"
-buildkite-agent artifact upload python-package/dist/*.whl
-if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
-then
- aws s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/${BRANCH_NAME}/ \
- --acl public-read --no-progress
-
- # Generate the meta info which includes xgboost version and the commit info
- $command_wrapper python tests/ci_build/format_wheel_meta.py \
- --wheel-path python-package/dist/*.whl \
- --commit-hash ${BUILDKITE_COMMIT} \
- --platform-tag ${WHEEL_TAG} \
- --meta-path python-package/dist/
- aws s3 cp python-package/dist/meta.json s3://xgboost-nightly-builds/${BRANCH_NAME}/ \
- --acl public-read --no-progress
-fi
-echo "-- Stash C++ test executable (testxgboost)"
-buildkite-agent artifact upload build/testxgboost
diff --git a/tests/buildkite/build-jvm-doc.sh b/tests/buildkite/build-jvm-doc.sh
deleted file mode 100755
index d168eb8cc58d..000000000000
--- a/tests/buildkite/build-jvm-doc.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-source tests/buildkite/conftest.sh
-
-echo "--- Build JVM packages doc"
-tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_doc.sh ${BRANCH_NAME}
-if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
-then
- echo "--- Upload JVM packages doc"
- aws s3 cp jvm-packages/${BRANCH_NAME}.tar.bz2 \
- s3://xgboost-docs/${BRANCH_NAME}.tar.bz2 --acl public-read --no-progress
-fi
diff --git a/tests/buildkite/build-jvm-linux-x86_64-manylinux2014.sh b/tests/buildkite/build-jvm-linux-x86_64-manylinux2014.sh
deleted file mode 100644
index 46a819a016d3..000000000000
--- a/tests/buildkite/build-jvm-linux-x86_64-manylinux2014.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-source tests/buildkite/conftest.sh
-
-command_wrapper="tests/ci_build/ci_build.sh jvm_manylinux2014_x86_64"
-
-# Build XGBoost4J binary
-echo "--- Build libxgboost4j.so (targeting glibc 2.17)"
-set -x
-mkdir build
-$command_wrapper bash -c \
- "cd build && cmake .. -GNinja -DJVM_BINDINGS=ON -DUSE_OPENMP=ON && ninja -v"
-ldd lib/libxgboost4j.so
-objdump -T lib/libxgboost4j.so | grep GLIBC_ | sed 's/.*GLIBC_\([.0-9]*\).*/\1/g' | sort -Vu
-
-echo "--- Upload libxgboost4j.so"
-pushd lib
-libname=libxgboost4j_linux_x86_64_${BUILDKITE_COMMIT}.so
-mv -v libxgboost4j.so ${libname}
-buildkite-agent artifact upload ${libname}
-if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
-then
- aws s3 cp ${libname} \
- s3://xgboost-nightly-builds/${BRANCH_NAME}/libxgboost4j/ \
- --acl public-read --no-progress
-fi
-popd
diff --git a/tests/buildkite/build-jvm-packages-gpu.sh b/tests/buildkite/build-jvm-packages-gpu.sh
deleted file mode 100755
index 76ffafbcfdd7..000000000000
--- a/tests/buildkite/build-jvm-packages-gpu.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-source tests/buildkite/conftest.sh
-
-echo "--- Build and test XGBoost JVM packages with CUDA"
-
-if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]]
-then
- arch_flag="-DGPU_COMPUTE_VER=75"
-else
- arch_flag=""
-fi
-
-tests/ci_build/ci_build.sh jvm_gpu_build --use-gpus \
- --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
- --build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \
- tests/ci_build/build_jvm_packages.sh \
- ${SPARK_VERSION} -Duse.cuda=ON ${arch_flag}
diff --git a/tests/buildkite/build-jvm-packages.sh b/tests/buildkite/build-jvm-packages.sh
deleted file mode 100755
index da4d1e9d8c8a..000000000000
--- a/tests/buildkite/build-jvm-packages.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-source tests/buildkite/conftest.sh
-
-echo "--- Build and test XGBoost JVM packages with Scala 2.12"
-tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_packages.sh \
- ${SPARK_VERSION}
-
-echo "--- Build and test XGBoost JVM packages with Scala 2.13"
-
-tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_packages.sh \
- ${SPARK_VERSION} "" "" "true"
diff --git a/tests/buildkite/build-win64-gpu.ps1 b/tests/buildkite/build-win64-gpu.ps1
deleted file mode 100644
index 9114d3237751..000000000000
--- a/tests/buildkite/build-win64-gpu.ps1
+++ /dev/null
@@ -1,55 +0,0 @@
-$ErrorActionPreference = "Stop"
-
-. tests/buildkite/conftest.ps1
-
-Write-Host "--- Build libxgboost on Windows with CUDA"
-
-nvcc --version
-if ( $is_release_branch -eq 0 ) {
- $arch_flag = "-DGPU_COMPUTE_VER=75"
-} else {
- $arch_flag = ""
-}
-mkdir build
-cd build
-cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON `
- -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DBUILD_DEPRECATED_CLI=ON ${arch_flag}
-if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
-cmake --build . --config Release -- /m /nodeReuse:false `
- "/consoleloggerparameters:ShowCommandLine;Verbosity=minimal"
-if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
-
-Write-Host "--- Build binary wheel"
-cd ../python-package
-conda activate
-& pip install --user -v "pip>=23"
-& pip --version
-& pip wheel --no-deps -v . --wheel-dir dist/
-Get-ChildItem . -Filter dist/*.whl |
-Foreach-Object {
- & python ../tests/ci_build/rename_whl.py `
- --wheel-path $_.FullName `
- --commit-hash $Env:BUILDKITE_COMMIT `
- --platform-tag win_amd64
- if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
-}
-
-Write-Host "--- Upload Python wheel"
-cd ..
-Get-ChildItem . -Filter python-package/dist/*.whl |
-Foreach-Object {
- & buildkite-agent artifact upload python-package/dist/$_
- if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
-}
-if ( $is_release_branch -eq 1 ) {
- Get-ChildItem . -Filter python-package/dist/*.whl |
- Foreach-Object {
- & aws s3 cp python-package/dist/$_ s3://xgboost-nightly-builds/$Env:BUILDKITE_BRANCH/ `
- --acl public-read --no-progress
- if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
- }
-}
-
-Write-Host "--- Stash C++ test executables"
-& buildkite-agent artifact upload build/testxgboost.exe
-& buildkite-agent artifact upload xgboost.exe
diff --git a/tests/buildkite/conftest.ps1 b/tests/buildkite/conftest.ps1
deleted file mode 100644
index bd623caf0c03..000000000000
--- a/tests/buildkite/conftest.ps1
+++ /dev/null
@@ -1,13 +0,0 @@
-if ( $Env:BUILDKITE_PULL_REQUEST -and ($Env:BUILDKITE_PULL_REQUEST -ne "false") ) {
- $is_pull_request = 1
-} else {
- $is_pull_request = 0
-}
-
-if ( ($Env:BUILDKITE_BRANCH -eq "master") -or ($Env:BUILDKITE_BRANCH -match "release_.+") ) {
- $is_release_branch = 1
- $enforce_daily_budget = 0
-} else {
- $is_release_branch = 0
- $enforce_daily_budget = 1
-}
diff --git a/tests/buildkite/conftest.sh b/tests/buildkite/conftest.sh
deleted file mode 100755
index 12f4c07ac6c9..000000000000
--- a/tests/buildkite/conftest.sh
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-function get_aws_secret {
- if [[ $# -ne 1 ]]
- then
- echo "Usage: get_aws_secret [Name of secret]"
- return 1
- fi
- aws secretsmanager get-secret-value --secret-id $1 --output text --region us-west-2 --query SecretString
-}
-
-function set_buildkite_env_vars_in_container {
- # Pass all Buildkite-specific env vars to Docker containers.
- # This is to be used with tests/ci_build/ci_build.sh
- export CI_DOCKER_EXTRA_PARAMS_INIT="${CI_DOCKER_EXTRA_PARAMS_INIT:-} "`
- `"--env BUILDKITE_ANALYTICS_TOKEN --env BUILDKITE_BUILD_ID --env BUILDKITE_BUILD_NUMBER "`
- `"--env BUILDKITE_JOB_ID --env BUILDKITE_BRANCH --env BUILDKITE_COMMIT "`
- `"--env BUILDKITE_MESSAGE --env BUILDKITE_BUILD_URL"
-}
-
-set -x
-
-CUDA_VERSION=12.4.1
-NCCL_VERSION=2.23.4-1
-RAPIDS_VERSION=24.10
-DEV_RAPIDS_VERSION=24.12
-SPARK_VERSION=3.5.1
-JDK_VERSION=8
-R_VERSION=4.3.2
-
-if [[ -z ${BUILDKITE:-} ]]
-then
- echo "$0 is not meant to run locally; it should run inside BuildKite."
- echo "Please inspect the content of $0 and locate the desired command manually."
- exit 1
-fi
-
-if [[ -n $BUILDKITE_PULL_REQUEST && $BUILDKITE_PULL_REQUEST != "false" ]]
-then
- is_pull_request=1
- BRANCH_NAME=PR-$BUILDKITE_PULL_REQUEST
-else
- is_pull_request=0
- BRANCH_NAME=$BUILDKITE_BRANCH
-fi
-export BRANCH_NAME=${BRANCH_NAME//\//-}
-
-if [[ $BRANCH_NAME == "master" || $BRANCH_NAME == "release_"* || $BRANCH_NAME == "federated-secure" ]]
-then
- is_release_branch=1
- enforce_daily_budget=0
-else
- is_release_branch=0
- enforce_daily_budget=1
-fi
-
-if [[ -n ${DISABLE_RELEASE:-} ]]
-then
- is_release_branch=0
-fi
-
-set +x
diff --git a/tests/buildkite/deploy-jvm-packages.sh b/tests/buildkite/deploy-jvm-packages.sh
deleted file mode 100755
index 812a6c5cafec..000000000000
--- a/tests/buildkite/deploy-jvm-packages.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-source tests/buildkite/conftest.sh
-
-if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
-then
- echo "--- Deploy JVM packages to xgboost-maven-repo S3 repo"
- tests/ci_build/ci_build.sh jvm_gpu_build \
- --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
- --build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \
- tests/ci_build/deploy_jvm_packages.sh ${SPARK_VERSION}
-fi
diff --git a/tests/buildkite/enforce_daily_budget.py b/tests/buildkite/enforce_daily_budget.py
deleted file mode 100644
index af1b1ce484b8..000000000000
--- a/tests/buildkite/enforce_daily_budget.py
+++ /dev/null
@@ -1,14 +0,0 @@
-import json
-import argparse
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser()
- parser.add_argument("--response", type=str, required=True)
- args = parser.parse_args()
- with open(args.response, "r") as f:
- payload = f.read()
- response = json.loads(payload)
- if response["approved"]:
- print(f"Testing approved. Reason: {response['reason']}")
- else:
- raise RuntimeError(f"Testing rejected. Reason: {response['reason']}")
diff --git a/tests/buildkite/enforce_daily_budget.sh b/tests/buildkite/enforce_daily_budget.sh
deleted file mode 100755
index 8212f07c1b24..000000000000
--- a/tests/buildkite/enforce_daily_budget.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-echo "--- Enforce daily budget"
-
-source tests/buildkite/conftest.sh
-
-if [[ $enforce_daily_budget == 0 ]]
-then
- echo "Automatically approving all test jobs for trunk branches"
-else
- aws lambda invoke --function-name XGBoostCICostWatcher --invocation-type RequestResponse --region us-west-2 response.json
- python3 tests/buildkite/enforce_daily_budget.py --response response.json
-fi
diff --git a/tests/buildkite/infrastructure/README.md b/tests/buildkite/infrastructure/README.md
deleted file mode 100644
index cc3e552e70ff..000000000000
--- a/tests/buildkite/infrastructure/README.md
+++ /dev/null
@@ -1,106 +0,0 @@
-BuildKite CI Infrastructure
-===========================
-
-# Worker image builder (`worker-image-pipeline/`)
-
-Use EC2 Image Builder to build machine images in a deterministic fashion.
-The machine images are used to initialize workers in the CI/CD pipelines.
-
-## Editing bootstrap scripts
-
-Currently, we create two pipelines for machine images: one for Linux workers and another
-for Windows workers.
-You can edit the bootstrap scripts to change how the worker machines are initialized.
-
-* `linux-amd64-gpu-bootstrap.yml`: Bootstrap script for Linux worker machines
-* `windows-gpu-bootstrap.yml`: Bootstrap script for Windows worker machines
-
-## Creating and running Image Builder pipelines
-
-Run the following commands to create and run pipelines in EC2 Image Builder service:
-```bash
-python worker-image-pipeline/create_worker_image_pipelines.py --aws-region us-west-2
-python worker-image-pipeline/run_pipelines.py --aws-region us-west-2
-```
-Go to the AWS CloudFormation console and verify the existence of two CloudFormation stacks:
-* `buildkite-windows-gpu-worker`
-* `buildkite-linux-amd64-gpu-worker`
-
-Then go to the EC2 Image Builder console to check the status of the image builds. You may
-want to inspect the log output should a build fail.
-Once the new machine images are done building, see the next section to deploy the new
-images to the worker machines.
-
-# Elastic CI Stack for AWS (`aws-stack-creator/`)
-
-Use EC2 Autoscaling groups to launch worker machines in EC2. BuildKite periodically sends
-messages to the Autoscaling groups to increase or decrease the number of workers according
-to the number of outstanding testing jobs.
-
-## Deploy an updated CI stack with new machine images
-
-First, edit `aws-stack-creator/metadata.py` to update the `AMI_ID` fields:
-```python
-AMI_ID = {
- # Managed by XGBoost team
- "linux-amd64-gpu": {
- "us-west-2": "...",
- },
- "linux-amd64-mgpu": {
- "us-west-2": "...",
- },
- "windows-gpu": {
- "us-west-2": "...",
- },
- "windows-cpu": {
- "us-west-2": "...",
- },
- # Managed by BuildKite
- # from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml
- "linux-amd64-cpu": {
- "us-west-2": "...",
- },
- "pipeline-loader": {
- "us-west-2": "...",
- },
- "linux-arm64-cpu": {
- "us-west-2": "...",
- },
-}
-```
-AMI IDs uniquely identify the machine images in the EC2 service.
-Go to the EC2 Image Builder console to find the AMI IDs for the new machine images
-(see the previous section), and update the following fields:
-
-* `AMI_ID["linux-amd64-gpu"]["us-west-2"]`:
- Use the latest output from the `buildkite-linux-amd64-gpu-worker` pipeline
-* `AMI_ID["linux-amd64-mgpu"]["us-west-2"]`:
- Should be identical to `AMI_ID["linux-amd64-gpu"]["us-west-2"]`
-* `AMI_ID["windows-gpu"]["us-west-2"]`:
- Use the latest output from the `buildkite-windows-gpu-worker` pipeline
-* `AMI_ID["windows-cpu"]["us-west-2"]`:
- Should be identical to `AMI_ID["windows-gpu"]["us-west-2"]`
-
-Next, visit https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml
-to look up the AMI IDs for the following fields:
-
-* `AMI_ID["linux-amd64-cpu"]["us-west-2"]`: Copy and paste the AMI ID from the field
- `Mappings/AWSRegion2AMI/us-west-2/linuxamd64`
-* `AMI_ID["pipeline-loader"]["us-west-2"]`:
- Should be identical to `AMI_ID["linux-amd64-cpu"]["us-west-2"]`
-* `AMI_ID["linux-arm64-cpu"]["us-west-2"]`: Copy and paste the AMI ID from the field
- `Mappings/AWSRegion2AMI/us-west-2/linuxarm64`
-
-Finally, run the following command to deploy the new machine images:
-```bash
-python aws-stack-creator/create_stack.py --aws-region us-west-2 --agent-token AGENT_TOKEN
-```
-Go to the AWS CloudFormation console and verify the existence of the following
-CloudFormation stacks:
-* `buildkite-pipeline-loader-autoscaling-group`
-* `buildkite-linux-amd64-cpu-autoscaling-group`
-* `buildkite-linux-amd64-gpu-autoscaling-group`
-* `buildkite-linux-amd64-mgpu-autoscaling-group`
-* `buildkite-linux-arm64-cpu-autoscaling-group`
-* `buildkite-windows-cpu-autoscaling-group`
-* `buildkite-windows-gpu-autoscaling-group`
diff --git a/tests/buildkite/infrastructure/aws-stack-creator/agent-iam-policy-template.yml b/tests/buildkite/infrastructure/aws-stack-creator/agent-iam-policy-template.yml
deleted file mode 100644
index 7f15b1fbcd4f..000000000000
--- a/tests/buildkite/infrastructure/aws-stack-creator/agent-iam-policy-template.yml
+++ /dev/null
@@ -1,32 +0,0 @@
----
-AWSTemplateFormatVersion: "2010-09-09"
-Description: "Buildkite agent's IAM policy"
-
-Resources:
- BuildkiteAgentManagedPolicy:
- Type: AWS::IAM::ManagedPolicy
- Properties:
- PolicyDocument:
- {
- "Version": "2012-10-17",
- "Statement": [
- {
- "Effect": "Allow",
- "Action": [
- "s3:*",
- "s3-object-lambda:*"
- ],
- "Resource": "*"
- },
- {
- "Effect": "Allow",
- "Action": "lambda:InvokeFunction",
- "Resource": "*"
- },
- {
- "Effect": "Allow",
- "Action": "secretsmanager:GetSecretValue",
- "Resource": "*"
- }
- ]
- }
diff --git a/tests/buildkite/infrastructure/aws-stack-creator/create_stack.py b/tests/buildkite/infrastructure/aws-stack-creator/create_stack.py
deleted file mode 100644
index 8f8db348a073..000000000000
--- a/tests/buildkite/infrastructure/aws-stack-creator/create_stack.py
+++ /dev/null
@@ -1,127 +0,0 @@
-import argparse
-import copy
-import os
-import re
-import sys
-
-import boto3
-import botocore
-from metadata import AMI_ID, COMMON_STACK_PARAMS, STACK_PARAMS
-
-current_dir = os.path.dirname(__file__)
-sys.path.append(os.path.join(current_dir, ".."))
-
-from common_blocks.utils import create_or_update_stack, wait
-
-TEMPLATE_URL = "https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml"
-
-
-def get_availability_zones(*, aws_region):
- client = boto3.client("ec2", region_name=aws_region)
- r = client.describe_availability_zones(
- Filters=[
- {"Name": "region-name", "Values": [aws_region]},
- {"Name": "zone-type", "Values": ["availability-zone"]},
- ]
- )
- return sorted([x["ZoneName"] for x in r["AvailabilityZones"]])
-
-
-def get_default_vpc(*, aws_region):
- ec2 = boto3.resource("ec2", region_name=aws_region)
- default_vpc_id = None
- for x in ec2.vpcs.filter(Filters=[{"Name": "is-default", "Values": ["true"]}]):
- return x
-
-    # Create a default VPC if one does not exist
- client = boto3.client("ec2", region_name=aws_region)
- r = client.create_default_vpc()
- default_vpc_id = r["Vpc"]["VpcId"]
-
- return ec2.Vpc(default_vpc_id)
-
-
-def format_params(args, *, stack_id, agent_iam_policy):
- default_vpc = get_default_vpc(aws_region=args.aws_region)
- azs = get_availability_zones(aws_region=args.aws_region)
- # For each of the first two availability zones (AZs), choose the default subnet
- subnets = [
- x.id
- for x in default_vpc.subnets.filter(
- Filters=[
- {"Name": "default-for-az", "Values": ["true"]},
- {"Name": "availability-zone", "Values": azs[:2]},
- ]
- )
- ]
- assert len(subnets) == 2
-
- params = copy.deepcopy(STACK_PARAMS[stack_id])
- params["ImageId"] = AMI_ID[stack_id][args.aws_region]
- params["BuildkiteQueue"] = stack_id
- params["CostAllocationTagValue"] = f"buildkite-{stack_id}"
- params["BuildkiteAgentToken"] = args.agent_token
- params["VpcId"] = default_vpc.id
- params["Subnets"] = ",".join(subnets)
- params["ManagedPolicyARNs"] = agent_iam_policy
- params.update(COMMON_STACK_PARAMS)
- return [{"ParameterKey": k, "ParameterValue": v} for k, v in params.items()]
-
-
-def get_full_stack_id(stack_id):
- return f"buildkite-{stack_id}-autoscaling-group"
-
-
-def create_agent_iam_policy(args, *, client):
- policy_stack_name = "buildkite-agent-iam-policy"
- print(f"Creating stack {policy_stack_name} for agent IAM policy...")
- with open(
- os.path.join(current_dir, "agent-iam-policy-template.yml"),
- encoding="utf-8",
- ) as f:
- policy_template = f.read()
- promise = create_or_update_stack(
- args, client=client, stack_name=policy_stack_name, template_body=policy_template
- )
- wait(promise, client=client)
-
- cf = boto3.resource("cloudformation", region_name=args.aws_region)
- policy = cf.StackResource(policy_stack_name, "BuildkiteAgentManagedPolicy")
- return policy.physical_resource_id
-
-
-def main(args):
- client = boto3.client("cloudformation", region_name=args.aws_region)
-
- agent_iam_policy = create_agent_iam_policy(args, client=client)
-
- promises = []
-
- for stack_id in AMI_ID:
- stack_id_full = get_full_stack_id(stack_id)
- print(f"Creating elastic CI stack {stack_id_full}...")
-
- params = format_params(
- args, stack_id=stack_id, agent_iam_policy=agent_iam_policy
- )
-
- promise = create_or_update_stack(
- args,
- client=client,
- stack_name=stack_id_full,
- template_url=TEMPLATE_URL,
- params=params,
- )
- promises.append(promise)
- print(f"CI stack {stack_id_full} is in progress in the background")
-
- for promise in promises:
- wait(promise, client=client)
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser()
- parser.add_argument("--aws-region", type=str, required=True)
- parser.add_argument("--agent-token", type=str, required=True)
- args = parser.parse_args()
- main(args)
diff --git a/tests/buildkite/infrastructure/aws-stack-creator/metadata.py b/tests/buildkite/infrastructure/aws-stack-creator/metadata.py
deleted file mode 100644
index 5012aa738854..000000000000
--- a/tests/buildkite/infrastructure/aws-stack-creator/metadata.py
+++ /dev/null
@@ -1,114 +0,0 @@
-AMI_ID = {
- # Managed by XGBoost team
- "linux-amd64-gpu": {
- "us-west-2": "ami-0b4079c15bbbd0faf",
- },
- "linux-amd64-mgpu": {
- "us-west-2": "ami-0b4079c15bbbd0faf",
- },
- "windows-gpu": {
- "us-west-2": "ami-0123456bcf4cdfb82",
- },
- "windows-cpu": {
- "us-west-2": "ami-0123456bcf4cdfb82",
- },
- # Managed by BuildKite
- # from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml
- "linux-amd64-cpu": {
- "us-west-2": "ami-0083e0ae73c175ec6",
- },
- "pipeline-loader": {
- "us-west-2": "ami-0083e0ae73c175ec6",
- },
- "linux-arm64-cpu": {
- "us-west-2": "ami-0dbf1f9da54222f21",
- },
-}
-
-STACK_PARAMS = {
- "linux-amd64-gpu": {
- "InstanceOperatingSystem": "linux",
- "InstanceTypes": "g4dn.xlarge",
- "AgentsPerInstance": "1",
- "MinSize": "0",
- "MaxSize": "8",
- "OnDemandPercentage": "100",
- "ScaleOutFactor": "1.0",
- "ScaleInIdlePeriod": "60", # in seconds
- },
- "linux-amd64-mgpu": {
- "InstanceOperatingSystem": "linux",
- "InstanceTypes": "g4dn.12xlarge",
- "AgentsPerInstance": "1",
- "MinSize": "0",
- "MaxSize": "1",
- "OnDemandPercentage": "100",
- "ScaleOutFactor": "1.0",
- "ScaleInIdlePeriod": "60", # in seconds
- },
- "windows-gpu": {
- "InstanceOperatingSystem": "windows",
- "InstanceTypes": "g4dn.2xlarge",
- "AgentsPerInstance": "1",
- "MinSize": "0",
- "MaxSize": "2",
- "OnDemandPercentage": "100",
- "ScaleOutFactor": "1.0",
- "ScaleInIdlePeriod": "60", # in seconds
- },
- "windows-cpu": {
- "InstanceOperatingSystem": "windows",
- "InstanceTypes": "c5a.2xlarge",
- "AgentsPerInstance": "1",
- "MinSize": "0",
- "MaxSize": "2",
- "OnDemandPercentage": "100",
- "ScaleOutFactor": "1.0",
- "ScaleInIdlePeriod": "60", # in seconds
- },
- "linux-amd64-cpu": {
- "InstanceOperatingSystem": "linux",
- "InstanceTypes": "c5a.4xlarge",
- "AgentsPerInstance": "1",
- "MinSize": "0",
- "MaxSize": "16",
- "OnDemandPercentage": "100",
- "ScaleOutFactor": "1.0",
- "ScaleInIdlePeriod": "60", # in seconds
- },
- "pipeline-loader": {
- "InstanceOperatingSystem": "linux",
- "InstanceTypes": "t3a.micro",
- "AgentsPerInstance": "1",
- "MinSize": "2",
- "MaxSize": "2",
- "OnDemandPercentage": "100",
- "ScaleOutFactor": "1.0",
- "ScaleInIdlePeriod": "60", # in seconds
- },
- "linux-arm64-cpu": {
- "InstanceOperatingSystem": "linux",
- "InstanceTypes": "c6g.4xlarge",
- "AgentsPerInstance": "1",
- "MinSize": "0",
- "MaxSize": "8",
- "OnDemandPercentage": "100",
- "ScaleOutFactor": "1.0",
- "ScaleInIdlePeriod": "60", # in seconds
- },
-}
-
-COMMON_STACK_PARAMS = {
- "BuildkiteAgentTimestampLines": "false",
- "BuildkiteWindowsAdministrator": "true",
- "AssociatePublicIpAddress": "true",
- "ScaleOutForWaitingJobs": "false",
- "EnableCostAllocationTags": "true",
- "CostAllocationTagName": "CreatedBy",
- "ECRAccessPolicy": "full",
- "EnableSecretsPlugin": "false",
- "EnableECRPlugin": "false",
- "EnableDockerLoginPlugin": "false",
- "EnableDockerUserNamespaceRemap": "false",
- "BuildkiteAgentExperiments": "normalised-upload-paths,resolve-commit-after-checkout",
-}
diff --git a/tests/buildkite/infrastructure/common_blocks/utils.py b/tests/buildkite/infrastructure/common_blocks/utils.py
deleted file mode 100644
index 27a0835e8dc0..000000000000
--- a/tests/buildkite/infrastructure/common_blocks/utils.py
+++ /dev/null
@@ -1,97 +0,0 @@
-import re
-
-import boto3
-import botocore
-
-
-def stack_exists(args, *, stack_name):
- client = boto3.client("cloudformation", region_name=args.aws_region)
- waiter = client.get_waiter("stack_exists")
- try:
- waiter.wait(StackName=stack_name, WaiterConfig={"MaxAttempts": 1})
- return True
- except botocore.exceptions.WaiterError as e:
- return False
-
-
-def create_or_update_stack(
- args, *, client, stack_name, template_url=None, template_body=None, params=None
-):
- kwargs = {
- "StackName": stack_name,
- "Capabilities": [
- "CAPABILITY_IAM",
- "CAPABILITY_NAMED_IAM",
- "CAPABILITY_AUTO_EXPAND",
- ],
- }
- if template_url:
- kwargs["TemplateURL"] = template_url
- if template_body:
- kwargs["TemplateBody"] = template_body
- if params:
- kwargs["Parameters"] = params
-
- if stack_exists(args, stack_name=stack_name):
- print(f"Stack {stack_name} already exists. Updating...")
- try:
- response = client.update_stack(**kwargs)
- return {"StackName": stack_name, "Action": "update"}
- except botocore.exceptions.ClientError as e:
- if e.response["Error"]["Code"] == "ValidationError" and re.search(
- "No updates are to be performed", e.response["Error"]["Message"]
- ):
- print(f"No update was made to {stack_name}")
- return {"StackName": stack_name, "Action": "noop"}
- else:
- raise e
- else:
- kwargs.update({"OnFailure": "ROLLBACK", "EnableTerminationProtection": False})
- response = client.create_stack(**kwargs)
- return {"StackName": stack_name, "Action": "create"}
-
-
-def replace_stack(
- args, *, client, stack_name, template_url=None, template_body=None, params=None
-):
- """Delete an existing stack and create a new stack with identical name"""
-
- if not stack_exists(args, stack_name=stack_name):
- raise ValueError(f"Stack {stack_name} does not exist")
- r = client.delete_stack(StackName=stack_name)
- delete_waiter = client.get_waiter("stack_delete_complete")
- delete_waiter.wait(StackName=stack_name)
-
- kwargs = {
- "StackName": stack_name,
- "Capabilities": [
- "CAPABILITY_IAM",
- "CAPABILITY_NAMED_IAM",
- "CAPABILITY_AUTO_EXPAND",
- ],
- "OnFailure": "ROLLBACK",
- "EnableTerminationProtection": False,
- }
- if template_url:
- kwargs["TemplateURL"] = template_url
- if template_body:
- kwargs["TemplateBody"] = template_body
- if params:
- kwargs["Parameters"] = params
- response = client.create_stack(**kwargs)
- return {"StackName": stack_name, "Action": "create"}
-
-
-def wait(promise, *, client):
- stack_name = promise["StackName"]
- print(f"Waiting for {stack_name}...")
- if promise["Action"] == "create":
- waiter = client.get_waiter("stack_create_complete")
- waiter.wait(StackName=stack_name)
- print(f"Finished creating stack {stack_name}")
- elif promise["Action"] == "update":
- waiter = client.get_waiter("stack_update_complete")
- waiter.wait(StackName=stack_name)
- print(f"Finished updating stack {stack_name}")
- elif promise["Action"] != "noop":
- raise ValueError(f"Invalid promise {promise}")
diff --git a/tests/buildkite/infrastructure/requirements.txt b/tests/buildkite/infrastructure/requirements.txt
deleted file mode 100644
index 3ce271ebbdd6..000000000000
--- a/tests/buildkite/infrastructure/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-boto3
-cfn_tools
diff --git a/tests/buildkite/infrastructure/service-user/create_service_user.py b/tests/buildkite/infrastructure/service-user/create_service_user.py
deleted file mode 100644
index ba08779bd159..000000000000
--- a/tests/buildkite/infrastructure/service-user/create_service_user.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import argparse
-import os
-
-import boto3
-
-current_dir = os.path.dirname(__file__)
-
-
-def main(args):
- with open(
- os.path.join(current_dir, "service-user-template.yml"), encoding="utf-8"
- ) as f:
- service_user_template = f.read()
-
- stack_id = "buildkite-elastic-ci-stack-service-user"
-
- print("Create a new IAM user with suitable permissions...")
- client = boto3.client("cloudformation", region_name=args.aws_region)
- response = client.create_stack(
- StackName=stack_id,
- TemplateBody=service_user_template,
- Capabilities=[
- "CAPABILITY_IAM",
- "CAPABILITY_NAMED_IAM",
- ],
- Parameters=[{"ParameterKey": "UserName", "ParameterValue": args.user_name}],
- )
- waiter = client.get_waiter("stack_create_complete")
- waiter.wait(StackName=stack_id)
- user = boto3.resource("iam", region_name=args.aws_region).User(args.user_name)
- key_pair = user.create_access_key_pair()
- print("Finished creating an IAM users with suitable permissions.")
- print(f"Access Key ID: {key_pair.access_key_id}")
- print(f"Access Secret Access Key: {key_pair.secret_access_key}")
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser()
- parser.add_argument("--aws-region", type=str, required=True)
- parser.add_argument(
- "--user-name", type=str, default="buildkite-elastic-ci-stack-user"
- )
- args = parser.parse_args()
- main(args)
diff --git a/tests/buildkite/infrastructure/service-user/service-user-template.yml b/tests/buildkite/infrastructure/service-user/service-user-template.yml
deleted file mode 100644
index 2077cfe7b148..000000000000
--- a/tests/buildkite/infrastructure/service-user/service-user-template.yml
+++ /dev/null
@@ -1,349 +0,0 @@
----
-AWSTemplateFormatVersion: "2010-09-09"
-Description: "Buildkite Elastic CI Stack CloudFormation service user"
-
-Parameters:
- UserName:
- Type: String
- Default: buildkite-elastic-ci-stack-user
- Description: Name of user to create
-
-Outputs:
- UserNameOutput:
- Value: !Ref CloudFormationServiceUser
- UserArnOutput:
- Value: !GetAtt CloudFormationServiceUser.Arn
-
-Resources:
- CloudFormationServiceUser:
- Type: AWS::IAM::User
- Properties:
- ManagedPolicyArns:
- - !Ref SubstackCrudPolicy
- - !Ref CrudPolicy
- - !Ref ImageBuilderPolicy
- UserName: !Ref UserName
-
- SubstackCrudPolicy:
- Type: AWS::IAM::ManagedPolicy
- Properties:
- PolicyDocument:
- {
- "Version": "2012-10-17",
- "Statement": [
- {
- "Effect": "Allow",
- "Action": "cloudformation:*",
- "Resource": "*"
- },
- {
- "Effect": "Allow",
- "Action": [
- "serverlessrepo:GetApplication",
- "serverlessrepo:GetCloudFormationTemplate",
- "serverlessrepo:CreateCloudFormationTemplate"
- ],
- "Resource": "*"
- }
- ]
- }
-
- CrudPolicy:
- Type: AWS::IAM::ManagedPolicy
- Properties:
- PolicyDocument:
- {
- "Version": "2012-10-17",
- "Statement": [
- {
- "Effect": "Allow",
- "Action": [
- "ec2:DescribeAccountAttributes",
- "ec2:DescribeAvailabilityZones",
- "ec2:DescribeInstances",
- "ec2:DescribeInternetGateways",
- "ec2:DescribeLaunchTemplateVersions",
- "ec2:DescribeLaunchTemplates",
- "ec2:DescribeNetworkInterfaces",
- "ec2:DescribeRouteTables",
- "ec2:DescribeSecurityGroups",
- "ec2:DescribeSubnets",
- "ec2:DescribeVpcs",
- "ec2:CreateTags"
- ],
- "Resource": "*"
- },
- {
- "Effect": "Allow",
- "Action": [
- "ec2:CreateInternetGateway",
- "ec2:AttachInternetGateway",
- "ec2:DetachInternetGateway",
- "ec2:DeleteInternetGateway"
- ],
- "Resource": "arn:aws:ec2:*:*:internet-gateway/*"
- },
- {
- "Effect": "Allow",
- "Action": [
- "ec2:CreateLaunchTemplate",
- "ec2:CreateLaunchTemplateVersion",
- "ec2:DeleteLaunchTemplate"
- ],
- "Resource": "arn:aws:ec2:*:*:launch-template/*"
- },
- {
- "Effect": "Allow",
- "Action": [
- "ec2:AssociateRouteTable",
- "ec2:DisassociateRouteTable",
- "ec2:CreateRoute",
- "ec2:CreateRouteTable",
- "ec2:DeleteRoute",
- "ec2:DeleteRouteTable"
- ],
- "Resource": "arn:aws:ec2:*:*:route-table/*"
- },
- {
- "Effect": "Allow",
- "Action": [
- "ec2:AuthorizeSecurityGroupIngress",
- "ec2:RevokeSecurityGroupIngress",
- "ec2:CreateSecurityGroup",
- "ec2:DeleteSecurityGroup"
- ],
- "Resource": "arn:aws:ec2:*:*:security-group/*"
- },
- {
- "Effect": "Allow",
- "Action": "ec2:RunInstances",
- "Resource": "*"
- },
- {
- "Effect": "Allow",
- "Action": [
- "ec2:CreateSubnet",
- "ec2:DeleteSubnet",
- "ec2:AssociateRouteTable",
- "ec2:DisassociateRouteTable"
- ],
- "Resource": "arn:aws:ec2:*:*:subnet/*"
- },
- {
- "Effect": "Allow",
- "Action": [
- "ec2:CreateVpc",
- "ec2:CreateSecurityGroup",
- "ec2:ModifyVpcAttribute",
- "ec2:AttachInternetGateway",
- "ec2:DetachInternetGateway",
- "ec2:CreateSubnet",
- "ec2:CreateRouteTable",
- "ec2:DeleteVpc"
- ],
- "Resource": "arn:aws:ec2:*:*:vpc/*"
- },
- {
- "Effect": "Allow",
- "Action": [
- "ec2:CreateDefaultVpc",
- "ec2:CreateDefaultSubnet"
- ],
- "Resource": "*"
- },
- {
- "Effect": "Allow",
- "Action": [
- "iam:CreateInstanceProfile",
- "iam:GetInstanceProfile",
- "iam:AddRoleToInstanceProfile",
- "iam:RemoveRoleFromInstanceProfile",
- "iam:DeleteInstanceProfile"
- ],
- "Resource": "arn:aws:iam::*:instance-profile/*"
- },
- {
- "Effect": "Allow",
- "Action": [
- "kms:DescribeKey",
- "kms:CreateGrant",
- "kms:Decrypt",
- "kms:Encrypt"
- ],
- "Resource": "arn:aws:kms:*:*:key/*"
- },
- {
- "Effect": "Allow",
- "Action": [
- "lambda:CreateFunction",
- "lambda:GetFunction",
- "lambda:GetFunctionCodeSigningConfig",
- "lambda:AddPermission",
- "lambda:RemovePermission",
- "lambda:DeleteFunction",
- "lambda:InvokeFunction",
- "lambda:TagResource"
- ],
- "Resource": "arn:aws:lambda:*:*:function:*"
- },
- {
- "Effect": "Allow",
- "Action": [
- "logs:CreateLogGroup",
- "logs:PutRetentionPolicy",
- "logs:DeleteLogGroup"
- ],
- "Resource": "arn:aws:logs:*:*:log-group:*"
- },
- {
- "Effect": "Allow",
- "Action": [
- "s3:GetObject",
- "s3:CreateBucket",
- "s3:PutBucketAcl",
- "s3:PutBucketLogging",
- "s3:PutBucketTagging",
- "s3:PutBucketVersioning"
- ],
- "Resource": "arn:aws:s3:::*"
- },
- {
- "Effect": "Allow",
- "Action": [
- "ssm:GetParameter",
- "ssm:PutParameter",
- "ssm:DeleteParameter"
- ],
- "Resource": "arn:aws:ssm:*:*:parameter/*"
- },
- {
- "Effect": "Allow",
- "Action": [
- "iam:ListPolicies",
- "iam:ListInstanceProfiles",
- "iam:ListRoles",
- "iam:ListPolicyVersions",
- "iam:ListRolePolicies",
- "iam:ListAttachedRolePolicies",
- "iam:ListInstanceProfileTags",
- "iam:ListRoleTags",
- "iam:ListInstanceProfilesForRole",
- "iam:GetPolicyVersion",
- "iam:GetPolicy",
- "iam:GetInstanceProfile",
- "iam:GetRole",
- "iam:GetRolePolicy",
- "iam:TagPolicy",
- "iam:UntagPolicy",
- "iam:TagInstanceProfile",
- "iam:UntagInstanceProfile",
- "iam:TagRole",
- "iam:UntagRole",
- "iam:CreateRole",
- "iam:PassRole",
- "iam:DeleteRole",
- "iam:UpdateRoleDescription",
- "iam:UpdateRole",
- "iam:AddRoleToInstanceProfile",
- "iam:RemoveRoleFromInstanceProfile",
- "iam:CreateInstanceProfile",
- "iam:DeleteInstanceProfile",
- "iam:DetachRolePolicy",
- "iam:SetDefaultPolicyVersion",
- "iam:AttachRolePolicy",
- "iam:UpdateAssumeRolePolicy",
- "iam:PutRolePermissionsBoundary",
- "iam:DeleteRolePermissionsBoundary",
- "iam:CreatePolicy",
- "iam:DeletePolicyVersion",
- "iam:DeletePolicy",
- "iam:PutRolePolicy",
- "iam:DeleteRolePolicy"
- ],
- "Resource": "*"
- },
- {
- "Effect": "Allow",
- "Action": [
- "autoscaling:DescribeLifecycleHookTypes",
- "autoscaling:DescribeTerminationPolicyTypes",
- "autoscaling:DescribePolicies",
- "autoscaling:DescribeWarmPool",
- "autoscaling:DescribeScalingActivities",
- "autoscaling:DescribeScalingProcessTypes",
- "autoscaling:DescribeScheduledActions",
- "autoscaling:DescribeAutoScalingGroups",
- "autoscaling:DescribeAutoScalingInstances",
- "autoscaling:DescribeLifecycleHooks",
- "autoscaling:SetDesiredCapacity",
- "autoscaling:PutLifecycleHook",
- "autoscaling:DeleteLifecycleHook",
- "autoscaling:SetInstanceProtection",
- "autoscaling:CreateAutoScalingGroup",
- "autoscaling:EnableMetricsCollection",
- "autoscaling:UpdateAutoScalingGroup",
- "autoscaling:DeleteAutoScalingGroup",
- "autoscaling:PutScalingPolicy",
- "autoscaling:DeletePolicy",
- "autoscaling:BatchPutScheduledUpdateGroupAction",
- "autoscaling:PutScheduledUpdateGroupAction",
- "autoscaling:DeleteScheduledAction",
- "autoscaling:PutWarmPool",
- "autoscaling:DeleteWarmPool",
- "autoscaling:TerminateInstanceInAutoScalingGroup",
- "autoscaling:AttachInstances"
- ],
- "Resource": "*"
- },
- {
- "Effect": "Allow",
- "Action": [
- "events:DescribeRule",
- "events:PutRule",
- "events:PutTargets",
- "events:RemoveTargets",
- "events:DeleteRule"
- ],
- "Resource": "arn:aws:events:*:*:rule/*"
- }
- ]
- }
-
- ImageBuilderPolicy:
- Type: AWS::IAM::ManagedPolicy
- Properties:
- PolicyDocument:
- {
- "Version": "2012-10-17",
- "Statement": [
- {
- "Effect": "Allow",
- "Action": [
- "imagebuilder:CreateComponent",
- "imagebuilder:GetComponent",
- "imagebuilder:DeleteComponent",
- "imagebuilder:CreateImageRecipe",
- "imagebuilder:GetImageRecipe",
- "imagebuilder:DeleteImageRecipe",
- "imagebuilder:CreateImagePipeline",
- "imagebuilder:GetImagePipeline",
- "imagebuilder:DeleteImagePipeline",
- "imagebuilder:CreateInfrastructureConfiguration",
- "imagebuilder:GetInfrastructureConfiguration",
- "imagebuilder:DeleteInfrastructureConfiguration",
- "imagebuilder:CreateDistributionConfiguration",
- "imagebuilder:GetDistributionConfiguration",
- "imagebuilder:DeleteDistributionConfiguration",
- "imagebuilder:TagResource",
- "imagebuilder:StartImagePipelineExecution",
- "ec2:DescribeImages",
- "ec2:DescribeSnapshots",
- "ec2:DescribeRegions",
- "ec2:DescribeVolumes",
- "ec2:DescribeKeyPairs",
- "ec2:DescribeInstanceTypeOfferings"
- ],
- "Resource": "*"
- }
- ]
- }
diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/create_worker_image_pipelines.py b/tests/buildkite/infrastructure/worker-image-pipeline/create_worker_image_pipelines.py
deleted file mode 100644
index 8051b991da51..000000000000
--- a/tests/buildkite/infrastructure/worker-image-pipeline/create_worker_image_pipelines.py
+++ /dev/null
@@ -1,85 +0,0 @@
-import argparse
-import copy
-import json
-import os
-import sys
-from urllib.request import urlopen
-
-import boto3
-import cfn_flip
-from metadata import IMAGE_PARAMS
-
-current_dir = os.path.dirname(__file__)
-sys.path.append(os.path.join(current_dir, ".."))
-
-from common_blocks.utils import replace_stack, wait
-
-BUILDKITE_CF_TEMPLATE_URL = (
- "https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml"
-)
-
-
-def format_params(*, stack_id, aws_region, ami_mapping):
- params = copy.deepcopy(IMAGE_PARAMS[stack_id])
- with open(
- os.path.join(current_dir, params["BootstrapScript"]),
- encoding="utf-8",
- ) as f:
- bootstrap_script = f.read()
- params["BaseImageId"] = ami_mapping[aws_region][params["BaseImageId"]]
- params["BootstrapScript"] = bootstrap_script
- return [{"ParameterKey": k, "ParameterValue": v} for k, v in params.items()]
-
-
-def get_ami_mapping():
- with urlopen(BUILDKITE_CF_TEMPLATE_URL) as response:
- buildkite_cf_template = response.read().decode("utf-8")
- cfn_obj = json.loads(cfn_flip.to_json(buildkite_cf_template))
- return cfn_obj["Mappings"]["AWSRegion2AMI"]
-
-
-def get_full_stack_id(stack_id):
- return f"buildkite-{stack_id}-worker"
-
-
-def main(args):
- with open(
- os.path.join(current_dir, "ec2-image-builder-pipeline-template.yml"),
- encoding="utf-8",
- ) as f:
- ec2_image_pipeline_template = f.read()
-
- ami_mapping = get_ami_mapping()
-
- client = boto3.client("cloudformation", region_name=args.aws_region)
- promises = []
-
- for stack_id in IMAGE_PARAMS:
- stack_id_full = get_full_stack_id(stack_id)
- print(f"Creating EC2 image builder stack {stack_id_full}...")
-
- params = format_params(
- stack_id=stack_id, aws_region=args.aws_region, ami_mapping=ami_mapping
- )
-
- promise = replace_stack(
- args,
- client=client,
- stack_name=stack_id_full,
- template_body=ec2_image_pipeline_template,
- params=params,
- )
- promises.append(promise)
- print(
- f"EC2 image builder stack {stack_id_full} is in progress in the background"
- )
-
- for promise in promises:
- wait(promise, client=client)
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser()
- parser.add_argument("--aws-region", type=str, required=True)
- args = parser.parse_args()
- main(args)
diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/ec2-image-builder-pipeline-template.yml b/tests/buildkite/infrastructure/worker-image-pipeline/ec2-image-builder-pipeline-template.yml
deleted file mode 100644
index 8d3bafa72f08..000000000000
--- a/tests/buildkite/infrastructure/worker-image-pipeline/ec2-image-builder-pipeline-template.yml
+++ /dev/null
@@ -1,108 +0,0 @@
----
-AWSTemplateFormatVersion: "2010-09-09"
-Description: "EC2 Image Builder pipelines to build workers"
-
-Parameters:
- BaseImageId:
- Type: String
- Description: Base AMI to build a new image on top of.
-
- BootstrapScript:
- Type: String
- Description: Content of AMI customization script
-
- InstanceType:
- Type: String
- Description: Instance type for the Image Builder instances.
-
- InstanceOperatingSystem:
- Type: String
- Description: The operating system to run on the instance
- AllowedValues:
- - Linux
- - Windows
- Default: "Linux"
-
- VolumeSize:
- Type: Number
- Description: Size of EBS volume, in GiBs
-
-Conditions:
- IsInstanceWindows:
- !Equals [ !Ref InstanceOperatingSystem, "Windows" ]
-
-Resources:
- # IAM role for the image builder instance
- InstanceRole:
- Type: AWS::IAM::Role
- Properties:
- AssumeRolePolicyDocument:
- Version: "2012-10-17"
- Statement:
- - Effect: "Allow"
- Principal:
- Service: "ec2.amazonaws.com"
- Action: "sts:AssumeRole"
- ManagedPolicyArns:
- - arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore
- - arn:aws:iam::aws:policy/EC2InstanceProfileForImageBuilder
- - arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess
-
- InstanceProfile:
- Type: AWS::IAM::InstanceProfile
- Properties:
- Roles:
- - !Ref InstanceRole
-
- # Component that runs the bootstrap script
- BootstrapComponent:
- Type: AWS::ImageBuilder::Component
- Properties:
- Name: !Join ["-", [!Ref AWS::StackName, "bootstrap-component", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
- Platform: !Ref InstanceOperatingSystem
- Version: "1.0.0"
- Description: Execute a bootstrap script.
- Data: !Ref BootstrapScript
-
- Recipe:
- Type: AWS::ImageBuilder::ImageRecipe
- Properties:
- Name: !Join ["-", [!Ref AWS::StackName, "image", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
- Components:
- - ComponentArn: !Ref BootstrapComponent
- ParentImage: !Ref BaseImageId
- BlockDeviceMappings:
- - DeviceName: !If [IsInstanceWindows, "/dev/sda1", "/dev/xvda"]
- Ebs:
- DeleteOnTermination: true
- Encrypted: false
- VolumeSize: !Ref VolumeSize
- VolumeType: gp2
- Version: "1.0.0"
-
- Infrastructure:
- Type: AWS::ImageBuilder::InfrastructureConfiguration
- Properties:
- Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline-infrastructure", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
- InstanceProfileName: !Ref InstanceProfile
- InstanceTypes:
- - !Ref InstanceType
- TerminateInstanceOnFailure: true
-
- # Copy to this region only
- Distribution:
- Type: AWS::ImageBuilder::DistributionConfiguration
- Properties:
- Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline-distribution-config", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
- Distributions:
- - Region: !Ref AWS::Region
- AmiDistributionConfiguration: {}
-
- # Composition of the above elements
- Pipeline:
- Type: AWS::ImageBuilder::ImagePipeline
- Properties:
- Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
- DistributionConfigurationArn: !Ref Distribution
- ImageRecipeArn: !Ref Recipe
- InfrastructureConfigurationArn: !Ref Infrastructure
diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/linux-amd64-gpu-bootstrap.yml b/tests/buildkite/infrastructure/worker-image-pipeline/linux-amd64-gpu-bootstrap.yml
deleted file mode 100644
index 88403911cbc6..000000000000
--- a/tests/buildkite/infrastructure/worker-image-pipeline/linux-amd64-gpu-bootstrap.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-name: BuildKiteLinuxAMD64GPUBootstrap
-description: Set up worker image for linux-amd64-gpu pipeline
-schemaVersion: 1.0
-
-phases:
- - name: build
- steps:
- - name: SetupStep
- action: ExecuteBash
- inputs:
- commands:
- - |
- yum groupinstall -y "Development tools"
- yum install -y kernel-devel-$(uname -r)
- dnf install -y kernel-modules-extra
- aws s3 cp --recursive s3://ec2-linux-nvidia-drivers/latest/ .
- chmod +x NVIDIA-Linux-x86_64*.run
- ./NVIDIA-Linux-x86_64*.run --silent
-
- curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | tee /etc/yum.repos.d/nvidia-container-toolkit.repo
- yum install -y nvidia-container-toolkit
- yum clean expire-cache
- nvidia-ctk runtime configure --runtime=docker
- systemctl restart docker
diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/metadata.py b/tests/buildkite/infrastructure/worker-image-pipeline/metadata.py
deleted file mode 100644
index 37100209fe2e..000000000000
--- a/tests/buildkite/infrastructure/worker-image-pipeline/metadata.py
+++ /dev/null
@@ -1,18 +0,0 @@
-IMAGE_PARAMS = {
- "linux-amd64-gpu": {
- "BaseImageId": "linuxamd64",
- # AMI ID is looked up from Buildkite's CloudFormation template
- "BootstrapScript": "linux-amd64-gpu-bootstrap.yml",
- "InstanceType": "g4dn.xlarge",
- "InstanceOperatingSystem": "Linux",
- "VolumeSize": "40", # in GiBs
- },
- "windows-gpu": {
- "BaseImageId": "windows",
- # AMI ID is looked up from Buildkite's CloudFormation template
- "BootstrapScript": "windows-gpu-bootstrap.yml",
- "InstanceType": "g4dn.2xlarge",
- "InstanceOperatingSystem": "Windows",
- "VolumeSize": "120", # in GiBs
- },
-}
diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/run_pipelines.py b/tests/buildkite/infrastructure/worker-image-pipeline/run_pipelines.py
deleted file mode 100644
index 9edb8b1a7c24..000000000000
--- a/tests/buildkite/infrastructure/worker-image-pipeline/run_pipelines.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import argparse
-
-import boto3
-from create_worker_image_pipelines import get_full_stack_id
-from metadata import IMAGE_PARAMS
-
-
-def main(args):
- cf = boto3.resource("cloudformation", region_name=args.aws_region)
- builder_client = boto3.client("imagebuilder", region_name=args.aws_region)
- for stack_id in IMAGE_PARAMS:
- stack_id_full = get_full_stack_id(stack_id)
- pipeline_arn = cf.Stack(stack_id_full).Resource("Pipeline").physical_resource_id
- print(f"Running pipeline {pipeline_arn} to generate a new AMI...")
- r = builder_client.start_image_pipeline_execution(imagePipelineArn=pipeline_arn)
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser()
- parser.add_argument("--aws-region", type=str, required=True)
- args = parser.parse_args()
- main(args)
diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/windows-gpu-bootstrap.yml b/tests/buildkite/infrastructure/worker-image-pipeline/windows-gpu-bootstrap.yml
deleted file mode 100644
index 0348e28c8709..000000000000
--- a/tests/buildkite/infrastructure/worker-image-pipeline/windows-gpu-bootstrap.yml
+++ /dev/null
@@ -1,71 +0,0 @@
-name: BuildKiteWindowsGPUBootstrap
-description: Set up worker image for windows-gpu pipeline
-schemaVersion: 1.0
-
-phases:
- - name: build
- steps:
- - name: SetupStep
- action: ExecutePowerShell
- inputs:
- commands:
- - |
- $ErrorActionPreference = "Stop"
-
- choco --version
- choco feature enable -n=allowGlobalConfirmation
-
- # CMake 3.29.2
- Write-Host '>>> Installing CMake 3.29.2...'
- choco install cmake --version 3.29.2 --installargs "ADD_CMAKE_TO_PATH=System"
- if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
-
- # Notepad++
- Write-Host '>>> Installing Notepad++...'
- choco install notepadplusplus
- if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
-
- # Mambaforge
- Write-Host '>>> Installing Mambaforge...'
- choco install mambaforge /RegisterPython:1 /D:C:\tools\mambaforge
- C:\tools\mambaforge\Scripts\conda.exe init --user --system
- if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
- . "C:\Windows\System32\WindowsPowerShell\v1.0\profile.ps1"
- if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
- conda config --set auto_activate_base false
-
- # Install Java 11
- Write-Host '>>> Installing Java 11...'
- choco install openjdk11
- if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
-
- # Install Maven
- Write-Host '>>> Installing Maven...'
- choco install maven
- if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
-
- # Install GraphViz
- Write-Host '>>> Installing GraphViz...'
- choco install graphviz
- if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
-
- # Install Visual Studio 2022 Community
- Write-Host '>>> Installing Visual Studio 2022 Community...'
- choco install visualstudio2022community `
- --params "--wait --passive --norestart"
- if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
- choco install visualstudio2022-workload-nativedesktop --params `
- "--wait --passive --norestart --includeOptional"
- if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
-
- # Install CUDA 12.4
- Write-Host '>>> Installing CUDA 12.4...'
- choco install cuda --version=12.4.1.551
- if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
-
- # Install R
- Write-Host '>>> Installing R...'
- choco install r.project --version=4.3.2
- if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
- choco install rtools --version=4.3.5550
- if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
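
Every package in this bootstrap file follows the same install-then-check pattern, because `$ErrorActionPreference = "Stop"` does not catch non-zero exit codes from native commands such as `choco`. Purely as an illustration, a small Python generator that emits those repetitive steps from a package list; the package names are copied from the deleted file, but the generator itself is hypothetical.

```python
# Hypothetical helper: emit the repeated "choco install + exit-code check"
# blocks seen in the deleted bootstrap document.
PACKAGES = [
    ("notepadplusplus", "Notepad++"),
    ("openjdk11", "Java 11"),
    ("maven", "Maven"),
    ("graphviz", "GraphViz"),
]

CHECK = 'if ($LASTEXITCODE -ne 0) { throw "Last command failed" }'

def emit_step(package: str, label: str) -> str:
    return "\n".join([
        f"# {label}",
        f"Write-Host '>>> Installing {label}...'",
        f"choco install {package}",
        CHECK,
    ])

if __name__ == "__main__":
    print("\n\n".join(emit_step(pkg, label) for pkg, label in PACKAGES))
```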
diff --git a/tests/buildkite/pipeline-mac-m1.yml b/tests/buildkite/pipeline-mac-m1.yml
deleted file mode 100644
index 57b1b1d12010..000000000000
--- a/tests/buildkite/pipeline-mac-m1.yml
+++ /dev/null
@@ -1,13 +0,0 @@
-steps:
- - block: ":rocket: Run this test job"
- if: build.pull_request.id != null || build.branch =~ /^dependabot\//
- - label: ":macos: Build libxgboost4j.dylib for MacOS M1"
- command: "tests/buildkite/build-jvm-macos-m1.sh"
- key: mac-m1-jvm
- agents:
- queue: mac-mini-m1
- - label: ":macos: Build and Test XGBoost for MacOS M1 with Clang 11"
- command: "tests/buildkite/test-macos-m1-clang11.sh"
- key: mac-m1-appleclang11
- agents:
- queue: mac-mini-m1
diff --git a/tests/buildkite/pipeline-mgpu.yml b/tests/buildkite/pipeline-mgpu.yml
deleted file mode 100644
index cbb573c3682c..000000000000
--- a/tests/buildkite/pipeline-mgpu.yml
+++ /dev/null
@@ -1,48 +0,0 @@
-env:
- DOCKER_CACHE_ECR_ID: "492475357299"
- DOCKER_CACHE_ECR_REGION: "us-west-2"
- DISABLE_RELEASE: "1"
- # Skip uploading artifacts to S3 bucket
- # Also, don't build all CUDA archs; just build sm_75
-steps:
- - label: ":moneybag: Enforce daily budget"
- command: "tests/buildkite/enforce_daily_budget.sh"
- key: enforce-daily-budget
- agents:
- queue: pipeline-loader
- - wait
- - block: ":rocket: Run this test job"
- if: build.pull_request.id != null || build.branch =~ /^dependabot\//
- #### -------- CONTAINER BUILD --------
- - label: ":docker: Build containers"
- commands:
- - "tests/buildkite/build-containers.sh gpu"
- - "tests/buildkite/build-containers.sh gpu_build_rockylinux8"
- - "tests/buildkite/build-containers.sh jvm_gpu_build"
- key: build-containers
- agents:
- queue: linux-amd64-cpu
- - wait
- #### -------- BUILD --------
- - label: ":console: Build CUDA"
- command: "tests/buildkite/build-cuda.sh"
- key: build-cuda
- agents:
- queue: linux-amd64-cpu
- - label: ":console: Build and test JVM packages with CUDA"
- command: "tests/buildkite/build-jvm-packages-gpu.sh"
- key: build-jvm-packages-gpu
- agents:
- queue: linux-amd64-mgpu
- - wait
- #### -------- TEST --------
- - label: ":console: Run Google Tests"
- command: "tests/buildkite/test-cpp-mgpu.sh"
- key: test-cpp-mgpu
- agents:
- queue: linux-amd64-mgpu
- - label: ":console: Test Python package, 4 GPUs"
- command: "tests/buildkite/test-python-gpu.sh mgpu"
- key: test-python-mgpu
- agents:
- queue: linux-amd64-mgpu
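
Each of these pipeline files repeats the same step shape: a label, a key, a command, and an agent queue. A short sketch (assuming PyYAML is installed) that summarizes any of the deleted pipeline files into a key/queue/label table:

```python
import sys

import yaml  # PyYAML

def summarize(path: str) -> None:
    """Print key, agent queue, and label for each step in a Buildkite pipeline."""
    with open(path) as f:
        doc = yaml.safe_load(f)
    for step in doc.get("steps", []):
        if not isinstance(step, dict):  # bare "- wait" entries parse as strings
            print(f"[{step}]")
            continue
        label = step.get("label") or step.get("block", "<unnamed>")
        queue = step.get("agents", {}).get("queue", "-")
        print(f"{step.get('key', '-'):32} {queue:20} {label}")

if __name__ == "__main__":
    summarize(sys.argv[1])
```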
diff --git a/tests/buildkite/pipeline-nightly.yml b/tests/buildkite/pipeline-nightly.yml
deleted file mode 100644
index 4d84f93a54d4..000000000000
--- a/tests/buildkite/pipeline-nightly.yml
+++ /dev/null
@@ -1,43 +0,0 @@
-# Nightly CI pipeline, to test against dev versions of dependencies
-
-env:
- DOCKER_CACHE_ECR_ID: "492475357299"
- DOCKER_CACHE_ECR_REGION: "us-west-2"
- DISABLE_RELEASE: "1"
- # Skip uploading artifacts to S3 bucket
- # Also, don't build all CUDA archs; just build sm_75
- USE_DEPS_DEV_VER: "1"
- # Use dev versions of RAPIDS and other dependencies
-steps:
- #### -------- CONTAINER BUILD --------
- - label: ":docker: Build containers"
- commands:
- - "tests/buildkite/build-containers.sh gpu_build_rockylinux8"
- - "tests/buildkite/build-containers.sh gpu_dev_ver"
- key: build-containers
- agents:
- queue: linux-amd64-cpu
- - wait
-
- - label: ":console: Build CUDA"
- command: "tests/buildkite/build-cuda.sh"
- key: build-cuda
- agents:
- queue: linux-amd64-cpu
- - wait
- - label: ":console: Build CUDA + RMM Nightly"
- command: "tests/buildkite/build-cuda-with-rmm.sh dev"
- key: build-cuda-rmm-nightly
- agents:
- queue: linux-amd64-cpu
- - wait
- - label: ":console: Test Python package, single GPU"
- command: "tests/buildkite/test-python-gpu.sh gpu"
- key: test-python-gpu
- agents:
- queue: linux-amd64-gpu
- - label: ":console: Test Python package, 4 GPUs"
- command: "tests/buildkite/test-python-gpu.sh mgpu"
- key: test-python-mgpu
- agents:
- queue: linux-amd64-mgpu
diff --git a/tests/buildkite/pipeline-win64.yml b/tests/buildkite/pipeline-win64.yml
deleted file mode 100644
index 83a61981e716..000000000000
--- a/tests/buildkite/pipeline-win64.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-steps:
- - label: ":moneybag: Enforce daily budget"
- command: "tests/buildkite/enforce_daily_budget.sh"
- key: enforce-daily-budget
- agents:
- queue: pipeline-loader
- - wait
- - block: ":rocket: Run this test job"
- if: build.pull_request.id != null || build.branch =~ /^dependabot\//
- #### -------- BUILD --------
- - label: ":windows: Build XGBoost for Windows with CUDA"
- command: "tests/buildkite/build-win64-gpu.ps1"
- key: build-win64-gpu
- agents:
- queue: windows-cpu
-
- - wait
-
- #### -------- TEST --------
- - label: ":windows: Test XGBoost on Windows"
- command: "tests/buildkite/test-win64-gpu.ps1"
- key: test-win64-gpu
- agents:
- queue: windows-gpu
diff --git a/tests/buildkite/pipeline.yml b/tests/buildkite/pipeline.yml
deleted file mode 100644
index 6c1df33b84dd..000000000000
--- a/tests/buildkite/pipeline.yml
+++ /dev/null
@@ -1,113 +0,0 @@
-env:
- DOCKER_CACHE_ECR_ID: "492475357299"
- DOCKER_CACHE_ECR_REGION: "us-west-2"
-steps:
- - label: ":moneybag: Enforce daily budget"
- command: "tests/buildkite/enforce_daily_budget.sh"
- key: enforce-daily-budget
- agents:
- queue: pipeline-loader
- - wait
- - block: ":rocket: Run this test job"
- if: build.pull_request.id != null || build.branch =~ /^dependabot\//
- #### -------- CONTAINER BUILD --------
- - label: ":docker: Build containers"
- commands:
- - "tests/buildkite/build-containers.sh cpu"
- - "tests/buildkite/build-containers.sh gpu"
- - "tests/buildkite/build-containers.sh gpu_build_rockylinux8"
- key: build-containers
- agents:
- queue: linux-amd64-cpu
- - wait
- #### -------- BUILD --------
- - label: ":console: Run clang-tidy"
- command: "tests/buildkite/run-clang-tidy.sh"
- key: run-clang-tidy
- agents:
- queue: linux-amd64-cpu
- - label: ":console: Build CPU"
- command: "tests/buildkite/build-cpu.sh"
- key: build-cpu
- agents:
- queue: linux-amd64-cpu
- - label: ":console: Build CPU ARM64 + manylinux_2_28_aarch64 wheel"
- command: "tests/buildkite/build-cpu-arm64.sh"
- key: build-cpu-arm64
- agents:
- queue: linux-arm64-cpu
- - label: ":console: Build CUDA + manylinux_2_28_x86_64 wheel"
- command: "tests/buildkite/build-cuda.sh"
- key: build-cuda
- agents:
- queue: linux-amd64-cpu
- - label: ":console: Build CUDA with RMM"
- command: "tests/buildkite/build-cuda-with-rmm.sh stable"
- key: build-cuda-with-rmm
- agents:
- queue: linux-amd64-cpu
- - label: ":console: Build R package with CUDA"
- command: "tests/buildkite/build-gpu-rpkg.sh"
- key: build-gpu-rpkg
- agents:
- queue: linux-amd64-cpu
- - label: ":console: Build JVM packages"
- timeout_in_minutes: 30
- command: "tests/buildkite/build-jvm-packages.sh"
- key: build-jvm-packages
- agents:
- queue: linux-amd64-cpu
- - label: ":console: Build libxgboost4j.so for Linux ARM64 (targeting glibc 2.17)"
- command: "tests/buildkite/build-jvm-linux-arm64-manylinux2014.sh"
- key: build-jvm-linux-arm64-manylinux2014
- agents:
- queue: linux-arm64-cpu
- - label: ":console: Build libxgboost4j.so for Linux x86_64 (targeting glibc 2.17)"
- command: "tests/buildkite/build-jvm-linux-x86_64-manylinux2014.sh"
- key: build-jvm-linux-x86_64-manylinux2014
- agents:
- queue: linux-amd64-cpu
- - label: ":console: Build JVM package doc"
- command: "tests/buildkite/build-jvm-doc.sh"
- key: build-jvm-doc
- agents:
- queue: linux-amd64-cpu
- - label: ":console: Build manylinux2014_x86_64 wheel"
- command: "tests/buildkite/build-manylinux2014.sh x86_64"
- key: build-manylinux2014-x86_64
- agents:
- queue: linux-amd64-cpu
- - label: ":console: Build manylinux2014_aarch64 wheel"
- command: "tests/buildkite/build-manylinux2014.sh aarch64"
- key: build-manylinux2014-aarch64
- agents:
- queue: linux-arm64-cpu
- - wait
- #### -------- TEST --------
- - label: ":console: Test Python package, CPU"
- command: "tests/buildkite/test-python-cpu.sh"
- key: test-python-cpu
- agents:
- queue: linux-amd64-cpu
- - label: ":console: Test Python package, CPU ARM64"
- command: "tests/buildkite/test-python-cpu-arm64.sh"
- key: test-python-cpu-arm64
- agents:
- queue: linux-arm64-cpu
- - label: ":console: Test Python package, single GPU"
- command: "tests/buildkite/test-python-gpu.sh gpu"
- key: test-python-gpu
- agents:
- queue: linux-amd64-gpu
- - label: ":console: Run Google Tests"
- command: "tests/buildkite/test-cpp-gpu.sh"
- key: test-cpp-gpu
- agents:
- queue: linux-amd64-gpu
- - wait
- #### -------- DEPLOY JVM --------
- - label: ":console: Deploy JVM packages"
- command: "tests/buildkite/deploy-jvm-packages.sh"
- key: deploy-jvm-packages
- agents:
- queue: linux-amd64-cpu
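
The `- wait` entries act as barriers: steps between two barriers run in parallel, and a phase starts only after the previous one succeeds, which is how the CONTAINER BUILD, BUILD, TEST, and DEPLOY sections above are sequenced. A sketch of that grouping logic, under the assumption that the steps are already parsed from YAML:

```python
from typing import Any, Iterable, List

def phases(steps: Iterable[Any]) -> List[List[dict]]:
    """Split Buildkite steps into parallel groups separated by wait barriers."""
    groups: List[List[dict]] = [[]]
    for step in steps:
        if step == "wait" or (isinstance(step, dict) and "wait" in step):
            groups.append([])  # barrier: start a new phase
        elif isinstance(step, dict):
            groups[-1].append(step)
    return [g for g in groups if g]

steps = [{"key": "build-cuda"}, "wait", {"key": "test-python-gpu"}]
assert phases(steps) == [[{"key": "build-cuda"}], [{"key": "test-python-gpu"}]]
```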
diff --git a/tests/buildkite/run-clang-tidy.sh b/tests/buildkite/run-clang-tidy.sh
deleted file mode 100755
index 95ff010c20f1..000000000000
--- a/tests/buildkite/run-clang-tidy.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-echo "--- Run clang-tidy"
-
-source tests/buildkite/conftest.sh
-
-tests/ci_build/ci_build.sh clang_tidy \
- --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
- python3 tests/ci_build/tidy.py --cuda-archs 75
diff --git a/tests/buildkite/test-cpp-gpu.sh b/tests/buildkite/test-cpp-gpu.sh
deleted file mode 100755
index d7197db2efce..000000000000
--- a/tests/buildkite/test-cpp-gpu.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-source tests/buildkite/conftest.sh
-
-echo "--- Run Google Tests with CUDA, using a GPU"
-buildkite-agent artifact download "build/testxgboost" . --step build-cuda
-chmod +x build/testxgboost
-tests/ci_build/ci_build.sh gpu --use-gpus \
- --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
- --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
- --build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
- build/testxgboost
-
-echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
-rm -rfv build/
-buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
-chmod +x build/testxgboost
-tests/ci_build/ci_build.sh gpu --use-gpus \
- --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
- --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
- --build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
- build/testxgboost --use-rmm-pool
diff --git a/tests/buildkite/test-cpp-mgpu.sh b/tests/buildkite/test-cpp-mgpu.sh
deleted file mode 100755
index 65614b191d04..000000000000
--- a/tests/buildkite/test-cpp-mgpu.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-source tests/buildkite/conftest.sh
-
-# Allocate extra space in /dev/shm to enable NCCL
-export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'
-
-echo "--- Run Google Tests with CUDA, using multiple GPUs"
-buildkite-agent artifact download "build/testxgboost" . --step build-cuda
-chmod +x build/testxgboost
-tests/ci_build/ci_build.sh gpu --use-gpus \
- --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
- --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
- --build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
- build/testxgboost --gtest_filter=*MGPU*
diff --git a/tests/buildkite/test-macos-m1-clang11.sh b/tests/buildkite/test-macos-m1-clang11.sh
deleted file mode 100755
index 6824cb7b14b4..000000000000
--- a/tests/buildkite/test-macos-m1-clang11.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-source tests/buildkite/conftest.sh
-
-# Display system info
-echo "--- Display system information"
-set -x
-system_profiler SPSoftwareDataType
-sysctl -n machdep.cpu.brand_string
-uname -m
-set +x
-
-# Ensure that XGBoost can be built with Clang 11
-echo "--- Build and Test XGBoost with MacOS M1, Clang 11"
-set -x
-LLVM11_PATH=$(brew --prefix llvm\@11)
-mkdir build
-pushd build
-cmake .. -GNinja -DCMAKE_C_COMPILER=${LLVM11_PATH}/bin/clang \
- -DCMAKE_CXX_COMPILER=${LLVM11_PATH}/bin/clang++ -DGOOGLE_TEST=ON \
- -DUSE_DMLC_GTEST=ON
-ninja -v
-./testxgboost
diff --git a/tests/buildkite/test-python-cpu-arm64.sh b/tests/buildkite/test-python-cpu-arm64.sh
deleted file mode 100755
index 68a428034073..000000000000
--- a/tests/buildkite/test-python-cpu-arm64.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-source tests/buildkite/conftest.sh
-
-echo "--- Test Python CPU ARM64"
-buildkite-agent artifact download "python-package/dist/*.whl" . --step build-cpu-arm64
-buildkite-agent artifact download "xgboost" . --step build-cpu-arm64
-chmod +x ./xgboost
-tests/ci_build/ci_build.sh aarch64 tests/ci_build/test_python.sh cpu-arm64
diff --git a/tests/buildkite/test-python-cpu.sh b/tests/buildkite/test-python-cpu.sh
deleted file mode 100755
index 6c53dc2821bc..000000000000
--- a/tests/buildkite/test-python-cpu.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-echo "--- Test CPU code in Python env"
-
-source tests/buildkite/conftest.sh
-
-mkdir -pv python-package/dist
-buildkite-agent artifact download "python-package/dist/*.whl" . --step build-cuda
-buildkite-agent artifact download "xgboost" . --step build-cpu
-chmod +x ./xgboost
-
-export BUILDKITE_ANALYTICS_TOKEN=$(get_aws_secret buildkite/test_analytics/cpu)
-set_buildkite_env_vars_in_container
-tests/ci_build/ci_build.sh cpu tests/ci_build/test_python.sh cpu
diff --git a/tests/buildkite/test-python-gpu.sh b/tests/buildkite/test-python-gpu.sh
deleted file mode 100755
index d7bd729a2e01..000000000000
--- a/tests/buildkite/test-python-gpu.sh
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-if [ "$#" -lt 1 ]
-then
- suite=''
- args=''
-else
- suite=$1
- shift 1
- args="$@"
-fi
-
-source tests/buildkite/conftest.sh
-
-echo "--- Fetch build artifacts"
-buildkite-agent artifact download "python-package/dist/*.whl" . --step build-cuda
-buildkite-agent artifact download "build/testxgboost" . --step build-cuda
-chmod +x build/testxgboost
-
-# Allocate extra space in /dev/shm to enable NCCL
-export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'
-
-if [[ -z "${USE_DEPS_DEV_VER-}" ]]
-then
- container_tag='gpu'
- rapids_version=${RAPIDS_VERSION}
-else
- container_tag='gpu_dev_ver'
- rapids_version=${DEV_RAPIDS_VERSION}
-fi
-
-command_wrapper="tests/ci_build/ci_build.sh ${container_tag} --use-gpus --build-arg "`
- `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
- `"RAPIDS_VERSION_ARG=${rapids_version} --build-arg "`
- `"NCCL_VERSION_ARG=$NCCL_VERSION"
-
-# Run specified test suite
-case "$suite" in
- gpu)
- export BUILDKITE_ANALYTICS_TOKEN=$(get_aws_secret buildkite/test_analytics/gpu)
- set_buildkite_env_vars_in_container
- echo "--- Test XGBoost Python package, single GPU"
- $command_wrapper tests/ci_build/test_python.sh $suite
- ;;
-
- mgpu)
- export BUILDKITE_ANALYTICS_TOKEN=$(get_aws_secret buildkite/test_analytics/mgpu)
- set_buildkite_env_vars_in_container
- echo "--- Test XGBoost Python package, 4 GPUs"
- $command_wrapper tests/ci_build/test_python.sh $suite
- ;;
-
- *)
- echo "Usage: $0 {gpu|mgpu} [extra args to pass to pytest]"
- exit 1
- ;;
-esac
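
The shell `case` above dispatches on a suite name and forwards the remaining arguments to pytest through test_python.sh. For comparison only, the same dispatch written with argparse; the script path comes from the deleted file, while the wrapper itself is hypothetical and skips the conftest.sh secret handling:

```python
import argparse
import subprocess

SUITES = ("gpu", "mgpu")

def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("suite", choices=SUITES)
    parser.add_argument("extra", nargs=argparse.REMAINDER,
                        help="extra args to pass to pytest")
    args = parser.parse_args()
    # BUILDKITE_ANALYTICS_TOKEN setup from conftest.sh is intentionally omitted.
    subprocess.run(
        ["tests/ci_build/test_python.sh", args.suite, *args.extra],
        check=True,
    )

if __name__ == "__main__":
    main()
```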
diff --git a/tests/buildkite/test-win64-gpu.ps1 b/tests/buildkite/test-win64-gpu.ps1
deleted file mode 100644
index 95a51b50228d..000000000000
--- a/tests/buildkite/test-win64-gpu.ps1
+++ /dev/null
@@ -1,39 +0,0 @@
-$ErrorActionPreference = "Stop"
-
-. tests/buildkite/conftest.ps1
-
-Write-Host "--- Test XGBoost on Windows with CUDA"
-
-New-Item python-package/dist -ItemType Directory -ea 0
-New-Item build -ItemType Directory -ea 0
-buildkite-agent artifact download "python-package/dist/*.whl" . --step build-win64-gpu
-if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
-buildkite-agent artifact download "build/testxgboost.exe" . --step build-win64-gpu
-if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
-buildkite-agent artifact download "xgboost.exe" . --step build-win64-gpu
-if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
-
-nvcc --version
-
-Write-Host "--- Run Google Tests"
-& build/testxgboost.exe
-if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
-
-Write-Host "--- Set up Python env"
-conda activate
-$env_name = -join("win64_", (New-Guid).ToString().replace("-", ""))
-mamba env create -n ${env_name} --file=tests/ci_build/conda_env/win64_test.yml
-conda activate ${env_name}
-Get-ChildItem . -Filter python-package/dist/*.whl |
-Foreach-Object {
- & python -m pip install python-package/dist/$_
- if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
-}
-
-Write-Host "--- Run Python tests"
-python -X faulthandler -m pytest -v -s -rxXs --fulltrace tests/python
-if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
-Write-Host "--- Run Python tests with GPU"
-python -X faulthandler -m pytest -v -s -rxXs --fulltrace -m "(not slow) and (not mgpu)"`
- tests/python-gpu
-if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
diff --git a/tests/ci_build/Dockerfile.gpu_dev_ver b/tests/ci_build/Dockerfile.gpu_dev_ver
deleted file mode 100644
index d23c5e83c2c7..000000000000
--- a/tests/ci_build/Dockerfile.gpu_dev_ver
+++ /dev/null
@@ -1,54 +0,0 @@
-# Container to test XGBoost against dev versions of dependencies
-
-ARG CUDA_VERSION_ARG
-FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu22.04
-ARG CUDA_VERSION_ARG
-ARG RAPIDS_VERSION_ARG
- # Should be the major.minor part of the dev version (e.g. 24.06)
-ARG NCCL_VERSION_ARG
-
-# Environment
-ENV DEBIAN_FRONTEND=noninteractive
-SHELL ["/bin/bash", "-c"] # Use Bash as shell
-
-# Install all basic requirements
-RUN \
- apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \
- apt-get update && \
- apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \
- # Python
- wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
- bash conda.sh -b -p /opt/miniforge
-
-ENV PATH=/opt/miniforge/bin:$PATH
-
-# Create new Conda environment with dev versions of cuDF, Dask, and cuPy
-RUN \
- export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \
- export CUDA_SHORT_VER=$(echo "$CUDA_VERSION_ARG" | grep -o -E '[0-9]+\.[0-9]') && \
- mamba create -y -n gpu_test -c rapidsai-nightly -c conda-forge -c nvidia \
- python=3.10 "cudf=$RAPIDS_VERSION_ARG.*" "rmm=$RAPIDS_VERSION_ARG.*" cuda-version=$CUDA_SHORT_VER \
- "nccl>=${NCCL_SHORT_VER}" \
- dask \
- "dask-cuda=$RAPIDS_VERSION_ARG.*" "dask-cudf=$RAPIDS_VERSION_ARG.*" cupy \
- numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel \
- python-kubernetes urllib3 graphviz hypothesis loky \
- "pyspark>=3.4.0" cloudpickle cuda-python && \
- mamba clean --all --yes && \
- conda run --no-capture-output -n gpu_test pip install buildkite-test-collector
-
-ENV GOSU_VERSION=1.10
-ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
-
-# Install lightweight sudo (not bound to TTY)
-RUN set -ex; \
- wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
- chmod +x /usr/local/bin/gosu && \
- gosu nobody true
-
-# Default entry-point to use if running locally
-# It will preserve attributes of created files
-COPY entrypoint.sh /scripts/
-
-WORKDIR /workspace
-ENTRYPOINT ["/scripts/entrypoint.sh"]
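
The RUN step above derives two short versions with shell tools: `cut` strips the NCCL build suffix and `grep -o` keeps only the CUDA major.minor. The same transformations in Python, with example inputs that are assumptions rather than values from this diff:

```python
import re

def nccl_short(nccl_version: str) -> str:
    # cut -d "-" -f 1 : drop the build suffix
    return nccl_version.split("-")[0]

def cuda_short(cuda_version: str) -> str:
    # grep -o -E '[0-9]+\.[0-9]' : keep the major.minor prefix only
    match = re.search(r"[0-9]+\.[0-9]", cuda_version)
    if match is None:
        raise ValueError(f"unexpected CUDA version: {cuda_version!r}")
    return match.group(0)

# Example inputs (illustrative, not taken from the CI configuration):
assert nccl_short("2.21.5-1") == "2.21.5"
assert cuda_short("12.4.1") == "12.4"
```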
diff --git a/tests/ci_build/Dockerfile.jvm_manylinux2014_aarch64 b/tests/ci_build/Dockerfile.jvm_manylinux2014_aarch64
deleted file mode 100644
index 52baff43bb6f..000000000000
--- a/tests/ci_build/Dockerfile.jvm_manylinux2014_aarch64
+++ /dev/null
@@ -1,17 +0,0 @@
-FROM quay.io/pypa/manylinux2014_aarch64
-
-RUN yum update -y && yum install -y java-1.8.0-openjdk-devel
-
-# Install lightweight sudo (not bound to TTY)
-ENV GOSU_VERSION=1.10
-RUN set -ex; \
- curl -o /usr/local/bin/gosu -L "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-arm64" && \
- chmod +x /usr/local/bin/gosu && \
- gosu nobody true
-
-# Default entry-point to use if running locally
-# It will preserve attributes of created files
-COPY entrypoint.sh /scripts/
-
-WORKDIR /workspace
-ENTRYPOINT ["/scripts/entrypoint.sh"]
diff --git a/tests/ci_build/Dockerfile.jvm_manylinux2014_x86_64 b/tests/ci_build/Dockerfile.jvm_manylinux2014_x86_64
deleted file mode 100644
index 578b85618776..000000000000
--- a/tests/ci_build/Dockerfile.jvm_manylinux2014_x86_64
+++ /dev/null
@@ -1,17 +0,0 @@
-FROM quay.io/pypa/manylinux2014_x86_64
-
-RUN yum update -y && yum install -y java-1.8.0-openjdk-devel ninja-build
-
-# Install lightweight sudo (not bound to TTY)
-ENV GOSU_VERSION=1.10
-RUN set -ex; \
- curl -o /usr/local/bin/gosu -L "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
- chmod +x /usr/local/bin/gosu && \
- gosu nobody true
-
-# Default entry-point to use if running locally
-# It will preserve attributes of created files
-COPY entrypoint.sh /scripts/
-
-WORKDIR /workspace
-ENTRYPOINT ["/scripts/entrypoint.sh"]
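
These two manylinux2014 Dockerfiles are identical apart from the base image and the gosu binary they fetch. A tiny sketch of that arch-to-binary mapping; the dictionary keys are assumptions about how one might name the architectures:

```python
GOSU_VERSION = "1.10"

# Architecture name -> gosu release asset, as hard-coded in the Dockerfiles.
GOSU_BINARY = {
    "x86_64": "gosu-amd64",
    "aarch64": "gosu-arm64",
}

def gosu_url(arch: str) -> str:
    return (
        "https://github.com/tianon/gosu/releases/download/"
        f"{GOSU_VERSION}/{GOSU_BINARY[arch]}"
    )

assert gosu_url("aarch64").endswith("gosu-arm64")
```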
diff --git a/tests/ci_build/build_jvm_doc.sh b/tests/ci_build/build_jvm_doc.sh
deleted file mode 100755
index 01a91dd629b5..000000000000
--- a/tests/ci_build/build_jvm_doc.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/bash
-
-if [ $# -ne 1 ]; then
- echo "Usage: $0 [branch name]"
- exit 1
-fi
-
-set -e
-set -x
-
-rm -rf build/
-cd jvm-packages
-
-branch_name=$1
-
-# Install JVM packages in local Maven repository
-mvn --no-transfer-progress install -DskipTests
-# Build Scaladocs
-mvn --no-transfer-progress scala:doc -DskipTests
-# Build Javadocs
-mvn --no-transfer-progress javadoc:javadoc -DskipTests
-
-# Package JVM docs in a tarball
-mkdir -p tmp/scaladocs
-cp -rv xgboost4j/target/reports/apidocs/ ./tmp/javadocs/
-cp -rv xgboost4j/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j/
-cp -rv xgboost4j-spark/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j-spark/
-cp -rv xgboost4j-flink/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j-flink/
-
-cd tmp
-tar cvjf ${branch_name}.tar.bz2 javadocs/ scaladocs/
-mv ${branch_name}.tar.bz2 ..
-cd ..
-rm -rfv tmp/
-
-set +x
-set +e
diff --git a/tests/ci_build/build_jvm_packages.sh b/tests/ci_build/build_jvm_packages.sh
deleted file mode 100755
index 99681f5ca43c..000000000000
--- a/tests/ci_build/build_jvm_packages.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-set -e
-set -x
-
-spark_version=$1
-use_cuda=$2
-gpu_arch=$3
-use_scala213=$4
-
-gpu_options=""
-if [ "x$use_cuda" == "x-Duse.cuda=ON" ]; then
- gpu_options="$use_cuda -Pgpu"
-fi
-
-rm -rf build/
-cd jvm-packages
-
-if [ "x$gpu_arch" != "x" ]; then
- export GPU_ARCH_FLAG=$gpu_arch
-fi
-
-# Purge artifacts and set correct Scala version
-pushd ..
-if [ "x$use_scala213" != "x" ]; then
- python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts
-else
- python dev/change_scala_version.py --scala-version 2.12 --purge-artifacts
-fi
-popd
-
-# Build and test XGBoost4j-spark against different Spark versions, only for the CPU variant with Scala 2.12
-if [ "x$gpu_options" == "x" ] && [ "x$use_scala213" == "x" ]; then
- mvn --no-transfer-progress clean package -Dspark.version=3.1.3 -pl xgboost4j,xgboost4j-spark
- mvn --no-transfer-progress clean package -Dspark.version=3.2.4 -pl xgboost4j,xgboost4j-spark
- mvn --no-transfer-progress clean package -Dspark.version=3.3.4 -pl xgboost4j,xgboost4j-spark
- mvn --no-transfer-progress clean package -Dspark.version=3.4.3 -pl xgboost4j,xgboost4j-spark
-fi
-
-mvn --no-transfer-progress clean install -Dspark.version=${spark_version} $gpu_options
-
-# Integration tests
-if [ "x$use_cuda" == "x" ]; then
- mvn --no-transfer-progress test -pl xgboost4j-example
-fi
-
-set +x
-set +e
diff --git a/tests/ci_build/ci_build.sh b/tests/ci_build/ci_build.sh
deleted file mode 100755
index a2f2d6063160..000000000000
--- a/tests/ci_build/ci_build.sh
+++ /dev/null
@@ -1,248 +0,0 @@
-#!/usr/bin/env bash
-#
-# Execute command within a docker container
-#
-# Usage: ci_build.sh <CONTAINER_TYPE> [--use-gpus]
-# [--dockerfile <DOCKERFILE_PATH>] [-it]
-# [--build-arg <BUILD_ARG>] <COMMAND>
-#
-# CONTAINER_TYPE: Type of the docker container used to run the build, e.g.,
-# (cpu | gpu)
-#
-# --use-gpus: Whether to grant the container access to NVIDIA GPUs.
-#
-# DOCKERFILE_PATH: (Optional) Path to the Dockerfile used for docker build. If
-# this optional value is not supplied (via the --dockerfile
-# flag), Dockerfile.CONTAINER_TYPE is used by default.
-#
-# BUILD_ARG: (Optional) an argument to be passed to docker build
-#
-# COMMAND: Command to be executed in the docker container
-#
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-# Get the command line arguments.
-CONTAINER_TYPE=$( echo "$1" | tr '[:upper:]' '[:lower:]' )
-shift 1
-
-# Dockerfile to be used in docker build
-DOCKERFILE_PATH="${SCRIPT_DIR}/Dockerfile.${CONTAINER_TYPE}"
-DOCKER_CONTEXT_PATH="${SCRIPT_DIR}"
-
-GPU_FLAG=''
-if [[ "$1" == "--use-gpus" ]]; then
- echo "Using NVIDIA GPUs"
- GPU_FLAG='--gpus all'
- shift 1
-fi
-
-if [[ "$1" == "--dockerfile" ]]; then
- DOCKERFILE_PATH="$2"
- DOCKER_CONTEXT_PATH=$(dirname "${DOCKERFILE_PATH}")
- echo "Using custom Dockerfile path: ${DOCKERFILE_PATH}"
- echo "Using custom docker build context path: ${DOCKER_CONTEXT_PATH}"
- shift 2
-fi
-
-if [[ -n "${CI_DOCKER_EXTRA_PARAMS_INIT}" ]]
-then
- IFS=' ' read -r -a CI_DOCKER_EXTRA_PARAMS <<< "${CI_DOCKER_EXTRA_PARAMS_INIT}"
-fi
-
-if [[ "$1" == "-it" ]]; then
- CI_DOCKER_EXTRA_PARAMS+=('-it')
- shift 1
-fi
-
-while [[ "$1" == "--build-arg" ]]; do
- CI_DOCKER_BUILD_ARG+=" $1"
- CI_DOCKER_BUILD_ARG+=" $2"
- shift 2
-done
-
-if [[ ! -f "${DOCKERFILE_PATH}" ]]; then
- echo "Invalid Dockerfile path: \"${DOCKERFILE_PATH}\""
- exit 1
-fi
-
-COMMAND=("$@")
-
-# Validate command line arguments.
-if [ "$#" -lt 1 ] || [ ! -e "${SCRIPT_DIR}/Dockerfile.${CONTAINER_TYPE}" ]; then
- supported_container_types=$( ls -1 ${SCRIPT_DIR}/Dockerfile.* | \
- sed -n 's/.*Dockerfile\.\([^\/]*\)/\1/p' | tr '\n' ' ' )
- echo "Usage: $(basename $0) CONTAINER_TYPE COMMAND"
- echo " CONTAINER_TYPE can be one of [${supported_container_types}]"
- echo " COMMAND is a command (with arguments) to run inside"
- echo " the container."
- exit 1
-fi
-
-# Helper function to traverse directories up until given file is found.
-function upsearch () {
- test / == "$PWD" && return || \
- test -e "$1" && echo "$PWD" && return || \
- cd .. && upsearch "$1"
-}
-
-# Set up WORKSPACE. Jenkins will set them for you or we pick
-# reasonable defaults if you run it outside of Jenkins.
-WORKSPACE="${WORKSPACE:-${SCRIPT_DIR}/../../}"
-
-# Determine the docker image name
-DOCKER_IMG_NAME="xgb-ci.${CONTAINER_TYPE}"
-
-# Append cuda version if available
-CUDA_VERSION=$(echo "${CI_DOCKER_BUILD_ARG}" | grep -o -E 'CUDA_VERSION_ARG=[0-9]+\.[0-9]+' | grep -o -E '[0-9]+\.[0-9]+')
-# Append jdk version if available
-JDK_VERSION=$(echo "${CI_DOCKER_BUILD_ARG}" | grep -o -E 'JDK_VERSION=[0-9]+' | grep -o -E '[0-9]+')
-# Append cmake version if available
-CMAKE_VERSION=$(echo "${CI_DOCKER_BUILD_ARG}" | grep -o -E 'CMAKE_VERSION=[0-9]+\.[0-9]+' | grep -o -E '[0-9]+\.[0-9]+')
-# Append R version if available
-USE_R35=$(echo "${CI_DOCKER_BUILD_ARG}" | grep -o -E 'USE_R35=[0-9]+' | grep -o -E '[0-9]+$')
-if [[ ${USE_R35} == "1" ]]; then
- USE_R35="_r35"
-elif [[ ${USE_R35} == "0" ]]; then
- USE_R35="_no_r35"
-fi
-DOCKER_IMG_NAME=$DOCKER_IMG_NAME$CUDA_VERSION$JDK_VERSION$CMAKE_VERSION$USE_R35
-
-# Under Jenkins matrix build, the build tag may contain characters such as
-# commas (,) and equal signs (=), which are not valid inside docker image names.
-DOCKER_IMG_NAME=$(echo "${DOCKER_IMG_NAME}" | sed -e 's/=/_/g' -e 's/,/-/g')
-
-# Convert to all lower-case, as per requirement of Docker image names
-DOCKER_IMG_NAME=$(echo "${DOCKER_IMG_NAME}" | tr '[:upper:]' '[:lower:]')
-
-# Bash on Ubuntu on Windows
-UBUNTU_ON_WINDOWS=$([ -e /proc/version ] && grep -l Microsoft /proc/version || echo "")
-# MSYS, Git Bash, etc.
-MSYS=$([ -e /proc/version ] && grep -l MINGW /proc/version || echo "")
-
-if [[ -z "$UBUNTU_ON_WINDOWS" ]] && [[ -z "$MSYS" ]] && [[ ! "$OSTYPE" == "darwin"* ]]; then
- USER_IDS="-e CI_BUILD_UID=$( id -u ) -e CI_BUILD_GID=$( id -g ) -e CI_BUILD_USER=$( id -un ) -e CI_BUILD_GROUP=$( id -gn ) -e CI_BUILD_HOME=${WORKSPACE}"
-fi
-
-# Print arguments.
-cat <<EOT
-- scipy>=1.4.1
-- pandas
-- matplotlib
-- dask
-- distributed
-- python-graphviz
-- pytest
-- jsonschema
-- hypothesis
-- python-graphviz
-- pip
-- py-ubjson
-- loky
-- pyarrow
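
ci_build.sh assembles the image name from the container type plus whatever versions it can scrape out of the accumulated `--build-arg` string, then sanitizes the result to satisfy Docker's naming rules. A Python rendering of that logic, mirroring the grep/sed/tr pipeline above:

```python
import re

def docker_img_name(container_type: str, build_arg: str) -> str:
    """Mirror the DOCKER_IMG_NAME derivation from ci_build.sh."""
    def grab(pattern: str) -> str:
        match = re.search(pattern, build_arg)
        return match.group(1) if match else ""

    name = "xgb-ci." + container_type
    name += grab(r"CUDA_VERSION_ARG=([0-9]+\.[0-9]+)")
    name += grab(r"JDK_VERSION=([0-9]+)")
    name += grab(r"CMAKE_VERSION=([0-9]+\.[0-9]+)")
    use_r35 = grab(r"USE_R35=([0-9]+)")
    if use_r35 == "1":
        name += "_r35"
    elif use_r35 == "0":
        name += "_no_r35"
    # Docker image names must be lower-case with no "=" or "," characters.
    return name.replace("=", "_").replace(",", "-").lower()

assert docker_img_name("gpu", "--build-arg CUDA_VERSION_ARG=12.4") == "xgb-ci.gpu12.4"
```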
diff --git a/tests/ci_build/deploy_jvm_packages.sh b/tests/ci_build/deploy_jvm_packages.sh
deleted file mode 100755
index 2cb108c8bc6f..000000000000
--- a/tests/ci_build/deploy_jvm_packages.sh
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/bin/bash
-
-set -e
-set -x
-
-if [ $# -ne 1 ]; then
- echo "Usage: $0 [spark version]"
- exit 1
-fi
-
-spark_version=$1
-
-cd jvm-packages
-rm -rf $(find . -name target)
-rm -rf ../build/
-
-## Deploy JVM packages to xgboost-maven-repo
-
-# Scala 2.12, CPU variant
-mvn --no-transfer-progress deploy -Pdefault,release-to-s3 -Dspark.version=${spark_version} -DskipTests -Dmaven.test.skip=true
-mvn clean
-mvn clean -Pdefault,release-to-s3
-
-# Scala 2.12, GPU variant
-mvn --no-transfer-progress install -Pgpu -Dspark.version=${spark_version} -DskipTests -Dmaven.test.skip=true
-mvn --no-transfer-progress deploy -Pgpu,release-to-s3 -pl xgboost4j-spark-gpu -Dspark.version=${spark_version} -DskipTests -Dmaven.test.skip=true
-
-# Scala 2.13, CPU variant
-pushd ..
-python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts
-popd
-mvn --no-transfer-progress deploy -Pdefault,release-to-s3 -Dspark.version=${spark_version} -DskipTests -Dmaven.test.skip=true
-mvn clean
-mvn clean -Pdefault,release-to-s3
-
-# Scala 2.13, GPU variant
-mvn --no-transfer-progress install -Pgpu -Dspark.version=${spark_version} -DskipTests -Dmaven.test.skip=true
-mvn --no-transfer-progress deploy -Pgpu,release-to-s3 -pl xgboost4j-spark-gpu -Dspark.version=${spark_version} -DskipTests -Dmaven.test.skip=true
-
-set +x
-set +e
diff --git a/tests/ci_build/jenkins_tools.Groovy b/tests/ci_build/jenkins_tools.Groovy
deleted file mode 100644
index 1bc2574c6ac0..000000000000
--- a/tests/ci_build/jenkins_tools.Groovy
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/usr/bin/groovy
-// -*- mode: groovy -*-
-
-/* Utility functions for Jenkins */
-
-// Command to run command inside a docker container
-dockerRun = 'tests/ci_build/ci_build.sh'
-
-
-/**
- * Creates cmake and make builds
- */
-def buildFactory(buildName, conf, restricted, build_func) {
- def os = conf["os"]
- def device = conf["withGpu"] ? (conf["multiGpu"] ? "mgpu" : "gpu") : "cpu"
- def restricted_flag = restricted ? "restricted" : "unrestricted"
- def nodeReq = "${os} && ${device} && ${restricted_flag}"
- def dockerTarget = conf["withGpu"] ? "gpu" : "cpu"
- [ ("${buildName}") : { build_func("${buildName}", conf, nodeReq, dockerTarget) }
- ]
-}
-
-def cmakeOptions(conf) {
- return ([
- conf["withGpu"] ? '-DUSE_CUDA=ON' : '-DUSE_CUDA=OFF',
- conf["withNccl"] ? '-DUSE_NCCL=ON' : '-DUSE_NCCL=OFF',
- conf["withOmp"] ? '-DOPEN_MP:BOOL=ON' : '']
- ).join(" ")
-}
-
-def getBuildName(conf) {
- def gpuLabel = conf['withGpu'] ? ( (conf['multiGpu'] ? "_mgpu" : "") + "_cuda" + conf['cudaVersion'] + (conf['withNccl'] ? "_nccl" : "_nonccl")) : "_cpu"
- def ompLabel = conf['withOmp'] ? "_omp" : ""
- def pyLabel = "_py${conf['pythonVersion']}"
- return "${conf['os']}${gpuLabel}${ompLabel}${pyLabel}"
-}
-
-return this
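
cmakeOptions translates a Jenkins matrix configuration into CMake flags. The same mapping in Python, dropping the stray trailing space the Groovy `join` produces when OpenMP is off:

```python
def cmake_options(conf: dict) -> str:
    """Translate a Jenkins matrix configuration into CMake flags."""
    opts = (
        "-DUSE_CUDA=ON" if conf.get("withGpu") else "-DUSE_CUDA=OFF",
        "-DUSE_NCCL=ON" if conf.get("withNccl") else "-DUSE_NCCL=OFF",
        "-DOPEN_MP:BOOL=ON" if conf.get("withOmp") else "",
    )
    return " ".join(opt for opt in opts if opt)

assert cmake_options({"withGpu": True}) == "-DUSE_CUDA=ON -DUSE_NCCL=OFF"
```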
diff --git a/tests/ci_build/test_python.sh b/tests/ci_build/test_python.sh
deleted file mode 100755
index a1a023046e5b..000000000000
--- a/tests/ci_build/test_python.sh
+++ /dev/null
@@ -1,111 +0,0 @@
-#!/bin/bash
-set -e
-
-if [ "$#" -lt 1 ]
-then
- suite=''
- args=''
-else
- suite=$1
- shift 1
- args="$@"
-fi
-
-# Install XGBoost Python package
-function install_xgboost {
- wheel_found=0
- pip install --upgrade pip --user
- for file in python-package/dist/*.whl
- do
- if [ -e "${file}" ]
- then
- pip install --user "${file}"
- wheel_found=1
- break # need just one
- fi
- done
- if [ "$wheel_found" -eq 0 ]
- then
- pushd .
- cd python-package
- pip install --user -v .
- popd
- fi
-}
-
-function setup_pyspark_envs {
- export PYSPARK_DRIVER_PYTHON=`which python`
- export PYSPARK_PYTHON=`which python`
- export SPARK_TESTING=1
-}
-
-function unset_pyspark_envs {
- unset PYSPARK_DRIVER_PYTHON
- unset PYSPARK_PYTHON
- unset SPARK_TESTING
-}
-
-function uninstall_xgboost {
- pip uninstall -y xgboost
-}
-
-# Run specified test suite
-case "$suite" in
- gpu)
- source activate gpu_test
- set -x
- install_xgboost
- setup_pyspark_envs
- python -c 'from cupy.cuda import jitify; jitify._init_module()'
- pytest -v -s -rxXs --fulltrace --durations=0 -m "not mgpu" ${args} tests/python-gpu
- unset_pyspark_envs
- uninstall_xgboost
- set +x
- ;;
-
- mgpu)
- source activate gpu_test
- set -x
- install_xgboost
- setup_pyspark_envs
- python -c 'from cupy.cuda import jitify; jitify._init_module()'
- pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/python-gpu
- pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_with_dask
- pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_with_spark
- pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_federated
- unset_pyspark_envs
- uninstall_xgboost
- set +x
- ;;
-
- cpu)
- source activate linux_cpu_test
- set -x
- install_xgboost
- export RAY_OBJECT_STORE_ALLOW_SLOW_STORAGE=1
- setup_pyspark_envs
- pytest -v -s -rxXs --fulltrace --durations=0 ${args} tests/python
- pytest -v -s -rxXs --fulltrace --durations=0 ${args} tests/test_distributed/test_with_dask
- pytest -v -s -rxXs --fulltrace --durations=0 ${args} tests/test_distributed/test_with_spark
- pytest -v -s -rxXs --fulltrace --durations=0 ${args} tests/test_distributed/test_federated
- unset_pyspark_envs
- uninstall_xgboost
- set +x
- ;;
-
- cpu-arm64)
- source activate aarch64_test
- set -x
- install_xgboost
- setup_pyspark_envs
- pytest -v -s -rxXs --fulltrace --durations=0 ${args} tests/python/test_basic.py tests/python/test_basic_models.py tests/python/test_model_compatibility.py
- unset_pyspark_envs
- uninstall_xgboost
- set +x
- ;;
-
- *)
- echo "Usage: $0 {gpu|mgpu|cpu|cpu-arm64} [extra args to pass to pytest]"
- exit 1
- ;;
-esac
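
install_xgboost prefers an already-built wheel from python-package/dist and only falls back to a source install when no wheel is present; the shell loop breaks after the first match. The same fallback expressed in Python, as a reference sketch:

```python
import glob
import subprocess
import sys

def install_xgboost() -> None:
    """Install XGBoost from a built wheel if present, else from source."""
    wheels = sorted(glob.glob("python-package/dist/*.whl"))
    if wheels:
        # need just one, matching the shell loop's early break
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "--user", wheels[0]],
            check=True,
        )
    else:
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "--user", "-v", "."],
            cwd="python-package",
            check=True,
        )
```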