diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index 1a8098071ba3..000000000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,35 +0,0 @@ -# To get started with Dependabot version updates, you'll need to specify which -# package ecosystems to update and where the package manifests are located. -# Please see the documentation for all configuration options: -# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates - -version: 2 -updates: - - package-ecosystem: "maven" - directory: "/jvm-packages" - schedule: - interval: "monthly" - - package-ecosystem: "maven" - directory: "/jvm-packages/xgboost4j" - schedule: - interval: "monthly" - - package-ecosystem: "maven" - directory: "/jvm-packages/xgboost4j-gpu" - schedule: - interval: "monthly" - - package-ecosystem: "maven" - directory: "/jvm-packages/xgboost4j-example" - schedule: - interval: "monthly" - - package-ecosystem: "maven" - directory: "/jvm-packages/xgboost4j-spark" - schedule: - interval: "monthly" - - package-ecosystem: "maven" - directory: "/jvm-packages/xgboost4j-spark-gpu" - schedule: - interval: "monthly" - - package-ecosystem: "github-actions" - directory: / - schedule: - interval: "monthly" diff --git a/.github/runs-on.yml b/.github/runs-on.yml index d951a08e8273..e21895ee8c3b 100644 --- a/.github/runs-on.yml +++ b/.github/runs-on.yml @@ -34,4 +34,3 @@ runners: cpu: 32 family: ["c7i-flex", "c7i", "c7a", "c5", "c5a"] image: windows-amd64 - diff --git a/.github/workflows/freebsd.yml b/.github/workflows/freebsd.yml index d3208a1294d1..d0eb13c20fb6 100644 --- a/.github/workflows/freebsd.yml +++ b/.github/workflows/freebsd.yml @@ -15,20 +15,20 @@ jobs: timeout-minutes: 20 name: A job to run test in FreeBSD steps: - - uses: actions/checkout@v4 - with: - submodules: 'true' - - name: Test in FreeBSD - id: test - uses: vmactions/freebsd-vm@v1 - with: - usesh: true - prepare: | - pkg install -y cmake git ninja googletest + - uses: actions/checkout@v4 + with: + submodules: 'true' + - name: Test in FreeBSD + id: test + uses: vmactions/freebsd-vm@v1 + with: + usesh: true + prepare: | + pkg install -y cmake git ninja googletest - run: | - mkdir build - cd build - cmake .. -GNinja -DGOOGLE_TEST=ON - ninja -v - ./testxgboost + run: | + mkdir build + cd build + cmake .. -GNinja -DGOOGLE_TEST=ON + ninja -v + ./testxgboost diff --git a/.github/workflows/i386.yml b/.github/workflows/i386.yml index aec7e9d31087..455d6ea91033 100644 --- a/.github/workflows/i386.yml +++ b/.github/workflows/i386.yml @@ -19,25 +19,25 @@ jobs: ports: - 5000:5000 steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3.7.1 - with: - driver-opts: network=host - - name: Build and push container - uses: docker/build-push-action@v6 - with: - context: . 
- file: tests/ci_build/Dockerfile.i386 - push: true - tags: localhost:5000/xgboost/build-32bit:latest - cache-from: type=gha - cache-to: type=gha,mode=max - - name: Build XGBoost - run: | - docker run --rm -v $PWD:/workspace -w /workspace \ - -e CXXFLAGS='-Wno-error=overloaded-virtual -Wno-error=maybe-uninitialized -Wno-error=redundant-move' \ - localhost:5000/xgboost/build-32bit:latest \ - tests/ci_build/build_via_cmake.sh + - uses: actions/checkout@v4.2.2 + with: + submodules: 'true' + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3.7.1 + with: + driver-opts: network=host + - name: Build and push container + uses: docker/build-push-action@v6 + with: + context: . + file: ops/docker/dockerfile/Dockerfile.i386 + push: true + tags: localhost:5000/xgboost/build-32bit:latest + cache-from: type=gha + cache-to: type=gha,mode=max + - name: Build XGBoost + run: | + docker run --rm -v $PWD:/workspace -w /workspace \ + -e CXXFLAGS='-Wno-error=overloaded-virtual -Wno-error=maybe-uninitialized -Wno-error=redundant-move' \ + localhost:5000/xgboost/build-32bit:latest \ + bash ops/script/build_via_cmake.sh diff --git a/.github/workflows/jvm_tests.yml b/.github/workflows/jvm_tests.yml index dcbd9de55b50..549094d52e37 100644 --- a/.github/workflows/jvm_tests.yml +++ b/.github/workflows/jvm_tests.yml @@ -1,100 +1,284 @@ -name: XGBoost-JVM-Tests +name: XGBoost CI (JVM packages) on: [push, pull_request] permissions: - contents: read # to fetch code (actions/checkout) + contents: read # to fetch code (actions/checkout) concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true +env: + BRANCH_NAME: >- + ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }} + USE_DOCKER_CACHE: 1 + jobs: - test-with-jvm: - name: Test JVM on OS ${{ matrix.os }} + build-containers: + name: Build CI containers (${{ matrix.container_id }}) + runs-on: + - runs-on=${{ github.run_id }} + - runner=${{ matrix.runner }} + strategy: + max-parallel: 2 + matrix: + container_id: + - xgb-ci.manylinux2014_x86_64 + - xgb-ci.jvm + - xgb-ci.jvm_gpu_build + runner: [linux-amd64-cpu] + include: + - container_id: xgb-ci.manylinux2014_aarch64 + runner: linux-arm64-cpu + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - name: Build ${{ matrix.container_id }} + run: bash ops/docker_build.sh + env: + CONTAINER_ID: ${{ matrix.container_id }} + + build-jvm-manylinux2014: + name: >- + Build libxgboost4j.so targeting glibc 2.17 + (arch ${{ matrix.arch }}, runner ${{ matrix.runner }}) + needs: build-containers + runs-on: + - runs-on=${{ github.run_id }} + - runner=${{ matrix.runner }} + strategy: + fail-fast: false + matrix: + include: + - arch: aarch64 + runner: linux-arm64-cpu + - arch: x86_64 + runner: linux-amd64-cpu + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - name: Fetch container from cache + run: bash ops/docker_build.sh + env: + CONTAINER_ID: xgb-ci.manylinux2014_${{ matrix.arch }} + - run: bash ops/pipeline/build-jvm-manylinux2014.sh ${{ matrix.arch }} + + build-jvm-gpu: + name: Build libxgboost4j.so with CUDA + needs: build-containers + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-amd64-cpu + steps: + # Restart Docker daemon so that it 
recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - name: Fetch container from cache + run: bash ops/docker_build.sh + env: + CONTAINER_ID: xgb-ci.jvm_gpu_build + - run: bash ops/pipeline/build-jvm-gpu.sh + - name: Stash files + run: bash ops/stash_artifacts.sh lib/libxgboost4j.so + env: + COMMAND: upload + KEY: build-jvm-gpu + + build-jvm-mac: + name: "Build libxgboost4j.dylib for ${{ matrix.description }}" + runs-on: ${{ matrix.runner }} + strategy: + fail-fast: false + matrix: + include: + - description: "MacOS (Apple Silicon)" + script: ops/pipeline/build-jvm-macos-apple-silicon.sh + runner: macos-14 + - description: "MacOS (Intel)" + script: ops/pipeline/build-jvm-macos-intel.sh + runner: macos-13 + steps: + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - run: bash ${{ matrix.script }} + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }} + + build-jvm-docs: + name: Build docs for JVM packages + needs: [build-jvm-gpu] + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-amd64-cpu + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - name: Fetch container from cache + run: bash ops/docker_build.sh + env: + CONTAINER_ID: xgb-ci.jvm_gpu_build + - name: Unstash files + run: bash ops/stash_artifacts.sh lib/libxgboost4j.so + env: + COMMAND: download + KEY: build-jvm-gpu + - run: bash ops/pipeline/build-jvm-doc.sh + + build-test-jvm-packages: + name: Build and test JVM packages (Linux) + needs: build-containers + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-amd64-cpu + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - name: Fetch container from cache + run: bash ops/docker_build.sh + env: + CONTAINER_ID: xgb-ci.jvm + - name: Build and test JVM packages (Scala 2.12) + run: bash ops/pipeline/build-test-jvm-packages.sh + env: + SCALA_VERSION: 2.12 + - name: Build and test JVM packages (Scala 2.13) + run: bash ops/pipeline/build-test-jvm-packages.sh + env: + SCALA_VERSION: 2.13 + - name: Stash files + run: bash ops/stash_artifacts.sh lib/libxgboost4j.so + env: + COMMAND: upload + KEY: build-test-jvm-packages + + build-test-jvm-packages-other-os: + name: Build and test JVM packages (${{ matrix.os }}) timeout-minutes: 30 runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - os: [windows-latest, ubuntu-latest, macos-13] + os: [windows-latest, macos-13] + + steps: + - uses: actions/checkout@v4.2.2 + with: + submodules: 'true' + + - uses: actions/setup-java@v4.5.0 + with: + distribution: 'temurin' + java-version: '8' + + - uses: conda-incubator/setup-miniconda@v3.1.0 + with: + miniforge-variant: Miniforge3 + miniforge-version: latest + activate-environment: jvm_tests + environment-file: ops/conda_env/jvm_tests.yml + use-mamba: true + - name: Cache Maven packages + uses: actions/cache@v4.1.2 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }} + restore-keys: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }} + + - name: Test XGBoost4J (Core) + run: | + cd jvm-packages + mvn test -B -pl :xgboost4j_2.12 + + - name: Publish artifact xgboost4j.dll to S3 + run: | + cd 
lib/ + Rename-Item -Path xgboost4j.dll -NewName xgboost4j_${{ github.sha }}.dll + dir + python -m awscli s3 cp xgboost4j_${{ github.sha }}.dll ` + s3://xgboost-nightly-builds/${{ env.BRANCH_NAME }}/libxgboost4j/ ` + --acl public-read --region us-west-2 + if: | + (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) && + matrix.os == 'windows-latest' + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }} + + test-jvm-packages-gpu: + name: Test JVM packages with CUDA + needs: [build-jvm-gpu] + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-amd64-mgpu + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - name: Fetch container from cache + run: bash ops/docker_build.sh + env: + CONTAINER_ID: xgb-ci.jvm_gpu_build + - name: Unstash files + run: bash ops/stash_artifacts.sh lib/libxgboost4j.so + env: + COMMAND: download + KEY: build-jvm-gpu + - run: bash ops/pipeline/test-jvm-gpu.sh + + deploy-jvm-packages: + name: Deploy JVM packages to S3 (${{ matrix.variant }}) + needs: [build-jvm-gpu, build-test-jvm-packages, test-jvm-packages-gpu] + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-amd64-cpu + strategy: + fail-fast: false + matrix: + include: + - variant: cpu + container_id: xgb-ci.jvm + artifact_from: build-test-jvm-packages + - variant: gpu + container_id: xgb-ci.jvm_gpu_build + artifact_from: build-jvm-gpu steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' - - - uses: actions/setup-java@b36c23c0d998641eff861008f374ee103c25ac73 # v4.4.0 - with: - distribution: 'temurin' - java-version: '8' - - - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 - with: - miniforge-variant: Miniforge3 - miniforge-version: latest - activate-environment: jvm_tests - environment-file: tests/ci_build/conda_env/jvm_tests.yml - use-mamba: true - - - name: Cache Maven packages - uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 - with: - path: ~/.m2 - key: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }} - restore-keys: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }} - - - name: Test XGBoost4J (Core) - run: | - cd jvm-packages - mvn test -B -pl :xgboost4j_2.12 - - - name: Test XGBoost4J (Core, Spark, Examples) - run: | - rm -rfv build/ - cd jvm-packages - mvn -B test - if: matrix.os == 'ubuntu-latest' # Distributed training doesn't work on Windows - - - name: Extract branch name - shell: bash - run: | - echo "branch=${GITHUB_REF#refs/heads/}" >> "$GITHUB_OUTPUT" - id: extract_branch - if: | - (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) && - (matrix.os == 'windows-latest' || matrix.os == 'macos-13') - - - name: Publish artifact xgboost4j.dll to S3 - run: | - cd lib/ - Rename-Item -Path xgboost4j.dll -NewName xgboost4j_${{ github.sha }}.dll - dir - python -m awscli s3 cp xgboost4j_${{ github.sha }}.dll s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read --region us-west-2 - if: | - (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) && - matrix.os == 'windows-latest' - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }} - AWS_SECRET_ACCESS_KEY: ${{ 
secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }} - - - name: Publish artifact libxgboost4j.dylib to S3 - shell: bash -l {0} - run: | - cd lib/ - mv -v libxgboost4j.dylib libxgboost4j_${{ github.sha }}.dylib - ls - python -m awscli s3 cp libxgboost4j_${{ github.sha }}.dylib s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read --region us-west-2 - if: | - (github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) && - matrix.os == 'macos-13' - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }} - - - name: Build and Test XGBoost4J with scala 2.13 - run: | - rm -rfv build/ - cd jvm-packages - mvn -B clean install test -Pdefault,scala-2.13 - if: matrix.os == 'ubuntu-latest' # Distributed training doesn't work on Windows + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - name: Fetch container from cache + run: bash ops/docker_build.sh + env: + CONTAINER_ID: ${{ matrix.container_id }} + - name: Unstash files + run: | + bash ops/stash_artifacts.sh lib/libxgboost4j.so + ls -lh lib/libxgboost4j.so + env: + COMMAND: download + KEY: ${{ matrix.artifact_from }} + - name: Deploy JVM packages to S3 + run: >- + bash ops/pipeline/deploy-jvm-packages.sh ${{ matrix.variant }} \ + ${{ matrix.container_id }} diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 000000000000..70d892b1061d --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,131 @@ +name: XGBoost CI (Lint) + +on: [push, pull_request] + +permissions: + contents: read # to fetch code (actions/checkout) + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + BRANCH_NAME: >- + ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }} + +jobs: + build-containers: + name: Build CI containers + env: + CONTAINER_ID: xgb-ci.clang_tidy + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-amd64-cpu + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - name: Build ${{ env.CONTAINER_ID }} + run: bash ops/docker_build.sh + + clang-tidy: + name: Run clang-tidy + needs: build-containers + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-amd64-cpu + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - name: Fetch container from cache + run: bash ops/docker_build.sh + env: + CONTAINER_ID: xgb-ci.clang_tidy + - run: bash ops/pipeline/run-clang-tidy.sh + + python-mypy-lint: + runs-on: ubuntu-latest + name: Type and format checks for the Python package + steps: + - uses: actions/checkout@v4.2.2 + with: + submodules: 'true' + - uses: conda-incubator/setup-miniconda@v3.1.0 + with: + miniforge-variant: Miniforge3 + miniforge-version: latest + activate-environment: python_lint + environment-file: ops/conda_env/python_lint.yml + use-mamba: true + - name: Display Conda env + shell: bash -el {0} + run: | + conda info + conda list + - name: Run mypy + shell: bash -el {0} + run: | + python ops/script/lint_python.py --format=0 
--type-check=1 --pylint=0 + - name: Run formatter + shell: bash -el {0} + run: | + python ops/script/lint_python.py --format=1 --type-check=0 --pylint=0 + - name: Run pylint + shell: bash -el {0} + run: | + python ops/script/lint_python.py --format=0 --type-check=0 --pylint=1 + + cpp-lint: + runs-on: ubuntu-latest + name: Code linting for C++ + steps: + - uses: actions/checkout@v4.2.2 + with: + submodules: 'true' + - uses: actions/setup-python@v5.3.0 + with: + python-version: "3.10" + architecture: 'x64' + - name: Install Python packages + run: | + python -m pip install wheel setuptools cmakelint cpplint==1.6.1 pylint + - name: Run lint + run: | + python3 ops/script/lint_cpp.py + bash ops/script/lint_cmake.sh + + lintr: + runs-on: ubuntu-latest + name: Run R linters on Ubuntu + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + steps: + - uses: actions/checkout@v4.2.2 + with: + submodules: 'true' + + - uses: r-lib/actions/setup-r@v2.11.0 + with: + r-version: "release" + + - name: Cache R packages + uses: actions/cache@v4.1.2 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-r-release-7-${{ hashFiles('R-package/DESCRIPTION') }} + restore-keys: ${{ runner.os }}-r-release-7-${{ hashFiles('R-package/DESCRIPTION') }} + + - name: Install dependencies + shell: Rscript {0} + run: | + source("./R-package/tests/helper_scripts/install_deps.R") + + - name: Run lintr + run: | + MAKEFLAGS="-j$(nproc)" R CMD INSTALL R-package/ + Rscript ops/script/lint_r.R $(pwd) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3c0a67b4f463..15822c55f0d5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,193 +1,297 @@ -# This is a basic workflow to help you get started with Actions +name: XGBoost CI -name: XGBoost-CI - -# Controls when the action will run. 
Triggers the workflow on push or pull request -# events but only for the master branch on: [push, pull_request] permissions: - contents: read # to fetch code (actions/checkout) + contents: read # to fetch code (actions/checkout) concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true -# A workflow run is made up of one or more jobs that can run sequentially or in parallel +env: + BRANCH_NAME: >- + ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }} + USE_DOCKER_CACHE: 1 + jobs: - gtest-cpu: - name: Test Google C++ test (CPU) - runs-on: ${{ matrix.os }} + build-containers: + name: Build CI containers (${{ matrix.container_id }}) + runs-on: + - runs-on=${{ github.run_id }} + - runner=${{ matrix.runner }} strategy: - fail-fast: false + max-parallel: 2 matrix: - os: [macos-12] + container_id: + - xgb-ci.gpu_build_rockylinux8 + - xgb-ci.gpu_build_r_rockylinux8 + - xgb-ci.gpu + - xgb-ci.gpu_dev_ver + - xgb-ci.cpu + - xgb-ci.manylinux_2_28_x86_64 + - xgb-ci.manylinux2014_x86_64 + runner: [linux-amd64-cpu] + include: + - container_id: xgb-ci.manylinux2014_aarch64 + runner: linux-arm64-cpu + - container_id: xgb-ci.aarch64 + runner: linux-arm64-cpu + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - name: Build ${{ matrix.container_id }} + run: bash ops/docker_build.sh + env: + CONTAINER_ID: ${{ matrix.container_id }} + + build-cpu: + name: Build CPU + needs: build-containers + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-amd64-cpu + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - name: Fetch container from cache + run: bash ops/docker_build.sh + env: + CONTAINER_ID: xgb-ci.cpu + - run: bash ops/pipeline/build-cpu.sh + - name: Stash CLI executable + run: bash ops/stash_artifacts.sh ./xgboost + env: + COMMAND: upload + KEY: build-cpu + + build-cpu-arm64: + name: Build CPU ARM64 + manylinux_2_28_aarch64 wheel + needs: build-containers + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-arm64-cpu + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - name: Fetch container from cache + run: bash ops/docker_build.sh + env: + CONTAINER_ID: xgb-ci.aarch64 + - run: bash ops/pipeline/build-cpu-arm64.sh + - name: Stash files + run: bash ops/stash_artifacts.sh ./xgboost python-package/dist/*.whl + env: + COMMAND: upload + KEY: build-cpu-arm64 + + build-cuda: + name: Build CUDA + manylinux_2_28_x86_64 wheel + needs: build-containers + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-amd64-cpu steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' - - name: Install system packages - run: | - brew install ninja libomp - - name: Build gtest binary - run: | - mkdir build - cd build - cmake .. 
-DGOOGLE_TEST=ON -DUSE_OPENMP=ON -DUSE_DMLC_GTEST=ON -GNinja -DBUILD_DEPRECATED_CLI=ON -DUSE_SANITIZER=ON -DENABLED_SANITIZERS=address -DCMAKE_BUILD_TYPE=RelWithDebInfo - ninja -v - - name: Run gtest binary - run: | - cd build - ./testxgboost - ctest -R TestXGBoostCLI --extra-verbose + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - name: Fetch container from cache + run: bash ops/docker_build.sh + env: + CONTAINER_ID: xgb-ci.gpu_build_rockylinux8 + - name: Fetch container from cache + run: bash ops/docker_build.sh + env: + CONTAINER_ID: xgb-ci.manylinux_2_28_x86_64 + - run: bash ops/pipeline/build-cuda.sh + - name: Stash files + run: | + bash ops/stash_artifacts.sh \ + build/testxgboost ./xgboost python-package/dist/*.whl + env: + COMMAND: upload + KEY: build-cuda - gtest-cpu-nonomp: - name: Test Google C++ unittest (CPU Non-OMP) - runs-on: ${{ matrix.os }} + build-cuda-with-rmm: + name: Build CUDA with RMM + needs: build-containers + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-amd64-cpu + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - name: Fetch container from cache + run: bash ops/docker_build.sh + env: + CONTAINER_ID: xgb-ci.gpu_build_rockylinux8 + - name: Fetch container from cache + run: bash ops/docker_build.sh + env: + CONTAINER_ID: xgb-ci.manylinux_2_28_x86_64 + - run: bash ops/pipeline/build-cuda-with-rmm.sh + - name: Stash files + run: bash ops/stash_artifacts.sh build/testxgboost + env: + COMMAND: upload + KEY: build-cuda-with-rmm + + build-manylinux2014: + name: Build manylinux2014_${{ matrix.arch }} wheel + needs: build-containers + runs-on: + - runs-on=${{ github.run_id }} + - runner=${{ matrix.runner }} strategy: fail-fast: false matrix: - os: [ubuntu-latest] + include: + - arch: aarch64 + runner: linux-arm64-cpu + - arch: x86_64 + runner: linux-amd64-cpu + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - name: Fetch container from cache + run: bash ops/docker_build.sh + env: + CONTAINER_ID: xgb-ci.manylinux2014_${{ matrix.arch }} + - run: bash ops/pipeline/build-manylinux2014.sh ${{ matrix.arch }} + + build-gpu-rpkg: + name: Build GPU-enabled R package + needs: build-containers + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-amd64-cpu steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' - - name: Install system packages - run: | - sudo apt-get install -y --no-install-recommends ninja-build - - name: Build and install XGBoost - shell: bash -l {0} - run: | - mkdir build - cd build - cmake .. 
-GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DUSE_OPENMP=OFF -DBUILD_DEPRECATED_CLI=ON - ninja -v - - name: Run gtest binary - run: | - cd build - ctest --extra-verbose + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - name: Fetch container from cache + run: bash ops/docker_build.sh + env: + CONTAINER_ID: xgb-ci.gpu_build_r_rockylinux8 + - run: bash ops/pipeline/build-gpu-rpkg.sh - gtest-cpu-sycl: - name: Test Google C++ unittest (CPU SYCL) - runs-on: ${{ matrix.os }} + test-cpp-gpu: + name: >- + Run Google Tests with GPUs + (Suite ${{ matrix.suite }}, Runner ${{ matrix.runner }}) + needs: [build-cuda, build-cuda-with-rmm] + runs-on: + - runs-on=${{ github.run_id }} + - runner=${{ matrix.runner }} strategy: fail-fast: false + max-parallel: 2 matrix: - os: [ubuntu-latest] - python-version: ["3.10"] + include: + - suite: gpu + runner: linux-amd64-gpu + artifact_from: build-cuda + - suite: gpu-rmm + runner: linux-amd64-gpu + artifact_from: build-cuda-with-rmm + - suite: mgpu + runner: linux-amd64-mgpu + artifact_from: build-cuda steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' - - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 - with: - miniforge-variant: Miniforge3 - miniforge-version: latest - activate-environment: linux_sycl_test - environment-file: tests/ci_build/conda_env/linux_sycl_test.yml - use-mamba: true - - name: Display Conda env - run: | - conda info - conda list - - name: Build and install XGBoost - shell: bash -l {0} - run: | - mkdir build - cd build - cmake .. -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DPLUGIN_SYCL=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX - make -j$(nproc) - - name: Run gtest binary for SYCL - run: | - cd build - ./testxgboost --gtest_filter=Sycl* - - name: Run gtest binary for non SYCL - run: | - cd build - ./testxgboost --gtest_filter=-Sycl* + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - name: Fetch container from cache + run: bash ops/docker_build.sh + env: + CONTAINER_ID: xgb-ci.gpu + - name: Unstash gtest + run: | + bash ops/stash_artifacts.sh build/testxgboost + chmod +x build/testxgboost + env: + COMMAND: download + KEY: ${{ matrix.artifact_from }} + - run: bash ops/pipeline/test-cpp-gpu.sh ${{ matrix.suite }} - c-api-demo: - name: Test installing XGBoost lib + building the C API demo - runs-on: ${{ matrix.os }} - defaults: - run: - shell: bash -l {0} + test-python: + name: Run Python tests (${{ matrix.description }}) + needs: [build-cuda, build-cpu-arm64] + runs-on: + - runs-on=${{ github.run_id }} + - runner=${{ matrix.runner }} strategy: fail-fast: false + max-parallel: 2 matrix: - os: ["ubuntu-latest"] - python-version: ["3.10"] - steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' - - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 - with: - miniforge-variant: Miniforge3 - miniforge-version: latest - activate-environment: cpp_test - environment-file: tests/ci_build/conda_env/cpp_test.yml - use-mamba: true - - name: Display Conda env - run: | - conda info - conda list - - - name: Build and install XGBoost static library - run: | - mkdir build - cd build - 
cmake .. -DBUILD_STATIC_LIB=ON -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -GNinja - ninja -v install - cd - - - name: Build and run C API demo with static - run: | - pushd . - cd demo/c-api/ - mkdir build - cd build - cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX - ninja -v - ctest - cd .. - rm -rf ./build - popd - - - name: Build and install XGBoost shared library - run: | - cd build - cmake .. -DBUILD_STATIC_LIB=OFF -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -GNinja -DPLUGIN_FEDERATED=ON -DGOOGLE_TEST=ON - ninja -v install - ./testxgboost - cd - - - name: Build and run C API demo with shared - run: | - pushd . - cd demo/c-api/ - mkdir build - cd build - cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX - ninja -v - ctest - popd - ./tests/ci_build/verify_link.sh ./demo/c-api/build/basic/api-demo - ./tests/ci_build/verify_link.sh ./demo/c-api/build/external-memory/external-memory-demo - - cpp-lint: - runs-on: ubuntu-latest - name: Code linting for C++ + include: + - description: "single GPU" + container: xgb-ci.gpu + suite: gpu + runner: linux-amd64-gpu + artifact_from: build-cuda + - description: "single GPU, nightly deps" + container: xgb-ci.gpu_dev_ver + suite: gpu + runner: linux-amd64-gpu + artifact_from: build-cuda + - description: "multiple GPUs" + container: xgb-ci.gpu + suite: mgpu + runner: linux-amd64-mgpu + artifact_from: build-cuda + - description: "multiple GPUs, nightly deps" + container: xgb-ci.gpu_dev_ver + suite: mgpu + runner: linux-amd64-mgpu + artifact_from: build-cuda + - description: "CPU" + container: xgb-ci.cpu + suite: cpu + runner: linux-amd64-cpu + artifact_from: build-cuda + - description: "CPU ARM64" + container: xgb-ci.aarch64 + suite: cpu-arm64 + runner: linux-arm64-cpu + artifact_from: build-cpu-arm64 steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' - - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 - with: - python-version: "3.10" - architecture: 'x64' - - name: Install Python packages - run: | - python -m pip install wheel setuptools cmakelint cpplint==1.6.1 pylint - - name: Run lint - run: | - python3 tests/ci_build/lint_cpp.py - sh ./tests/ci_build/lint_cmake.sh + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - name: Fetch container from cache + run: bash ops/docker_build.sh + env: + CONTAINER_ID: ${{ matrix.container }} + - name: Unstash Python wheel + run: | + bash ops/stash_artifacts.sh python-package/dist/*.whl ./xgboost + chmod +x ./xgboost + env: + COMMAND: download + KEY: ${{ matrix.artifact_from }} + - name: Run Python tests, ${{ matrix.description }} + run: bash ops/pipeline/test-python.sh ${{ matrix.suite }} ${{ matrix.container }} diff --git a/.github/workflows/misc.yml b/.github/workflows/misc.yml new file mode 100644 index 000000000000..1e6df46615d5 --- /dev/null +++ b/.github/workflows/misc.yml @@ -0,0 +1,120 @@ +name: XGBoost CI (misc) + +on: [push, pull_request] + +permissions: + contents: read # to fetch code (actions/checkout) + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + BRANCH_NAME: >- + ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }} + +jobs: + gtest-cpu: + name: Test Google C++ test (CPU) + runs-on: macos-13 + steps: + - uses: actions/checkout@v4.2.2 + with: + submodules: 
'true' + - name: Install system packages + run: | + brew install ninja libomp + - name: Build gtest binary + run: | + mkdir build + cd build + cmake .. -DGOOGLE_TEST=ON -DUSE_OPENMP=ON -DUSE_DMLC_GTEST=ON -GNinja -DBUILD_DEPRECATED_CLI=ON -DUSE_SANITIZER=ON -DENABLED_SANITIZERS=address -DCMAKE_BUILD_TYPE=RelWithDebInfo + ninja -v + - name: Run gtest binary + run: | + cd build + ./testxgboost + ctest -R TestXGBoostCLI --extra-verbose + + gtest-cpu-nonomp: + name: Test Google C++ unittest (CPU Non-OMP) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4.2.2 + with: + submodules: 'true' + - name: Install system packages + run: | + sudo apt-get install -y --no-install-recommends ninja-build + - name: Build and install XGBoost + shell: bash -l {0} + run: | + mkdir build + cd build + cmake .. -GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DUSE_OPENMP=OFF -DBUILD_DEPRECATED_CLI=ON + ninja -v + - name: Run gtest binary + run: | + cd build + ctest --extra-verbose + + c-api-demo: + name: Test installing XGBoost lib + building the C API demo + runs-on: ubuntu-latest + defaults: + run: + shell: bash -l {0} + steps: + - uses: actions/checkout@v4.2.2 + with: + submodules: 'true' + - uses: conda-incubator/setup-miniconda@v3.1.0 + with: + miniforge-variant: Miniforge3 + miniforge-version: latest + activate-environment: cpp_test + environment-file: ops/conda_env/cpp_test.yml + use-mamba: true + - name: Display Conda env + run: | + conda info + conda list + - name: Build and install XGBoost static library + run: | + mkdir build + cd build + cmake .. -DBUILD_STATIC_LIB=ON -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -GNinja + ninja -v install + cd - + - name: Build and run C API demo with static + run: | + pushd . + cd demo/c-api/ + mkdir build + cd build + cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX + ninja -v + ctest + cd .. + rm -rf ./build + popd + + - name: Build and install XGBoost shared library + run: | + cd build + cmake .. -DBUILD_STATIC_LIB=OFF -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -GNinja -DPLUGIN_FEDERATED=ON -DGOOGLE_TEST=ON + ninja -v install + ./testxgboost + cd - + - name: Build and run C API demo with shared + run: | + pushd . + cd demo/c-api/ + mkdir build + cd build + cmake .. 
-GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX + ninja -v + ctest + popd + ./ops/script/verify_link.sh ./demo/c-api/build/basic/api-demo + ./ops/script/verify_link.sh ./demo/c-api/build/external-memory/external-memory-demo diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index 907cf98e1011..bcc0f5b8ba81 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -1,4 +1,4 @@ -name: XGBoost-Python-Tests +name: XGBoost CI (Python tests) on: [push, pull_request] @@ -14,67 +14,32 @@ concurrency: cancel-in-progress: true jobs: - python-mypy-lint: - runs-on: ubuntu-latest - name: Type and format checks for the Python package - strategy: - matrix: - os: [ubuntu-latest] - steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' - - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 - with: - miniforge-variant: Miniforge3 - miniforge-version: latest - activate-environment: python_lint - environment-file: tests/ci_build/conda_env/python_lint.yml - use-mamba: true - - name: Display Conda env - run: | - conda info - conda list - - name: Run mypy - run: | - python tests/ci_build/lint_python.py --format=0 --type-check=1 --pylint=0 - - name: Run formatter - run: | - python tests/ci_build/lint_python.py --format=1 --type-check=0 --pylint=0 - - name: Run pylint - run: | - python tests/ci_build/lint_python.py --format=0 --type-check=0 --pylint=1 - python-sdist-test-on-Linux: - # Mismatched glibcxx version between system and conda forge. - runs-on: ${{ matrix.os }} - name: Test installing XGBoost Python source package on ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] + runs-on: ubuntu-latest + name: Test installing XGBoost Python source package steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' - - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 - with: - miniforge-variant: Miniforge3 - miniforge-version: latest - activate-environment: sdist_test - environment-file: tests/ci_build/conda_env/sdist_test.yml - use-mamba: true - - name: Display Conda env - run: | - conda info - conda list - - name: Build and install XGBoost - run: | - cd python-package - python --version - python -m build --sdist - pip install -v ./dist/xgboost-*.tar.gz --config-settings use_openmp=False - cd .. - python -c 'import xgboost' + - uses: actions/checkout@v4.2.2 + with: + submodules: 'true' + - uses: conda-incubator/setup-miniconda@v3.1.0 + with: + miniforge-variant: Miniforge3 + miniforge-version: latest + activate-environment: sdist_test + environment-file: ops/conda_env/sdist_test.yml + use-mamba: true + - name: Display Conda env + run: | + conda info + conda list + - name: Build and install XGBoost + run: | + cd python-package + python --version + python -m build --sdist + pip install -v ./dist/xgboost-*.tar.gz --config-settings use_openmp=False + cd .. + python -c 'import xgboost' python-sdist-test: # Use system toolchain instead of conda toolchain for macos and windows. 
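
A pattern that recurs across these workflows is passing build artifacts between jobs with `bash ops/stash_artifacts.sh <paths>`, driven by the `COMMAND` (`upload` or `download`) and `KEY` environment variables. The script itself is not shown in this diff; the sketch below is only a minimal illustration of the idea, assuming an S3 bucket named by `RUNS_ON_S3_BUCKET_CACHE` and the `cache/<repo>/stash/<run-id>` key layout that windows.yml declares as `ARTIFACT_STASH_PREFIX`.

#!/usr/bin/env bash
# Hypothetical sketch of ops/stash_artifacts.sh -- not the actual script.
# Copies the given paths to (or from) a per-run S3 prefix so that later
# jobs in the same workflow run can retrieve them under the same KEY.
set -euo pipefail

: "${COMMAND:?must be 'upload' or 'download'}"   # set by the workflow step
: "${KEY:?stash key, e.g. build-cuda}"
BUCKET="${RUNS_ON_S3_BUCKET_CACHE:?S3 bucket backing the stash}"
PREFIX="cache/${GITHUB_REPOSITORY}/stash/${GITHUB_RUN_ID}/${KEY}"

for path in "$@"; do
  if [[ "${COMMAND}" == "upload" ]]; then
    aws s3 cp "${path}" "s3://${BUCKET}/${PREFIX}/${path}"
  else
    aws s3 cp "s3://${BUCKET}/${PREFIX}/${path}" "${path}"
  fi
done
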
@@ -82,244 +47,97 @@ jobs: runs-on: ${{ matrix.os }} name: Test installing XGBoost Python source package on ${{ matrix.os }} strategy: + fail-fast: false matrix: os: [macos-13, windows-latest] python-version: ["3.10"] steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' - - name: Install osx system dependencies - if: matrix.os == 'macos-13' - run: | - brew install ninja libomp - - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 - with: - auto-update-conda: true - python-version: ${{ matrix.python-version }} - activate-environment: test - - name: Install build - run: | - conda install -c conda-forge python-build - - name: Display Conda env - run: | - conda info - conda list - - name: Build and install XGBoost - run: | - cd python-package - python --version - python -m build --sdist - pip install -v ./dist/xgboost-*.tar.gz - cd .. - python -c 'import xgboost' + - uses: actions/checkout@v4.2.2 + with: + submodules: 'true' + - name: Install osx system dependencies + if: matrix.os == 'macos-13' + run: | + brew install ninja libomp + - uses: conda-incubator/setup-miniconda@v3.1.0 + with: + auto-update-conda: true + python-version: ${{ matrix.python-version }} + activate-environment: test + - name: Install build + run: | + conda install -c conda-forge python-build + - name: Display Conda env + run: | + conda info + conda list + - name: Build and install XGBoost + run: | + cd python-package + python --version + python -m build --sdist + pip install -v ./dist/xgboost-*.tar.gz + cd .. + python -c 'import xgboost' python-tests-on-macos: - name: Test XGBoost Python package on ${{ matrix.config.os }} - runs-on: ${{ matrix.config.os }} + name: Test XGBoost Python package on macos-13 + runs-on: macos-13 timeout-minutes: 60 - strategy: - matrix: - config: - - {os: macos-13} - steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' - - - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 - with: - miniforge-variant: Miniforge3 - miniforge-version: latest - activate-environment: macos_cpu_test - environment-file: tests/ci_build/conda_env/macos_cpu_test.yml - use-mamba: true - - - name: Display Conda env - run: | - conda info - conda list - - - name: Build XGBoost on macos - run: | - brew install ninja - - mkdir build - cd build - # Set prefix, to use OpenMP library from Conda env - # See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228 - # to learn why we don't use libomp from Homebrew. - cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX -DBUILD_DEPRECATED_CLI=ON - ninja - - - name: Install Python package - run: | - cd python-package - python --version - pip install -v . 
- - - name: Test Python package - run: | - pytest -s -v -rxXs --durations=0 ./tests/python - - - name: Test Dask Interface - run: | - pytest -s -v -rxXs --durations=0 ./tests/test_distributed/test_with_dask - - python-tests-on-win: - name: Test XGBoost Python package on ${{ matrix.config.os }} - runs-on: ${{ matrix.config.os }} - timeout-minutes: 60 - strategy: - matrix: - config: - - {os: windows-latest, python-version: '3.10'} - - steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' - - - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 - with: - auto-update-conda: true - python-version: ${{ matrix.config.python-version }} - activate-environment: win64_env - environment-file: tests/ci_build/conda_env/win64_cpu_test.yml - - - name: Display Conda env - run: | - conda info - conda list - - - name: Build XGBoost on Windows - run: | - mkdir build_msvc - cd build_msvc - cmake .. -G"Visual Studio 17 2022" -DCMAKE_CONFIGURATION_TYPES="Release" -A x64 -DBUILD_DEPRECATED_CLI=ON - cmake --build . --config Release --parallel $(nproc) - - - name: Install Python package - run: | - cd python-package - python --version - pip wheel -v . --wheel-dir dist/ - pip install ./dist/*.whl - - - name: Test Python package - run: | - pytest -s -v -rxXs --durations=0 ./tests/python - - python-tests-on-ubuntu: - name: Test XGBoost Python package on ${{ matrix.config.os }} - runs-on: ${{ matrix.config.os }} - timeout-minutes: 90 - strategy: - matrix: - config: - - {os: ubuntu-latest, python-version: "3.10"} - - steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' - - - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 - with: - miniforge-variant: Miniforge3 - miniforge-version: latest - activate-environment: linux_cpu_test - environment-file: tests/ci_build/conda_env/linux_cpu_test.yml - use-mamba: true - - - name: Display Conda env - run: | - conda info - conda list - - - name: Build XGBoost on Ubuntu - run: | - mkdir build - cd build - cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX -DBUILD_DEPRECATED_CLI=ON - ninja - - - name: Install Python package - run: | - cd python-package - python --version - pip install -v . 
- - - name: Test Python package - run: | - pytest -s -v -rxXs --durations=0 ./tests/python + - uses: actions/checkout@v4.2.2 + with: + submodules: 'true' - - name: Test Dask Interface - run: | - pytest -s -v -rxXs --durations=0 ./tests/test_distributed/test_with_dask + - uses: conda-incubator/setup-miniconda@v3.1.0 + with: + miniforge-variant: Miniforge3 + miniforge-version: latest + activate-environment: macos_cpu_test + environment-file: ops/conda_env/macos_cpu_test.yml + use-mamba: true - - name: Test PySpark Interface - shell: bash -l {0} - run: | - pytest -s -v -rxXs --durations=0 ./tests/test_distributed/test_with_spark + - name: Display Conda env + run: | + conda info + conda list - python-sycl-tests-on-ubuntu: - name: Test XGBoost Python package with SYCL on ${{ matrix.config.os }} - runs-on: ${{ matrix.config.os }} - timeout-minutes: 90 - strategy: - matrix: - config: - - {os: ubuntu-latest, python-version: "3.10"} + - name: Build XGBoost on macos + run: | + brew install ninja - steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' + mkdir build + cd build + # Set prefix, to use OpenMP library from Conda env + # See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228 + # to learn why we don't use libomp from Homebrew. + cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX -DBUILD_DEPRECATED_CLI=ON + ninja - - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 - with: - miniforge-variant: Miniforge3 - miniforge-version: latest - activate-environment: linux_sycl_test - environment-file: tests/ci_build/conda_env/linux_sycl_test.yml - use-mamba: true + - name: Install Python package + run: | + cd python-package + python --version + pip install -v . - - name: Display Conda env - run: | - conda info - conda list - - name: Build XGBoost on Ubuntu - run: | - mkdir build - cd build - cmake .. -DPLUGIN_SYCL=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_PREFIX_PATH=$CONDA_PREFIX - make -j$(nproc) - - name: Install Python package - run: | - cd python-package - python --version - pip install -v . 
- - name: Test Python package - run: | - pytest -s -v -rxXs --durations=0 ./tests/python-sycl/ + - name: Test Python package + run: | + pytest -s -v -rxXs --durations=0 ./tests/python + - name: Test Dask Interface + run: | + pytest -s -v -rxXs --durations=0 ./tests/test_distributed/test_with_dask python-system-installation-on-ubuntu: - name: Test XGBoost Python package System Installation on ${{ matrix.os }} - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - + name: Test XGBoost Python package System Installation on Ubuntu + runs-on: ubuntu-latest steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + - uses: actions/checkout@v4.2.2 with: submodules: 'true' - name: Set up Python 3.10 - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@v5.3.0 with: python-version: "3.10" diff --git a/.github/workflows/python_wheels.yml b/.github/workflows/python_wheels.yml deleted file mode 100644 index 3b7a8072c109..000000000000 --- a/.github/workflows/python_wheels.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: XGBoost-Python-Wheels - -on: [push, pull_request] - -permissions: - contents: read # to fetch code (actions/checkout) - -defaults: - run: - shell: bash -l {0} - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -jobs: - python-wheels: - name: Build wheel for ${{ matrix.platform_id }} - runs-on: ${{ matrix.os }} - strategy: - matrix: - include: - - os: macos-13 - platform_id: macosx_x86_64 - - os: macos-14 - platform_id: macosx_arm64 - steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' - - name: Set up homebrew - uses: Homebrew/actions/setup-homebrew@68fa6aeb1ccb0596d311f2b34ec74ec21ee68e54 - - name: Install libomp - run: brew install libomp - - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 - with: - miniforge-variant: Miniforge3 - miniforge-version: latest - python-version: "3.10" - use-mamba: true - - name: Build wheels - run: bash tests/ci_build/build_python_wheels.sh ${{ matrix.platform_id }} ${{ github.sha }} - - name: Extract branch name - run: | - echo "branch=${GITHUB_REF#refs/heads/}" >> "$GITHUB_OUTPUT" - id: extract_branch - if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_') - - name: Upload Python wheel - if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_') - run: | - python -m pip install awscli - python -m awscli s3 cp wheelhouse/*.whl s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/ --acl public-read --region us-west-2 - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }} diff --git a/.github/workflows/python_wheels_macos.yml b/.github/workflows/python_wheels_macos.yml new file mode 100644 index 000000000000..02f21593c220 --- /dev/null +++ b/.github/workflows/python_wheels_macos.yml @@ -0,0 +1,55 @@ +name: Build Python wheels targeting MacOS + +on: [push, pull_request] + +permissions: + contents: read # to fetch code (actions/checkout) + +defaults: + run: + shell: bash -l {0} + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + BRANCH_NAME: >- + ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || 
github.ref_name }} + +jobs: + python-wheels-macos: + name: Build wheel for ${{ matrix.platform_id }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - os: macos-13 + platform_id: macosx_x86_64 + - os: macos-14 + platform_id: macosx_arm64 + steps: + - uses: actions/checkout@v4.2.2 + with: + submodules: 'true' + - name: Set up homebrew + uses: Homebrew/actions/setup-homebrew@13341b4d5e459a98bbe0b122b12c11bf90518cc8 + - name: Install libomp + run: brew install libomp + - uses: conda-incubator/setup-miniconda@v3.1.0 + with: + miniforge-variant: Miniforge3 + miniforge-version: latest + python-version: "3.10" + use-mamba: true + - name: Build wheels + run: bash ops/pipeline/build-python-wheels-macos.sh ${{ matrix.platform_id }} ${{ github.sha }} + - name: Upload Python wheel + if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_') + run: | + python -m pip install awscli + python -m awscli s3 cp wheelhouse/*.whl s3://xgboost-nightly-builds/${{ env.BRANCH_NAME }}/ --acl public-read --region us-west-2 + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }} diff --git a/.github/workflows/r_nold.yml b/.github/workflows/r_nold.yml index 4b506927e06c..6ff4aa079e95 100644 --- a/.github/workflows/r_nold.yml +++ b/.github/workflows/r_nold.yml @@ -22,23 +22,23 @@ jobs: container: image: rhub/debian-gcc-devel-nold steps: - - name: Install git and system packages - shell: bash - run: | - apt update && apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev git -y - - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' - - - name: Install dependencies - shell: bash -l {0} - run: | - /tmp/R-devel/bin/Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')" - - - name: Run R tests - shell: bash - run: | - cd R-package && \ - /tmp/R-devel/bin/R CMD INSTALL . && \ - /tmp/R-devel/bin/R -q -e "library(testthat); setwd('tests'); source('testthat.R')" + - name: Install git and system packages + shell: bash + run: | + apt update && apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev git -y + + - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + with: + submodules: 'true' + + - name: Install dependencies + shell: bash -l {0} + run: | + /tmp/R-devel/bin/Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')" + + - name: Run R tests + shell: bash + run: | + cd R-package && \ + /tmp/R-devel/bin/R CMD INSTALL . 
&& \ + /tmp/R-devel/bin/R -q -e "library(testthat); setwd('tests'); source('testthat.R')" diff --git a/.github/workflows/r_tests.yml b/.github/workflows/r_tests.yml index c56d1f8ef943..f5e5152fa29a 100644 --- a/.github/workflows/r_tests.yml +++ b/.github/workflows/r_tests.yml @@ -13,98 +13,66 @@ concurrency: cancel-in-progress: true jobs: - lintr: - runs-on: ${{ matrix.config.os }} - name: Run R linters on OS ${{ matrix.config.os }}, R ${{ matrix.config.r }}, Compiler ${{ matrix.config.compiler }}, Build ${{ matrix.config.build }} - strategy: - matrix: - config: - - {os: ubuntu-latest, r: 'release'} - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - - steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' - - - uses: r-lib/actions/setup-r@929c772977a3a13c8733b363bf5a2f685c25dd91 # v2.9.0 - with: - r-version: ${{ matrix.config.r }} - - - name: Cache R packages - uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-r-${{ matrix.config.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }} - restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }} - - - name: Install dependencies - shell: Rscript {0} - run: | - source("./R-package/tests/helper_scripts/install_deps.R") - - - name: Run lintr - run: | - MAKEFLAGS="-j$(nproc)" R CMD INSTALL R-package/ - Rscript tests/ci_build/lint_r.R $(pwd) - test-Rpkg: - runs-on: ${{ matrix.config.os }} - name: Test R on OS ${{ matrix.config.os }}, R ${{ matrix.config.r }}, Compiler ${{ matrix.config.compiler }}, Build ${{ matrix.config.build }} + runs-on: ${{ matrix.os }} + name: Test R on OS ${{ matrix.os }}, R ${{ matrix.r }}, Compiler ${{ matrix.compiler }}, Build ${{ matrix.build }} strategy: fail-fast: false matrix: - config: - - {os: windows-latest, r: 'release', compiler: 'mingw', build: 'autotools'} - - {os: ubuntu-latest, r: 'release', compiler: 'none', build: 'cmake'} + include: + - os: windows-latest + r: release + compiler: mingw + build: autotools + - os: ubuntu-latest + r: release + compiler: none + build: cmake env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} steps: - - name: Install system dependencies - run: | - sudo apt update - sudo apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev - if: matrix.config.os == 'ubuntu-latest' - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' - - - uses: r-lib/actions/setup-r@929c772977a3a13c8733b363bf5a2f685c25dd91 # v2.9.0 - with: - r-version: ${{ matrix.config.r }} - - - name: Cache R packages - uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-r-${{ matrix.config.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }} - restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }} - - - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 - with: - python-version: "3.10" - architecture: 'x64' - - - uses: r-lib/actions/setup-tinytex@v2 - - - name: Install dependencies - shell: Rscript {0} - run: | - source("./R-package/tests/helper_scripts/install_deps.R") - - - name: Test R - run: | - python tests/ci_build/test_r_package.py --compiler='${{ matrix.config.compiler }}' --build-tool="${{ matrix.config.build }}" 
--task=check - if: matrix.config.compiler != 'none' - - - name: Test R - run: | - python tests/ci_build/test_r_package.py --build-tool="${{ matrix.config.build }}" --task=check - if: matrix.config.compiler == 'none' + - name: Install system dependencies + run: | + sudo apt update + sudo apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev + if: matrix.os == 'ubuntu-latest' + - uses: actions/checkout@v4.2.2 + with: + submodules: 'true' + + - uses: r-lib/actions/setup-r@v2.11.0 + with: + r-version: ${{ matrix.r }} + + - name: Cache R packages + uses: actions/cache@v4.1.2 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-r-${{ matrix.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }} + restore-keys: ${{ runner.os }}-r-${{ matrix.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }} + + - uses: actions/setup-python@v5.3.0 + with: + python-version: "3.10" + architecture: 'x64' + + - uses: r-lib/actions/setup-tinytex@v2 + + - name: Install dependencies + shell: Rscript {0} + run: | + source("./R-package/tests/helper_scripts/install_deps.R") + + - name: Test R + run: | + python ops/script/test_r_package.py --compiler='${{ matrix.compiler }}' --build-tool="${{ matrix.build }}" --task=check + if: matrix.compiler != 'none' + + - name: Test R + run: | + python ops/script/test_r_package.py --build-tool="${{ matrix.build }}" --task=check + if: matrix.compiler == 'none' test-R-on-Debian: name: Test R package on Debian @@ -113,38 +81,38 @@ jobs: image: rhub/debian-gcc-release steps: - - name: Install system dependencies - run: | - # Must run before checkout to have the latest git installed. - # No need to add pandoc, the container has it figured out. - apt update && apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev git -y - - - name: Trust git cloning project sources - run: | - git config --global --add safe.directory "${GITHUB_WORKSPACE}" - - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' - - - name: Install dependencies - shell: bash -l {0} - run: | - Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')" - - - name: Test R - shell: bash -l {0} - run: | - python3 tests/ci_build/test_r_package.py --r=/usr/bin/R --build-tool=autotools --task=check - - - uses: dorny/paths-filter@v3 - id: changes - with: - filters: | - r_package: - - 'R-package/**' - - - name: Run document check - if: steps.changes.outputs.r_package == 'true' - run: | - python3 tests/ci_build/test_r_package.py --r=/usr/bin/R --task=doc + - name: Install system dependencies + run: | + # Must run before checkout to have the latest git installed. + # No need to add pandoc, the container has it figured out. 
+ apt update && apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev git -y + + - name: Trust git cloning project sources + run: | + git config --global --add safe.directory "${GITHUB_WORKSPACE}" + + - uses: actions/checkout@v4.2.2 + with: + submodules: 'true' + + - name: Install dependencies + shell: bash -l {0} + run: | + Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')" + + - name: Test R + shell: bash -l {0} + run: | + python3 ops/script/test_r_package.py --r=/usr/bin/R --build-tool=autotools --task=check + + - uses: dorny/paths-filter@v3 + id: changes + with: + filters: | + r_package: + - 'R-package/**' + + - name: Run document check + if: steps.changes.outputs.r_package == 'true' + run: | + python3 ops/script/test_r_package.py --r=/usr/bin/R --task=doc diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 85a9abb57e1b..8ab77ec4c382 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -22,7 +22,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + uses: actions/checkout@v4.2.2 with: persist-credentials: false diff --git a/.github/workflows/sycl_tests.yml b/.github/workflows/sycl_tests.yml new file mode 100644 index 000000000000..7f6214016c00 --- /dev/null +++ b/.github/workflows/sycl_tests.yml @@ -0,0 +1,86 @@ +name: XGBoost CI (oneAPI) + +on: [push, pull_request] + +permissions: + contents: read # to fetch code (actions/checkout) + +defaults: + run: + shell: bash -l {0} + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + BRANCH_NAME: >- + ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }} + +jobs: + gtest-cpu-sycl: + name: Test Google C++ unittest (CPU SYCL) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4.2.2 + with: + submodules: 'true' + - uses: conda-incubator/setup-miniconda@v3.1.0 + with: + miniforge-variant: Miniforge3 + miniforge-version: latest + activate-environment: linux_sycl_test + environment-file: ops/conda_env/linux_sycl_test.yml + use-mamba: true + - name: Display Conda env + run: | + conda info + conda list + - name: Build and install XGBoost + run: | + mkdir build + cd build + cmake .. -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DPLUGIN_SYCL=ON -DCMAKE_CXX_COMPILER=g++ \ + -DCMAKE_C_COMPILER=gcc -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -GNinja + ninja + - name: Run gtest + run: | + cd build + ./testxgboost + + python-sycl-tests-on-ubuntu: + name: Test XGBoost Python package with SYCL + runs-on: ubuntu-latest + timeout-minutes: 90 + steps: + - uses: actions/checkout@v4.2.2 + with: + submodules: 'true' + + - uses: conda-incubator/setup-miniconda@v3.1.0 + with: + miniforge-variant: Miniforge3 + miniforge-version: latest + activate-environment: linux_sycl_test + environment-file: ops/conda_env/linux_sycl_test.yml + use-mamba: true + + - name: Display Conda env + run: | + conda info + conda list + - name: Build XGBoost on Ubuntu + run: | + mkdir build + cd build + cmake .. -DPLUGIN_SYCL=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc \ + -DCMAKE_PREFIX_PATH=$CONDA_PREFIX -GNinja + ninja + - name: Install Python package + run: | + cd python-package + python --version + pip install -v . 
+ - name: Test Python package + run: | + pytest -s -v -rxXs --durations=0 ./tests/python-sycl/ diff --git a/.github/workflows/update_rapids.yml b/.github/workflows/update_rapids.yml index 5e229db4c050..d6be99d00851 100644 --- a/.github/workflows/update_rapids.yml +++ b/.github/workflows/update_rapids.yml @@ -25,20 +25,20 @@ jobs: name: Check latest RAPIDS runs-on: ubuntu-latest steps: - - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - with: - submodules: 'true' - - name: Check latest RAPIDS and update conftest.sh - run: | - bash tests/buildkite/update-rapids.sh - - name: Create Pull Request - uses: peter-evans/create-pull-request@v7 - if: github.ref == 'refs/heads/master' - with: - add-paths: | - tests/buildkite - branch: create-pull-request/update-rapids - base: master - title: "[CI] Update RAPIDS to latest stable" - commit-message: "[CI] Update RAPIDS to latest stable" + - uses: actions/checkout@v4.2.2 + with: + submodules: 'true' + - name: Check latest RAPIDS and update conftest.sh + run: | + bash ops/script/update_rapids.sh + - name: Create Pull Request + uses: peter-evans/create-pull-request@v7 + if: github.ref == 'refs/heads/master' + with: + add-paths: | + tests/buildkite + branch: create-pull-request/update-rapids + base: master + title: "[CI] Update RAPIDS to latest stable" + commit-message: "[CI] Update RAPIDS to latest stable" diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml new file mode 100644 index 000000000000..afd9e65192ba --- /dev/null +++ b/.github/workflows/windows.yml @@ -0,0 +1,60 @@ +name: XGBoost CI (Windows) + +on: [push, pull_request] + +permissions: + contents: read # to fetch code (actions/checkout) + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +defaults: + run: + shell: powershell + +env: + BRANCH_NAME: >- + ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }} + ARTIFACT_STASH_PREFIX: cache/${{ github.repository }}/stash/${{ github.run_id }} + # TODO(hcho3): Remove + RUNS_ON_S3_BUCKET_CACHE: runs-on-s3bucketcache-dv5n3gmnaius + +jobs: + build-win64-gpu: + name: Build XGBoost for Windows with CUDA + runs-on: + - runs-on=${{ github.run_id }} + - runner=windows-cpu + steps: + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - run: powershell ops/pipeline/build-win64-gpu.ps1 + - name: Stash files + run: | + powershell ops/stash_artifacts.ps1 ` + build/testxgboost.exe xgboost.exe ` + (Get-ChildItem python-package/dist/*.whl | Select-Object -Expand FullName) + env: + COMMAND: upload + KEY: build-win64-gpu + + test-win64-gpu: + name: Test XGBoost on Windows + needs: build-win64-gpu + runs-on: + - runs-on=${{ github.run_id }} + - runner=windows-gpu + steps: + - uses: actions/checkout@v4.2.2 + with: + submodules: "true" + - name: Unstash files + run: | + powershell ops/stash_artifacts.ps1 ` + build/testxgboost.exe xgboost.exe python-package/dist/*.whl + env: + COMMAND: download + KEY: build-win64-gpu + - run: powershell ops/pipeline/test-win64-gpu.ps1 diff --git a/dev/prepare_jvm_release.py b/dev/prepare_jvm_release.py index 0b4594e2d2c0..c5a72724f707 100644 --- a/dev/prepare_jvm_release.py +++ b/dev/prepare_jvm_release.py @@ -203,7 +203,7 @@ def main(): ) print( "5. 
Remove the Scala 2.12 artifacts and build Scala 2.13 artifacts:\n" - " python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts\n" + " python ops/script/change_scala_version.py --scala-version 2.13 --purge-artifacts\n" " GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests -Dskip.native.build=true" ) print( diff --git a/doc/jvm/api.rst b/doc/jvm/api.rst index b9e7821aa6fa..3d56cb2c9aa4 100644 --- a/doc/jvm/api.rst +++ b/doc/jvm/api.rst @@ -5,4 +5,5 @@ API Docs for the JVM packages * `XGBoost4J Java API <../jvm_docs/javadocs/index.html>`_ * `XGBoost4J Scala API <../jvm_docs/scaladocs/xgboost4j/index.html>`_ * `XGBoost4J-Spark Scala API <../jvm_docs/scaladocs/xgboost4j-spark/index.html>`_ +* `XGBoost4J-Spark-GPU Scala API <../jvm_docs/scaladocs/xgboost4j-spark-gpu/index.html>`_ * `XGBoost4J-Flink Scala API <../jvm_docs/scaladocs/xgboost4j-flink/index.html>`_ diff --git a/jvm-packages/create_jni.py b/jvm-packages/create_jni.py index 6be7b451ce14..fbd9b4ce5672 100755 --- a/jvm-packages/create_jni.py +++ b/jvm-packages/create_jni.py @@ -32,7 +32,7 @@ def cd(path): path = normpath(path) cwd = os.getcwd() os.chdir(path) - print("cd " + path) + print("cd " + path, flush=True) try: yield path finally: @@ -41,7 +41,7 @@ def cd(path): def maybe_makedirs(path): path = normpath(path) - print("mkdir -p " + path) + print("mkdir -p " + path, flush=True) try: os.makedirs(path) except OSError as e: @@ -50,14 +50,14 @@ def maybe_makedirs(path): def run(command, **kwargs): - print(command) + print(command, flush=True) subprocess.run(command, shell=True, check=True, env=os.environ, **kwargs) def cp(source, target): source = normpath(source) target = normpath(target) - print("cp {0} {1}".format(source, target)) + print("cp {0} {1}".format(source, target), flush=True) shutil.copy(source, target) @@ -78,7 +78,7 @@ def native_build(args): subprocess.check_output("/usr/libexec/java_home").strip().decode() ) - print("building Java wrapper") + print("building Java wrapper", flush=True) with cd(".."): build_dir = "build-gpu" if cli_args.use_cuda == "ON" else "build" maybe_makedirs(build_dir) @@ -123,7 +123,7 @@ def native_build(args): run("cmake .. " + " ".join(args + [generator])) break except subprocess.CalledProcessError as e: - print(f"Failed to build with generator: {generator}", e) + print(f"Failed to build with generator: {generator}", e, flush=True) with cd(os.path.pardir): shutil.rmtree(build_dir) maybe_makedirs(build_dir) @@ -132,7 +132,7 @@ def native_build(args): run("cmake --build . 
--config Release" + maybe_parallel_build) - print("copying native library") + print("copying native library", flush=True) library_name, os_folder = { "Windows": ("xgboost4j.dll", "windows"), "Darwin": ("libxgboost4j.dylib", "macos"), @@ -153,7 +153,7 @@ def native_build(args): maybe_makedirs(output_folder) cp("../lib/" + library_name, output_folder) - print("copying train/test files") + print("copying train/test files", flush=True) # for xgboost4j maybe_makedirs("xgboost4j/src/test/resources") diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index ad992464a2bc..4f2be9cee080 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -116,6 +116,22 @@ + + docs + + ON + true + true + true + + + xgboost4j + xgboost4j-spark + xgboost4j-spark-gpu + xgboost4j-flink + + + release diff --git a/tests/ci_build/conda_env/aarch64_test.yml b/ops/conda_env/aarch64_test.yml similarity index 100% rename from tests/ci_build/conda_env/aarch64_test.yml rename to ops/conda_env/aarch64_test.yml diff --git a/tests/ci_build/conda_env/cpp_test.yml b/ops/conda_env/cpp_test.yml similarity index 100% rename from tests/ci_build/conda_env/cpp_test.yml rename to ops/conda_env/cpp_test.yml diff --git a/tests/ci_build/conda_env/jvm_tests.yml b/ops/conda_env/jvm_tests.yml similarity index 100% rename from tests/ci_build/conda_env/jvm_tests.yml rename to ops/conda_env/jvm_tests.yml diff --git a/tests/ci_build/conda_env/linux_cpu_test.yml b/ops/conda_env/linux_cpu_test.yml similarity index 100% rename from tests/ci_build/conda_env/linux_cpu_test.yml rename to ops/conda_env/linux_cpu_test.yml diff --git a/tests/ci_build/conda_env/linux_sycl_test.yml b/ops/conda_env/linux_sycl_test.yml similarity index 97% rename from tests/ci_build/conda_env/linux_sycl_test.yml rename to ops/conda_env/linux_sycl_test.yml index 5b3a15f7e3b1..f1ce49492d42 100644 --- a/tests/ci_build/conda_env/linux_sycl_test.yml +++ b/ops/conda_env/linux_sycl_test.yml @@ -18,6 +18,7 @@ dependencies: - pytest-timeout - pytest-cov - dask +- ninja - dpcpp_linux-64 - onedpl-devel - intel-openmp diff --git a/tests/ci_build/conda_env/macos_cpu_test.yml b/ops/conda_env/macos_cpu_test.yml similarity index 100% rename from tests/ci_build/conda_env/macos_cpu_test.yml rename to ops/conda_env/macos_cpu_test.yml diff --git a/tests/ci_build/conda_env/python_lint.yml b/ops/conda_env/python_lint.yml similarity index 100% rename from tests/ci_build/conda_env/python_lint.yml rename to ops/conda_env/python_lint.yml diff --git a/tests/ci_build/conda_env/sdist_test.yml b/ops/conda_env/sdist_test.yml similarity index 100% rename from tests/ci_build/conda_env/sdist_test.yml rename to ops/conda_env/sdist_test.yml diff --git a/tests/ci_build/conda_env/win64_test.yml b/ops/conda_env/win64_test.yml similarity index 100% rename from tests/ci_build/conda_env/win64_test.yml rename to ops/conda_env/win64_test.yml diff --git a/ops/docker/ci_container.yml b/ops/docker/ci_container.yml new file mode 100644 index 000000000000..f5eb7eb982df --- /dev/null +++ b/ops/docker/ci_container.yml @@ -0,0 +1,65 @@ +## List of CI containers with definitions and build arguments + +# Each container will be built using the definition from +# ops/docker/dockerfile/Dockerfile.CONTAINER_DEF + +rapids_versions: + stable: &rapids_version "24.10" + dev: &dev_rapids_version "24.12" + +xgb-ci.gpu_build_rockylinux8: + container_def: gpu_build_rockylinux8 + build_args: + CUDA_VERSION_ARG: "12.4.1" + NCCL_VERSION_ARG: "2.23.4-1" + RAPIDS_VERSION_ARG: *rapids_version + +xgb-ci.gpu_build_r_rockylinux8: + 
container_def: gpu_build_r_rockylinux8 + build_args: + CUDA_VERSION_ARG: "12.4.1" + R_VERSION_ARG: "4.3.2" + +xgb-ci.gpu: + container_def: gpu + build_args: + CUDA_VERSION_ARG: "12.4.1" + NCCL_VERSION_ARG: "2.23.4-1" + RAPIDS_VERSION_ARG: *rapids_version + +xgb-ci.gpu_dev_ver: + container_def: gpu + build_args: + CUDA_VERSION_ARG: "12.4.1" + NCCL_VERSION_ARG: "2.23.4-1" + RAPIDS_VERSION_ARG: *dev_rapids_version + RAPIDSAI_CONDA_CHANNEL_ARG: "rapidsai-nightly" + +xgb-ci.clang_tidy: + container_def: clang_tidy + build_args: + CUDA_VERSION_ARG: "12.4.1" + +xgb-ci.cpu: + container_def: cpu + +xgb-ci.aarch64: + container_def: aarch64 + +xgb-ci.manylinux_2_28_x86_64: + container_def: manylinux_2_28_x86_64 + +xgb-ci.manylinux2014_x86_64: + container_def: manylinux2014_x86_64 + +xgb-ci.manylinux2014_aarch64: + container_def: manylinux2014_aarch64 + +xgb-ci.jvm: + container_def: jvm + +xgb-ci.jvm_gpu_build: + container_def: jvm_gpu_build + build_args: + CUDA_VERSION_ARG: "12.4.1" + NCCL_VERSION_ARG: "2.23.4-1" diff --git a/ops/docker/docker_cache_ecr.yml b/ops/docker/docker_cache_ecr.yml new file mode 100644 index 000000000000..e20f35fc8020 --- /dev/null +++ b/ops/docker/docker_cache_ecr.yml @@ -0,0 +1,4 @@ +## Constants for AWS ECR (Elastic Container Registry), used for the Docker cache + +DOCKER_CACHE_ECR_ID: "492475357299" +DOCKER_CACHE_ECR_REGION: "us-west-2" diff --git a/tests/ci_build/Dockerfile.aarch64 b/ops/docker/dockerfile/Dockerfile.aarch64 similarity index 97% rename from tests/ci_build/Dockerfile.aarch64 rename to ops/docker/dockerfile/Dockerfile.aarch64 index 8d6cfaca39fa..9dff2a05230b 100644 --- a/tests/ci_build/Dockerfile.aarch64 +++ b/ops/docker/dockerfile/Dockerfile.aarch64 @@ -32,7 +32,7 @@ RUN set -ex; \ # Default entry-point to use if running locally # It will preserve attributes of created files -COPY entrypoint.sh /scripts/ +COPY docker/entrypoint.sh /scripts/ WORKDIR /workspace ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/Dockerfile.clang_tidy b/ops/docker/dockerfile/Dockerfile.clang_tidy similarity index 96% rename from tests/ci_build/Dockerfile.clang_tidy rename to ops/docker/dockerfile/Dockerfile.clang_tidy index 2e7751a20185..de7d9bd3f254 100644 --- a/tests/ci_build/Dockerfile.clang_tidy +++ b/ops/docker/dockerfile/Dockerfile.clang_tidy @@ -1,4 +1,4 @@ -ARG CUDA_VERSION_ARG +ARG CUDA_VERSION_ARG=notset FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu22.04 ARG CUDA_VERSION_ARG @@ -44,7 +44,7 @@ RUN set -ex; \ # Default entry-point to use if running locally # It will preserve attributes of created files -COPY entrypoint.sh /scripts/ +COPY docker/entrypoint.sh /scripts/ WORKDIR /workspace ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/Dockerfile.cpu b/ops/docker/dockerfile/Dockerfile.cpu similarity index 92% rename from tests/ci_build/Dockerfile.cpu rename to ops/docker/dockerfile/Dockerfile.cpu index 22db93572207..a426ce5da30c 100644 --- a/tests/ci_build/Dockerfile.cpu +++ b/ops/docker/dockerfile/Dockerfile.cpu @@ -41,8 +41,7 @@ RUN git clone -b v1.65.4 https://github.com/grpc/grpc.git \ COPY conda_env/linux_cpu_test.yml /scripts/ RUN mamba create -n linux_cpu_test && \ mamba env update -n linux_cpu_test --file=/scripts/linux_cpu_test.yml && \ - mamba clean --all --yes && \ - conda run --no-capture-output -n linux_cpu_test pip install buildkite-test-collector + mamba clean --all --yes # Install lightweight sudo (not bound to TTY) RUN set -ex; \ @@ -52,7 +51,7 @@ RUN set -ex; \ # Default entry-point to use if running locally # It will 
preserve attributes of created files -COPY entrypoint.sh /scripts/ +COPY docker/entrypoint.sh /scripts/ WORKDIR /workspace ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/Dockerfile.gpu b/ops/docker/dockerfile/Dockerfile.gpu similarity index 76% rename from tests/ci_build/Dockerfile.gpu rename to ops/docker/dockerfile/Dockerfile.gpu index 501726e9ffba..96a532fc2ff1 100644 --- a/tests/ci_build/Dockerfile.gpu +++ b/ops/docker/dockerfile/Dockerfile.gpu @@ -1,8 +1,10 @@ -ARG CUDA_VERSION_ARG +ARG CUDA_VERSION_ARG=notset FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu22.04 ARG CUDA_VERSION_ARG ARG RAPIDS_VERSION_ARG + # Should be first 4 digits (e.g. 24.06) ARG NCCL_VERSION_ARG +ARG RAPIDSAI_CONDA_CHANNEL_ARG="rapidsai" # Environment ENV DEBIAN_FRONTEND=noninteractive @@ -24,16 +26,16 @@ ENV PATH=/opt/miniforge/bin:$PATH RUN \ export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \ export CUDA_SHORT_VER=$(echo "$CUDA_VERSION_ARG" | grep -o -E '[0-9]+\.[0-9]') && \ - mamba create -y -n gpu_test -c rapidsai -c conda-forge -c nvidia \ - python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cuda-version=$CUDA_SHORT_VER \ + mamba create -y -n gpu_test -c ${RAPIDSAI_CONDA_CHANNEL_ARG} -c conda-forge -c nvidia \ + python=3.10 "cudf=$RAPIDS_VERSION_ARG.*" "rmm=$RAPIDS_VERSION_ARG.*" cuda-version=$CUDA_SHORT_VER \ "nccl>=${NCCL_SHORT_VER}" \ - dask \ - dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \ + "dask<=2024.10.0" \ + "distributed<=2024.10.0" \ + "dask-cuda=$RAPIDS_VERSION_ARG.*" "dask-cudf=$RAPIDS_VERSION_ARG.*" cupy \ numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel \ python-kubernetes urllib3 graphviz hypothesis loky \ "pyspark>=3.4.0" cloudpickle cuda-python && \ - mamba clean --all --yes && \ - conda run --no-capture-output -n gpu_test pip install buildkite-test-collector + mamba clean --all --yes ENV GOSU_VERSION=1.10 ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ @@ -46,7 +48,7 @@ RUN set -ex; \ # Default entry-point to use if running locally # It will preserve attributes of created files -COPY entrypoint.sh /scripts/ +COPY docker/entrypoint.sh /scripts/ WORKDIR /workspace ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/Dockerfile.gpu_build_r_rockylinux8 b/ops/docker/dockerfile/Dockerfile.gpu_build_r_rockylinux8 similarity index 97% rename from tests/ci_build/Dockerfile.gpu_build_r_rockylinux8 rename to ops/docker/dockerfile/Dockerfile.gpu_build_r_rockylinux8 index 159e5d776c16..2d18b1eeb315 100644 --- a/tests/ci_build/Dockerfile.gpu_build_r_rockylinux8 +++ b/ops/docker/dockerfile/Dockerfile.gpu_build_r_rockylinux8 @@ -1,4 +1,4 @@ -ARG CUDA_VERSION_ARG +ARG CUDA_VERSION_ARG=notset FROM nvcr.io/nvidia/cuda:$CUDA_VERSION_ARG-devel-rockylinux8 ARG CUDA_VERSION_ARG ARG R_VERSION_ARG @@ -52,7 +52,7 @@ RUN set -ex; \ # Default entry-point to use if running locally # It will preserve attributes of created files -COPY entrypoint.sh /scripts/ +COPY docker/entrypoint.sh /scripts/ WORKDIR /workspace ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/Dockerfile.gpu_build_rockylinux8 b/ops/docker/dockerfile/Dockerfile.gpu_build_rockylinux8 similarity index 98% rename from tests/ci_build/Dockerfile.gpu_build_rockylinux8 rename to ops/docker/dockerfile/Dockerfile.gpu_build_rockylinux8 index 8869fb468e12..ae79e88b15b3 100644 --- a/tests/ci_build/Dockerfile.gpu_build_rockylinux8 +++ b/ops/docker/dockerfile/Dockerfile.gpu_build_rockylinux8 @@ -1,4 +1,4 @@ -ARG CUDA_VERSION_ARG +ARG 
CUDA_VERSION_ARG=notset FROM nvcr.io/nvidia/cuda:$CUDA_VERSION_ARG-devel-rockylinux8 ARG CUDA_VERSION_ARG ARG NCCL_VERSION_ARG @@ -76,7 +76,7 @@ RUN set -ex; \ # Default entry-point to use if running locally # It will preserve attributes of created files -COPY entrypoint.sh /scripts/ +COPY docker/entrypoint.sh /scripts/ WORKDIR /workspace ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/Dockerfile.i386 b/ops/docker/dockerfile/Dockerfile.i386 similarity index 100% rename from tests/ci_build/Dockerfile.i386 rename to ops/docker/dockerfile/Dockerfile.i386 diff --git a/tests/ci_build/Dockerfile.jvm b/ops/docker/dockerfile/Dockerfile.jvm similarity index 97% rename from tests/ci_build/Dockerfile.jvm rename to ops/docker/dockerfile/Dockerfile.jvm index c4584747f5db..9fd62e52de93 100644 --- a/tests/ci_build/Dockerfile.jvm +++ b/ops/docker/dockerfile/Dockerfile.jvm @@ -37,7 +37,7 @@ RUN set -ex; \ # Default entry-point to use if running locally # It will preserve attributes of created files -COPY entrypoint.sh /scripts/ +COPY docker/entrypoint.sh /scripts/ WORKDIR /workspace ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/Dockerfile.jvm_gpu_build b/ops/docker/dockerfile/Dockerfile.jvm_gpu_build similarity index 97% rename from tests/ci_build/Dockerfile.jvm_gpu_build rename to ops/docker/dockerfile/Dockerfile.jvm_gpu_build index edb5918b8bbc..4983493a6878 100644 --- a/tests/ci_build/Dockerfile.jvm_gpu_build +++ b/ops/docker/dockerfile/Dockerfile.jvm_gpu_build @@ -1,4 +1,4 @@ -ARG CUDA_VERSION_ARG +ARG CUDA_VERSION_ARG=notset FROM nvcr.io/nvidia/cuda:$CUDA_VERSION_ARG-devel-rockylinux8 ARG CUDA_VERSION_ARG ARG NCCL_VERSION_ARG @@ -48,7 +48,7 @@ RUN set -ex; \ # Default entry-point to use if running locally # It will preserve attributes of created files -COPY entrypoint.sh /scripts/ +COPY docker/entrypoint.sh /scripts/ WORKDIR /workspace ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/Dockerfile.manylinux2014_aarch64 b/ops/docker/dockerfile/Dockerfile.manylinux2014_aarch64 similarity index 82% rename from tests/ci_build/Dockerfile.manylinux2014_aarch64 rename to ops/docker/dockerfile/Dockerfile.manylinux2014_aarch64 index 9627e15c64a0..7800033f552d 100644 --- a/tests/ci_build/Dockerfile.manylinux2014_aarch64 +++ b/ops/docker/dockerfile/Dockerfile.manylinux2014_aarch64 @@ -1,5 +1,7 @@ FROM quay.io/pypa/manylinux2014_aarch64 +RUN yum update -y && yum install -y java-1.8.0-openjdk-devel + # Install lightweight sudo (not bound to TTY) ENV GOSU_VERSION=1.10 RUN set -ex; \ @@ -9,7 +11,7 @@ RUN set -ex; \ # Default entry-point to use if running locally # It will preserve attributes of created files -COPY entrypoint.sh /scripts/ +COPY docker/entrypoint.sh /scripts/ WORKDIR /workspace ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/Dockerfile.manylinux2014_x86_64 b/ops/docker/dockerfile/Dockerfile.manylinux2014_x86_64 similarity index 82% rename from tests/ci_build/Dockerfile.manylinux2014_x86_64 rename to ops/docker/dockerfile/Dockerfile.manylinux2014_x86_64 index 11beb116ee43..8214b598d8d4 100644 --- a/tests/ci_build/Dockerfile.manylinux2014_x86_64 +++ b/ops/docker/dockerfile/Dockerfile.manylinux2014_x86_64 @@ -1,5 +1,7 @@ FROM quay.io/pypa/manylinux2014_x86_64 +RUN yum update -y && yum install -y java-1.8.0-openjdk-devel + # Install lightweight sudo (not bound to TTY) ENV GOSU_VERSION=1.10 RUN set -ex; \ @@ -9,7 +11,7 @@ RUN set -ex; \ # Default entry-point to use if running locally # It will preserve attributes of created files -COPY 
entrypoint.sh /scripts/ +COPY docker/entrypoint.sh /scripts/ WORKDIR /workspace ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/Dockerfile.manylinux_2_28_x86_64 b/ops/docker/dockerfile/Dockerfile.manylinux_2_28_x86_64 similarity index 92% rename from tests/ci_build/Dockerfile.manylinux_2_28_x86_64 rename to ops/docker/dockerfile/Dockerfile.manylinux_2_28_x86_64 index 5e264e2f16e6..f5dac54b9b8f 100644 --- a/tests/ci_build/Dockerfile.manylinux_2_28_x86_64 +++ b/ops/docker/dockerfile/Dockerfile.manylinux_2_28_x86_64 @@ -9,7 +9,7 @@ RUN set -ex; \ # Default entry-point to use if running locally # It will preserve attributes of created files -COPY entrypoint.sh /scripts/ +COPY docker/entrypoint.sh /scripts/ WORKDIR /workspace ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/entrypoint.sh b/ops/docker/entrypoint.sh similarity index 70% rename from tests/ci_build/entrypoint.sh rename to ops/docker/entrypoint.sh index a0c5f56bb52d..40135c197c73 100755 --- a/tests/ci_build/entrypoint.sh +++ b/ops/docker/entrypoint.sh @@ -1,12 +1,10 @@ #!/usr/bin/env bash -# This script is a wrapper creating the same user inside container as the one -# running the ci_build.sh outside the container. It also set the home directory -# for the user inside container to match the same absolute path as the workspace -# outside of container. Do not run this manually. It does not make sense. It is -# intended to be called by ci_build.sh only. +# This wrapper script propagates the user information from the host +# to the container. This way, any files generated by processes running +# in the container will be accessible on the host. -set -e +set -euo pipefail COMMAND=("$@") @@ -19,7 +17,11 @@ else rm /this_is_writable_file_system fi -if [[ -n $CI_BUILD_UID ]] && [[ -n $CI_BUILD_GID ]]; then +## Assumption: the host passes correct user information via environment variables +## CI_BUILD_UID, CI_BUILD_GID, CI_BUILD_USER, CI_BUILD_GROUP + +if [[ -n ${CI_BUILD_UID:-} ]] && [[ -n ${CI_BUILD_GID:-} ]] +then groupadd -o -g "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" || true useradd -o -m -g "${CI_BUILD_GID}" -u "${CI_BUILD_UID}" \ "${CI_BUILD_USER}" || true diff --git a/ops/docker/extract_build_args.jq b/ops/docker/extract_build_args.jq new file mode 100644 index 000000000000..682b62cb63cb --- /dev/null +++ b/ops/docker/extract_build_args.jq @@ -0,0 +1,8 @@ +def compute_build_args($input; $container_id): + $input | + .[$container_id] | + select(.build_args != null) | + .build_args | + to_entries | + map("--build-arg " + .key + "=" + .value) | + join(" "); diff --git a/ops/docker/extract_build_args.sh b/ops/docker/extract_build_args.sh new file mode 100755 index 000000000000..0fa7b132b760 --- /dev/null +++ b/ops/docker/extract_build_args.sh @@ -0,0 +1,21 @@ +#!/bin/bash +## Extract container definition and build args from ops/docker/ci_container.yml, +## given the container ID.
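+##
+## Illustrative usage (a sketch; the output format follows the echo at the end
+## of this script, and the values come from ops/docker/ci_container.yml, e.g. the
+## xgb-ci.clang_tidy entry defined above):
+##   $ bash ops/docker/extract_build_args.sh xgb-ci.clang_tidy
+##   CONTAINER_DEF='clang_tidy' BUILD_ARGS='--build-arg CUDA_VERSION_ARG=12.4.1'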
+ +if [ "$#" -ne 1 ]; then + echo "Usage: $0 [container_id]" + exit 1 +fi + +CONTAINER_ID="$1" +CONTAINER_DEF=$( + yq -o json ops/docker/ci_container.yml | + jq -r --arg container_id "${CONTAINER_ID}" '.[$container_id].container_def' +) +BUILD_ARGS=$( + yq -o json ops/docker/ci_container.yml | + jq -r --arg container_id "${CONTAINER_ID}" \ + 'include "ops/docker/extract_build_args"; + compute_build_args(.; $container_id)' +) +echo "CONTAINER_DEF='${CONTAINER_DEF}' BUILD_ARGS='${BUILD_ARGS}'" diff --git a/ops/docker_build.py b/ops/docker_build.py new file mode 100644 index 000000000000..1fed975ce223 --- /dev/null +++ b/ops/docker_build.py @@ -0,0 +1,137 @@ +""" +Wrapper script to build a Docker container with layer caching +""" + +import argparse +import itertools +import pathlib +import subprocess +import sys +from typing import Optional + +from docker_run import OPS_DIR, fancy_print_cli_args + + +def parse_build_args(raw_build_args: list[str]) -> dict[str, str]: + parsed_build_args = dict() + for arg in raw_build_args: + try: + key, value = arg.split("=", maxsplit=1) + except ValueError as e: + raise ValueError( + f"Build argument must be of form KEY=VALUE. Got: {arg}" + ) from e + parsed_build_args[key] = value + return parsed_build_args + + +def docker_build( + container_id: str, + *, + build_args: dict[str, str], + dockerfile_path: pathlib.Path, + docker_context_path: pathlib.Path, + cache_from: Optional[str], + cache_to: Optional[str], +) -> None: + ## Set up command-line arguments to be passed to `docker build` + # Build args + docker_build_cli_args = list( + itertools.chain.from_iterable( + [["--build-arg", f"{k}={v}"] for k, v in build_args.items()] + ) + ) + # When building an image using a non-default driver, we need to specify + # `--load` to load it to the image store. + # See https://docs.docker.com/build/builders/drivers/ + docker_build_cli_args.append("--load") + # Layer caching + if cache_from: + docker_build_cli_args.extend(["--cache-from", cache_from]) + if cache_to: + docker_build_cli_args.extend(["--cache-to", cache_to]) + # Remaining CLI args + docker_build_cli_args.extend( + [ + "--progress=plain", + "--ulimit", + "nofile=1024000:1024000", + "-t", + container_id, + "-f", + str(dockerfile_path), + str(docker_context_path), + ] + ) + cli_args = ["docker", "build"] + docker_build_cli_args + fancy_print_cli_args(cli_args) + subprocess.run(cli_args, check=True, encoding="utf-8") + + +def main(args: argparse.Namespace) -> None: + # Dockerfile to be used in docker build + dockerfile_path = ( + OPS_DIR / "docker" / "dockerfile" / f"Dockerfile.{args.container_def}" + ) + docker_context_path = OPS_DIR + + build_args = parse_build_args(args.build_arg) + + docker_build( + args.container_id, + build_args=build_args, + dockerfile_path=dockerfile_path, + docker_context_path=docker_context_path, + cache_from=args.cache_from, + cache_to=args.cache_to, + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Build a Docker container") + parser.add_argument( + "--container-def", + type=str, + required=True, + help=( + "String uniquely identifying the container definition. The container " + "definition will be fetched from " + "docker/dockerfile/Dockerfile.CONTAINER_DEF." 
+ ), + ) + parser.add_argument( + "--container-id", + type=str, + required=True, + help="String ID to assign to the newly built container", + ) + parser.add_argument( + "--build-arg", + type=str, + default=[], + action="append", + help=( + "Build-time variable(s) to be passed to `docker build`. Each variable " + "should be specified as a key-value pair in the form KEY=VALUE. " + "The variables should match the ARG instructions in the Dockerfile. " + "When passing multiple variables, specify --build-arg multiple times. " + "Example: --build-arg CUDA_VERSION_ARG=12.5 --build-arg RAPIDS_VERSION_ARG=24.10" + ), + ) + parser.add_argument( + "--cache-from", + type=str, + help="Use an external cache source for the Docker build", + ) + parser.add_argument( + "--cache-to", + type=str, + help="Export layers from the container to an external cache destination", + ) + + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + + parsed_args = parser.parse_args() + main(parsed_args) diff --git a/ops/docker_build.sh b/ops/docker_build.sh new file mode 100755 index 000000000000..0539f817ba8e --- /dev/null +++ b/ops/docker_build.sh @@ -0,0 +1,141 @@ +#!/bin/bash +## Build a CI container and cache the layers in AWS ECR (Elastic Container Registry). +## This script provides a convenient wrapper for ops/docker_build.py. +## Build-time variables (--build-arg) and container definition are fetched from +## ops/docker/ci_container.yml. +## +## Note. This script takes in all inputs via environment variables. + +INPUT_DOC=$( +cat <<-EOF +Inputs + - CONTAINER_ID: String ID uniquely identifying the container (Required) + - BRANCH_NAME: Name of the current git branch or pull request (Required) + - USE_DOCKER_CACHE: If set to 1, enable caching +EOF +) + +ECR_LIFECYCLE_RULE=$( +cat <<-EOF +{ + "rules": [ + { + "rulePriority": 1, + "selection": { + "tagStatus": "any", + "countType": "sinceImagePushed", + "countUnit": "days", + "countNumber": 30 + }, + "action": { + "type": "expire" + } + } + ] +} +EOF +) + +set -euo pipefail + +for arg in "CONTAINER_ID" "BRANCH_NAME" +do + if [[ -z "${!arg:-}" ]] + then + echo -e "Error: $arg must be set.\n${INPUT_DOC}" + exit 1 + fi +done + +# Fetch CONTAINER_DEF and BUILD_ARGS +source <(ops/docker/extract_build_args.sh ${CONTAINER_ID} | tee /dev/stderr) 2>&1 + +if [[ "${USE_DOCKER_CACHE:-}" != "1" ]] # Any value other than 1 is considered false +then + USE_DOCKER_CACHE=0 +fi + +if [[ ${USE_DOCKER_CACHE} -eq 0 ]] +then + echo "USE_DOCKER_CACHE not set to 1; caching disabled" +else + DOCKER_CACHE_ECR_ID=$(yq ".DOCKER_CACHE_ECR_ID" ops/docker/docker_cache_ecr.yml) + DOCKER_CACHE_ECR_REGION=$(yq ".DOCKER_CACHE_ECR_REGION" ops/docker/docker_cache_ecr.yml) + DOCKER_CACHE_REPO="${DOCKER_CACHE_ECR_ID}.dkr.ecr.${DOCKER_CACHE_ECR_REGION}.amazonaws.com" + echo "Using AWS ECR; repo URL = ${DOCKER_CACHE_REPO}" + # Login for Docker registry + echo "aws ecr get-login-password --region ${DOCKER_CACHE_ECR_REGION} |" \ + "docker login --username AWS --password-stdin ${DOCKER_CACHE_REPO}" + aws ecr get-login-password --region ${DOCKER_CACHE_ECR_REGION} \ + | docker login --username AWS --password-stdin ${DOCKER_CACHE_REPO} +fi + +# Pull pre-built container from the cache +# First try locating one for the particular branch or pull request +CACHE_FROM_CMD="" +IS_CACHED=0 +if [[ ${USE_DOCKER_CACHE} -eq 1 ]] +then + DOCKER_TAG="${BRANCH_NAME//\//-}" # Slashes are not allowed in Docker tag + DOCKER_URL="${DOCKER_CACHE_REPO}/${CONTAINER_ID}:${DOCKER_TAG}" + echo "docker pull --quiet ${DOCKER_URL}" + if time
docker pull --quiet "${DOCKER_URL}" + then + echo "Found a cached container for the branch ${BRANCH_NAME}: ${DOCKER_URL}" + IS_CACHED=1 + else + # If there's no pre-built container from the cache, + # use the pre-built container from the master branch. + DOCKER_URL="${DOCKER_CACHE_REPO}/${CONTAINER_ID}:master" + echo "Could not find a cached container for the branch ${BRANCH_NAME}." \ + "Using a cached container from the master branch: ${DOCKER_URL}" + echo "docker pull --quiet ${DOCKER_URL}" + if time docker pull --quiet "${DOCKER_URL}" + then + IS_CACHED=1 + else + echo "Could not find a cached container for the master branch either." + IS_CACHED=0 + fi + fi + if [[ $IS_CACHED -eq 1 ]] + then + CACHE_FROM_CMD="--cache-from type=registry,ref=${DOCKER_URL}" + fi +fi + +# Run Docker build +set -x +python3 ops/docker_build.py \ + --container-def ${CONTAINER_DEF} \ + --container-id ${CONTAINER_ID} \ + ${BUILD_ARGS} \ + --cache-to type=inline \ + ${CACHE_FROM_CMD} +set +x + +# Now cache the new container +if [[ ${USE_DOCKER_CACHE} -eq 1 ]] +then + DOCKER_URL="${DOCKER_CACHE_REPO}/${CONTAINER_ID}:${DOCKER_TAG}" + echo "docker tag ${CONTAINER_ID} ${DOCKER_URL}" + docker tag "${CONTAINER_ID}" "${DOCKER_URL}" + + # Attempt to create Docker repository; it will fail if the repository already exists + echo "aws ecr create-repository --repository-name ${CONTAINER_ID} --region ${DOCKER_CACHE_ECR_REGION}" + if aws ecr create-repository --repository-name ${CONTAINER_ID} --region ${DOCKER_CACHE_ECR_REGION} + then + # Repository was created. Now set expiration policy + echo "aws ecr put-lifecycle-policy --repository-name ${CONTAINER_ID}" \ + "--region ${DOCKER_CACHE_ECR_REGION} --lifecycle-policy-text file:///dev/stdin" + echo "${ECR_LIFECYCLE_RULE}" | aws ecr put-lifecycle-policy --repository-name ${CONTAINER_ID} \ + --region ${DOCKER_CACHE_ECR_REGION} --lifecycle-policy-text file:///dev/stdin + fi + + echo "docker push --quiet ${DOCKER_URL}" + if ! 
time docker push --quiet "${DOCKER_URL}" + then + echo "ERROR: could not update Docker cache ${DOCKER_URL}" + exit 1 + fi +fi diff --git a/ops/docker_run.py b/ops/docker_run.py new file mode 100644 index 000000000000..7e61c5a14f39 --- /dev/null +++ b/ops/docker_run.py @@ -0,0 +1,168 @@ +""" +Wrapper script to run a command inside a Docker container +""" + +import argparse +import grp +import itertools +import os +import pathlib +import pwd +import subprocess +import sys +import textwrap + +OPS_DIR = pathlib.Path(__file__).expanduser().resolve().parent +PROJECT_ROOT_DIR = OPS_DIR.parent +LINEWIDTH = 88 +TEXT_WRAPPER = textwrap.TextWrapper( + width=LINEWIDTH, + initial_indent="", + subsequent_indent=" ", + break_long_words=False, + break_on_hyphens=False, +) + + +def parse_run_args(raw_run_args: str) -> list[str]: + return [x for x in raw_run_args.split() if x] + + +def get_user_ids() -> dict[str, str]: + uid = os.getuid() + gid = os.getgid() + return { + "CI_BUILD_UID": str(uid), + "CI_BUILD_USER": pwd.getpwuid(uid).pw_name, + "CI_BUILD_GID": str(gid), + "CI_BUILD_GROUP": grp.getgrgid(gid).gr_name, + } + + +def fancy_print_cli_args(cli_args: list[str]) -> None: + print( + "=" * LINEWIDTH + + "\n" + + " \\\n".join(TEXT_WRAPPER.wrap(" ".join(cli_args))) + + "\n" + + "=" * LINEWIDTH + + "\n", + flush=True, + ) + + +def docker_run( + container_id: str, + command_args: list[str], + *, + use_gpus: bool, + workdir: pathlib.Path, + user_ids: dict[str, str], + extra_args: list[str], +) -> None: + # Command-line arguments to be passed to `docker run` + docker_run_cli_args = ["--rm", "--pid=host"] + + if use_gpus: + docker_run_cli_args.extend(["--gpus", "all"]) + + docker_run_cli_args.extend(["-v", f"{workdir}:/workspace", "-w", "/workspace"]) + docker_run_cli_args.extend( + itertools.chain.from_iterable([["-e", f"{k}={v}"] for k, v in user_ids.items()]) + ) + docker_run_cli_args.extend(extra_args) + docker_run_cli_args.append(container_id) + docker_run_cli_args.extend(command_args) + + cli_args = ["docker", "run"] + docker_run_cli_args + fancy_print_cli_args(cli_args) + subprocess.run(cli_args, check=True, encoding="utf-8") + + +def main(args: argparse.Namespace) -> None: + run_args = parse_run_args(args.run_args) + user_ids = get_user_ids() + + if args.use_gpus: + print("Using NVIDIA GPUs for `docker run`") + if args.interactive: + print("Using interactive mode for `docker run`") + run_args.append("-it") + + docker_run( + args.container_id, + args.command_args, + use_gpus=args.use_gpus, + workdir=args.workdir, + user_ids=user_ids, + extra_args=run_args, + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + usage=( + f"{sys.argv[0]} --container-id CONTAINER_ID [--use-gpus] [--interactive] " + "[--workdir WORKDIR] [--run-args RUN_ARGS] -- COMMAND_ARG " + "[COMMAND_ARG ...]" + ), + description="Run tasks inside a Docker container", + ) + parser.add_argument( + "--container-id", + type=str, + required=True, + help="String ID of the container to run.", + ) + parser.add_argument( + "--use-gpus", + action="store_true", + help=( + "Grant the container access to NVIDIA GPUs; requires the NVIDIA " + "Container Toolkit." + ), + ) + parser.add_argument( + "--interactive", + action="store_true", + help=( + "Run the container in interactive mode; requires an interactive shell " + "(TTY). With this flag, you can use Ctrl-C to interrupt a long-running " + "command."
+ ), + ) + parser.add_argument( + "--workdir", + type=lambda p: pathlib.Path(p).expanduser().resolve(), + default=PROJECT_ROOT_DIR, + help="Path to working directory; if unset, use the project's root", + ) + parser.add_argument( + "--run-args", + type=str, + default="", + help=( + "Argument(s) to be passed to `docker run`. When passing multiple " + "arguments, use single quotes to wrap them. Example: " + "--run-args '--cap-add SYS_PTRACE --shm-size=4g'" + ), + ) + parser.add_argument( + "command_args", + metavar="COMMAND_ARG", + type=str, + nargs="+", + help=( + "Argument(s) for the command to execute. NOTE. Make sure to specify " + "double-dash (--) to clearly distinguish between the command and the " + "preceding parameters. Example: --run-args '--cap-add SYS_PTRACE " + "--shm-size=4g' -- ./myprog" + ), + ) + + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + + parsed_args = parser.parse_args() + main(parsed_args) diff --git a/ops/packer/linux/bootstrap.sh b/ops/packer/linux/bootstrap.sh new file mode 100644 index 000000000000..57be6e14b507 --- /dev/null +++ b/ops/packer/linux/bootstrap.sh @@ -0,0 +1,52 @@ +#!/bin/bash +set -euo pipefail + +## Install Docker +# Add Docker's official GPG key: +sudo install -m 0755 -d /etc/apt/keyrings +sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc +sudo chmod a+r /etc/apt/keyrings/docker.asc +# Add the repository to Apt sources: +echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null +sudo apt-get update +sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin +# Allow users to use Docker without sudo +sudo usermod -aG docker ubuntu + +# Start Docker daemon +sudo systemctl is-active --quiet docker.service || sudo systemctl start docker.service +sudo systemctl is-enabled --quiet docker.service || sudo systemctl enable docker.service +sleep 10 # Docker daemon takes time to come up after installing +sudo docker info + +## Install NVIDIA Container Toolkit +curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \ + && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \ + sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \ + sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list +sudo apt-get update +sudo apt-get install -y nvidia-container-toolkit +sudo nvidia-ctk runtime configure --runtime=docker +sudo systemctl restart docker + +sleep 10 +sudo docker run --rm --gpus all ubuntu nvidia-smi +sudo systemctl stop docker + +## Install AWS CLI v2 +wget -nv https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip -O awscliv2.zip +unzip -q awscliv2.zip +sudo ./aws/install +rm -rf ./aws/ ./awscliv2.zip + +## Install jq and yq +sudo apt update && sudo apt install jq +mkdir yq/ +pushd yq/ +wget -nv https://github.com/mikefarah/yq/releases/download/v4.44.3/yq_linux_amd64.tar.gz -O - | \ + tar xz && sudo mv ./yq_linux_amd64 /usr/bin/yq +popd +rm -rf yq/ diff --git a/ops/packer/linux/install_drivers.sh b/ops/packer/linux/install_drivers.sh new file mode 100644 index 000000000000..07309be836a8 --- /dev/null +++ b/ops/packer/linux/install_drivers.sh @@ -0,0 +1,14 @@ 
+#!/bin/bash +set -euo pipefail + +## Install basic tools +echo 'debconf debconf/frontend select Noninteractive' | sudo debconf-set-selections +sudo apt-get update +sudo apt-get install -y cmake git build-essential wget ca-certificates curl unzip + +## Install CUDA Toolkit 12.6 (Driver will be installed later) +wget -nv https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb +sudo dpkg -i cuda-keyring_1.1-1_all.deb +sudo apt-get update +sudo apt-get -y install cuda-toolkit-12-6 cuda-drivers-565 +rm cuda-keyring_1.1-1_all.deb diff --git a/ops/packer/linux/linux.pkr.hcl b/ops/packer/linux/linux.pkr.hcl new file mode 100644 index 000000000000..c6990894764a --- /dev/null +++ b/ops/packer/linux/linux.pkr.hcl @@ -0,0 +1,79 @@ +packer { + required_plugins { + amazon = { + source = "github.com/hashicorp/amazon" + version = "~> 1" + } + } +} + +locals { + ami_name_prefix = "xgboost-ci" + image_name = "RunsOn worker with Ubuntu 24.04 + CUDA driver" + region = "us-west-2" + timestamp = regex_replace(timestamp(), "[- TZ:]", "") + volume_size = 40 +} + +data "amazon-ami" "aws-ubuntu-x64" { + filters = { + name = "ubuntu/images/hvm-ssd-gp3/ubuntu-noble-24.04-amd64-server-*" + root-device-type = "ebs" + virtualization-type = "hvm" + } + most_recent = true + owners = ["amazon"] +} + +source "amazon-ebs" "runs-on-linux" { + source_ami = "${data.amazon-ami.aws-ubuntu-x64.id}" + ami_name = "${local.ami_name_prefix}-runs-on-linux-${local.timestamp}" + ami_description = "${local.image_name}" + ami_regions = ["${local.region}"] + ami_virtualization_type = "hvm" + associate_public_ip_address = true + communicator = "ssh" + instance_type = "g4dn.xlarge" + region = "${local.region}" + ssh_timeout = "10m" + ssh_username = "ubuntu" + ssh_file_transfer_method = "sftp" + user_data_file = "setup_ssh.sh" + launch_block_device_mappings { + device_name = "/dev/sda1" + volume_size = "${local.volume_size}" + volume_type = "gp3" + delete_on_termination = true + } + aws_polling { # Wait up to 1 hour until the AMI is ready + delay_seconds = 15 + max_attempts = 240 + } + snapshot_tags = { + Name = "${local.image_name}" + BuildTime = "${local.timestamp}" + } + tags = { + Name = "${local.image_name}" + BuildTime = "${local.timestamp}" + } +} + +build { + sources = ["source.amazon-ebs.runs-on-linux"] + + provisioner "shell" { + script = "install_drivers.sh" + pause_after = "30s" + } + + provisioner "shell" { + expect_disconnect = true + inline = ["echo 'Reboot VM'", "sudo reboot"] + } + + provisioner "shell" { + pause_before = "1m0s" + script = "bootstrap.sh" + } +} diff --git a/ops/packer/linux/setup_ssh.sh b/ops/packer/linux/setup_ssh.sh new file mode 100644 index 000000000000..501b4da455f5 --- /dev/null +++ b/ops/packer/linux/setup_ssh.sh @@ -0,0 +1,2 @@ +#!/bin/bash +systemctl start ssh diff --git a/ops/packer/windows/bootstrap.ps1 b/ops/packer/windows/bootstrap.ps1 new file mode 100644 index 000000000000..c67f3b73fb9a --- /dev/null +++ b/ops/packer/windows/bootstrap.ps1 @@ -0,0 +1,73 @@ +## Install packages from Chocolatey + +# jq & yq +Write-Output "Installing jq and yq..." +choco install jq --version=1.7.1 +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } +choco install yq --version=4.40.2 +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } + +# AWS CLI +Write-Output "Installing AWS CLI..." +choco install awscli --version=2.18.11 +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } + +# Git +Write-Host '>>> Installing Git...' 
+choco install git --version=2.47.0 +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } + +# CMake +Write-Host '>>> Installing CMake 3.30.5...' +choco install cmake --version 3.30.5 --installargs "ADD_CMAKE_TO_PATH=System" +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } + +# Notepad++ +Write-Host '>>> Installing Notepad++...' +choco install notepadplusplus +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } + +# Miniforge3 +Write-Host '>>> Installing Miniforge3...' +choco install miniforge3 --params="'/InstallationType:AllUsers /RegisterPython:1 /D:C:\tools\miniforge3'" +C:\tools\miniforge3\Scripts\conda.exe init --user --system +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } +. "C:\Windows\System32\WindowsPowerShell\v1.0\profile.ps1" +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } +conda config --set auto_activate_base false + +# Java 11 +Write-Host '>>> Installing Java 11...' +choco install openjdk11 +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } + +# Maven +Write-Host '>>> Installing Maven...' +choco install maven +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } + +# GraphViz +Write-Host '>>> Installing GraphViz...' +choco install graphviz +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } + +# Visual Studio 2022 Community +Write-Host '>>> Installing Visual Studio 2022 Community...' +choco install visualstudio2022community ` + --params "--wait --passive --norestart" +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } +choco install visualstudio2022-workload-nativedesktop --params ` + "--wait --passive --norestart --includeOptional" +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } + +# CUDA 12.5 +Write-Host '>>> Installing CUDA 12.5...' +choco install cuda --version=12.5.1.555 +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } + +# R 4.3 +Write-Host '>>> Installing R...' 
+choco install r.project --version=4.3.2 +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } +choco install rtools --version=4.3.5550 +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } diff --git a/ops/packer/windows/install_choco.ps1 b/ops/packer/windows/install_choco.ps1 new file mode 100644 index 000000000000..131e8129feaa --- /dev/null +++ b/ops/packer/windows/install_choco.ps1 @@ -0,0 +1,14 @@ +## Adopted from https://github.com/chorrell/packer-aws-windows-openssh/blob/20c40aa60b54469b3d85650a2e2e45e35ed83bc7/files/InstallChoco.ps1 +## Author: Christopher Horrell (https://github.com/chorrell) + +$ErrorActionPreference = "Stop" + +# Install Chocolatey +# See https://chocolatey.org/install#individual +Set-ExecutionPolicy Bypass -Scope Process -Force +[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072 +Invoke-Expression ((New-Object System.Net.WebClient).DownloadString("https://community.chocolatey.org/install.ps1")) + +# Globally Auto confirm every action +# See: https://docs.chocolatey.org/en-us/faqs#why-do-i-have-to-confirm-packages-now-is-there-a-way-to-remove-this +choco feature enable -n allowGlobalConfirmation diff --git a/ops/packer/windows/setup_ssh.ps1 b/ops/packer/windows/setup_ssh.ps1 new file mode 100644 index 000000000000..a7bdee898002 --- /dev/null +++ b/ops/packer/windows/setup_ssh.ps1 @@ -0,0 +1,58 @@ + +## Adopted from https://github.com/chorrell/packer-aws-windows-openssh/blob/20c40aa60b54469b3d85650a2e2e45e35ed83bc7/files/SetupSsh.ps1 +## Author: Christopher Horrell (https://github.com/chorrell) + +# Don't display progress bars +# See: https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_preference_variables?view=powershell-7.3#progresspreference +$ProgressPreference = "SilentlyContinue" +$ErrorActionPreference = "Stop" + +# Install OpenSSH using Add-WindowsCapability +# See: https://learn.microsoft.com/en-us/windows-server/administration/openssh/openssh_install_firstuse?tabs=powershell#install-openssh-for-windows + +Write-Host "Installing and starting ssh-agent" +Add-WindowsCapability -Online -Name OpenSSH.Client~~~~0.0.1.0 +Set-Service -Name ssh-agent -StartupType Automatic +Start-Service ssh-agent + +Write-Host "Installing and starting sshd" +Add-WindowsCapability -Online -Name OpenSSH.Server~~~~0.0.1.0 +Set-Service -Name sshd -StartupType Automatic +Start-Service sshd + +# Confirm the firewall rule is configured; OpenSSH setup should create it automatically. Create it if it is missing. +if (!(Get-NetFirewallRule -Name "OpenSSH-Server-In-TCP" -ErrorAction SilentlyContinue | Select-Object Name, Enabled)) { + Write-Output "Firewall Rule 'OpenSSH-Server-In-TCP' does not exist, creating it..." + New-NetFirewallRule -Name "OpenSSH-Server-In-TCP" -DisplayName "OpenSSH Server (sshd)" -Enabled True -Direction Inbound -Protocol TCP -Action Allow -LocalPort 22 +} else { + Write-Output "Firewall rule 'OpenSSH-Server-In-TCP' already exists."
+} + +# Set default shell to Powershell +New-ItemProperty -Path "HKLM:\SOFTWARE\OpenSSH" -Name DefaultShell -Value "C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe" -PropertyType String -Force + +$keyDownloadScript = Join-Path $env:ProgramData "ssh\download-key.ps1" + +@' +# Download private key to $env:ProgramData\ssh\administrators_authorized_keys +$openSSHAuthorizedKeys = Join-Path $env:ProgramData "ssh\administrators_authorized_keys" + +$keyUrl = "http://169.254.169.254/latest/meta-data/public-keys/0/openssh-key" +Invoke-WebRequest $keyUrl -OutFile $openSSHAuthorizedKeys + +# Ensure ACL for administrators_authorized_keys is correct +# See https://learn.microsoft.com/en-us/windows-server/administration/openssh/openssh_server_configuration#authorizedkeysfile +icacls.exe $openSSHAuthorizedKeys /inheritance:r /grant "Administrators:F" /grant "SYSTEM:F" +'@ | Out-File $keyDownloadScript + +# Create Task +$taskName = "DownloadKey" +$principal = New-ScheduledTaskPrincipal -UserID "NT AUTHORITY\SYSTEM" -LogonType ServiceAccount -RunLevel Highest +$action = New-ScheduledTaskAction -Execute "Powershell.exe" -Argument "-NoProfile -File ""$keyDownloadScript""" +$trigger = New-ScheduledTaskTrigger -AtStartup +Register-ScheduledTask -Action $action -Trigger $trigger -Principal $principal -TaskName $taskName -Description $taskName + +# Fetch key via $keyDownloadScript +& Powershell.exe -ExecutionPolicy Bypass -File $keyDownloadScript + + diff --git a/ops/packer/windows/sysprep.ps1 b/ops/packer/windows/sysprep.ps1 new file mode 100644 index 000000000000..a0470309f9da --- /dev/null +++ b/ops/packer/windows/sysprep.ps1 @@ -0,0 +1,14 @@ +## Adopted from https://github.com/chorrell/packer-aws-windows-openssh/blob/20c40aa60b54469b3d85650a2e2e45e35ed83bc7/files/PrepareImage.ps1 +## Author: Christopher Horrell (https://github.com/chorrell) + +$ErrorActionPreference = "Stop" + +Write-Output "Cleaning up keys" +$openSSHAuthorizedKeys = Join-Path $env:ProgramData "ssh\administrators_authorized_keys" +Remove-Item -Recurse -Force -Path $openSSHAuthorizedKeys + +# Make sure task is enabled +Enable-ScheduledTask "DownloadKey" + +Write-Output "Running Sysprep" +& "$Env:Programfiles\Amazon\EC2Launch\ec2launch.exe" sysprep diff --git a/ops/packer/windows/windows.pkr.hcl b/ops/packer/windows/windows.pkr.hcl new file mode 100644 index 000000000000..4c14b7b75806 --- /dev/null +++ b/ops/packer/windows/windows.pkr.hcl @@ -0,0 +1,90 @@ +packer { + required_plugins { + amazon = { + source = "github.com/hashicorp/amazon" + version = "~> 1" + } + windows-update = { + version = "0.15.0" + source = "github.com/rgl/windows-update" + } + } +} + +locals { + ami_name_prefix = "xgboost-ci" + image_name = "RunsOn worker with Windows Server 2022 + ssh + CUDA driver" + region = "us-west-2" + timestamp = regex_replace(timestamp(), "[- TZ:]", "") + volume_size = 120 +} + +data "amazon-ami" "aws-windows-x64" { + filters = { + name = "Windows_Server-2022-English-Full-Base-*" + root-device-type = "ebs" + virtualization-type = "hvm" + } + most_recent = true + owners = ["amazon"] +} + +source "amazon-ebs" "runs-on-windows" { + source_ami = "${data.amazon-ami.aws-windows-x64.id}" + ami_name = "${local.ami_name_prefix}-runs-on-windows-${local.timestamp}" + ami_description = "${local.image_name}" + ami_regions = ["${local.region}"] + ami_virtualization_type = "hvm" + associate_public_ip_address = true + communicator = "ssh" + instance_type = "g4dn.xlarge" + region = "${local.region}" + ssh_timeout = "10m" + ssh_username = 
"Administrator" + ssh_file_transfer_method = "sftp" + user_data_file = "setup_ssh.ps1" + launch_block_device_mappings { + device_name = "/dev/sda1" + volume_size = "${local.volume_size}" + volume_type = "gp3" + delete_on_termination = true + } + aws_polling { # Wait up to 2.5 hours until the AMI is ready + delay_seconds = 15 + max_attempts = 600 + } + fast_launch { + enable_fast_launch = true + target_resource_count = 10 + } + snapshot_tags = { + Name = "${local.image_name}" + BuildTime = "${local.timestamp}" + } + tags = { + Name = "${local.image_name}" + BuildTime = "${local.timestamp}" + } +} + +build { + sources = ["source.amazon-ebs.runs-on-windows"] + + provisioner "windows-update" {} + + provisioner "powershell" { + script = "install_choco.ps1" + } + + provisioner "windows-restart" { + max_retries = 3 + } + + provisioner "powershell" { + script = "bootstrap.ps1" + } + + provisioner "powershell" { # Sysprep should run the last + script = "sysprep.ps1" + } +} diff --git a/tests/buildkite/cpu_only_pypkg.patch b/ops/patch/cpu_only_pypkg.patch similarity index 100% rename from tests/buildkite/cpu_only_pypkg.patch rename to ops/patch/cpu_only_pypkg.patch diff --git a/tests/buildkite/manylinux2014_warning.patch b/ops/patch/manylinux2014_warning.patch similarity index 100% rename from tests/buildkite/manylinux2014_warning.patch rename to ops/patch/manylinux2014_warning.patch diff --git a/tests/buildkite/remove_nccl_dep.patch b/ops/patch/remove_nccl_dep.patch similarity index 100% rename from tests/buildkite/remove_nccl_dep.patch rename to ops/patch/remove_nccl_dep.patch diff --git a/tests/buildkite/build-cpu-arm64.sh b/ops/pipeline/build-cpu-arm64.sh similarity index 53% rename from tests/buildkite/build-cpu-arm64.sh rename to ops/pipeline/build-cpu-arm64.sh index 8b3847ed58b9..4be57557ea36 100755 --- a/tests/buildkite/build-cpu-arm64.sh +++ b/ops/pipeline/build-cpu-arm64.sh @@ -1,47 +1,55 @@ #!/bin/bash -set -euo pipefail +set -euox pipefail WHEEL_TAG=manylinux_2_28_aarch64 echo "--- Build CPU code targeting ARM64" -source tests/buildkite/conftest.sh - -command_wrapper="tests/ci_build/ci_build.sh aarch64" +source ops/pipeline/enforce-ci.sh echo "--- Build libxgboost from the source" -$command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=aarch64_test \ - -DUSE_OPENMP=ON -DHIDE_CXX_SYMBOL=ON +python3 ops/docker_run.py \ + --container-id xgb-ci.aarch64 \ + -- ops/script/build_via_cmake.sh \ + --conda-env=aarch64_test \ + -DUSE_OPENMP=ON \ + -DHIDE_CXX_SYMBOL=ON + echo "--- Run Google Test" -$command_wrapper bash -c "cd build && ctest --extra-verbose" +python3 ops/docker_run.py \ + --container-id xgb-ci.aarch64 \ + -- bash -c "cd build && ctest --extra-verbose" echo "--- Build binary wheel" -$command_wrapper bash -c \ +python3 ops/docker_run.py \ + --container-id xgb-ci.aarch64 \ + -- bash -c \ "cd python-package && rm -rf dist/* && pip wheel --no-deps -v . 
--wheel-dir dist/" -$command_wrapper python tests/ci_build/rename_whl.py \ +python3 ops/script/rename_whl.py \ --wheel-path python-package/dist/*.whl \ - --commit-hash ${BUILDKITE_COMMIT} \ + --commit-hash ${GITHUB_SHA} \ --platform-tag ${WHEEL_TAG} echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard" -$command_wrapper auditwheel repair --plat ${WHEEL_TAG} python-package/dist/*.whl -$command_wrapper python tests/ci_build/rename_whl.py \ +python3 ops/docker_run.py \ + --container-id xgb-ci.aarch64 \ + -- auditwheel repair --plat ${WHEEL_TAG} python-package/dist/*.whl +python3 ops/script/rename_whl.py \ --wheel-path wheelhouse/*.whl \ - --commit-hash ${BUILDKITE_COMMIT} \ + --commit-hash ${GITHUB_SHA} \ --platform-tag ${WHEEL_TAG} mv -v wheelhouse/*.whl python-package/dist/ + # Make sure that libgomp.so is vendored in the wheel -$command_wrapper bash -c \ +python3 ops/docker_run.py \ + --container-id xgb-ci.aarch64 \ + -- bash -c \ "unzip -l python-package/dist/*.whl | grep libgomp || exit -1" echo "--- Upload Python wheel" -buildkite-agent artifact upload "python-package/dist/*.whl" if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] then aws s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/${BRANCH_NAME}/ \ --acl public-read --no-progress fi - -echo "--- Stash XGBoost CLI executable" -buildkite-agent artifact upload ./xgboost diff --git a/ops/pipeline/build-cpu.sh b/ops/pipeline/build-cpu.sh new file mode 100755 index 000000000000..22384d056f15 --- /dev/null +++ b/ops/pipeline/build-cpu.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +set -euox pipefail + +source ops/pipeline/enforce-ci.sh + +echo "--- Build CPU code" + +# This step is not necessary, but here we include it, to ensure that +# DMLC_CORE_USE_CMAKE flag is correctly propagated. We want to make sure that we use +# the configured header build/dmlc/build_config.h instead of +# include/dmlc/build_config_default.h. 
+rm -fv dmlc-core/include/dmlc/build_config_default.h + +# Sanitizer tests +echo "--- Run Google Test with sanitizer enabled" +# Work around https://github.com/google/sanitizers/issues/1614 +sudo sysctl vm.mmap_rnd_bits=28 +python3 ops/docker_run.py \ + --container-id xgb-ci.cpu \ + -- ops/script/build_via_cmake.sh \ + -DUSE_SANITIZER=ON \ + -DENABLED_SANITIZERS="address;leak;undefined" \ + -DCMAKE_BUILD_TYPE=Debug \ + -DSANITIZER_PATH=/usr/lib/x86_64-linux-gnu/ +python3 ops/docker_run.py \ + --container-id xgb-ci.cpu \ + --run-args '-e ASAN_SYMBOLIZER_PATH=/usr/bin/llvm-symbolizer + -e ASAN_OPTIONS=symbolize=1 + -e UBSAN_OPTIONS=print_stacktrace=1:log_path=ubsan_error.log + --cap-add SYS_PTRACE' \ + -- bash -c \ + "cd build && ./testxgboost --gtest_filter=-*DeathTest*" + +echo "--- Run Google Test" +python3 ops/docker_run.py \ + --container-id xgb-ci.cpu \ + -- ops/script/build_via_cmake.sh \ + -DCMAKE_PREFIX_PATH=/opt/grpc \ + -DPLUGIN_FEDERATED=ON +python3 ops/docker_run.py \ + --container-id xgb-ci.cpu \ + -- bash -c "cd build && ctest --extra-verbose" diff --git a/ops/pipeline/build-cuda-with-rmm.sh b/ops/pipeline/build-cuda-with-rmm.sh new file mode 100755 index 000000000000..50bbf8b340f3 --- /dev/null +++ b/ops/pipeline/build-cuda-with-rmm.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +set -euox pipefail + +WHEEL_TAG=manylinux_2_28_x86_64 + +source ops/pipeline/enforce-ci.sh + +echo "--- Build with CUDA with RMM" + +if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]] +then + arch_flag="-DGPU_COMPUTE_VER=75" +else + arch_flag="" +fi + +echo "--- Build libxgboost from the source" +python3 ops/docker_run.py \ + --container-id xgb-ci.gpu_build_rockylinux8 \ + -- ops/script/build_via_cmake.sh \ + -DCMAKE_PREFIX_PATH="/opt/grpc;/opt/rmm;/opt/rmm/lib64/rapids/cmake" \ + -DUSE_CUDA=ON \ + -DUSE_OPENMP=ON \ + -DHIDE_CXX_SYMBOLS=ON \ + -DPLUGIN_FEDERATED=ON \ + -DPLUGIN_RMM=ON \ + -DUSE_NCCL=ON \ + -DUSE_NCCL_LIB_PATH=ON \ + -DNCCL_INCLUDE_DIR=/usr/include \ + -DUSE_DLOPEN_NCCL=ON \ + ${arch_flag} + +echo "--- Build binary wheel" +python3 ops/docker_run.py \ + --container-id xgb-ci.gpu_build_rockylinux8 \ + -- bash -c \ + "cd python-package && rm -rf dist/* && pip wheel --no-deps -v . 
--wheel-dir dist/" +python3 ops/script/rename_whl.py \ + --wheel-path python-package/dist/*.whl \ + --commit-hash ${GITHUB_SHA} \ + --platform-tag ${WHEEL_TAG} + +echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard" +python3 ops/docker_run.py \ + --container-id xgb-ci.$WHEEL_TAG \ + -- auditwheel repair \ + --plat ${WHEEL_TAG} python-package/dist/*.whl +python3 ops/script/rename_whl.py \ + --wheel-path wheelhouse/*.whl \ + --commit-hash ${GITHUB_SHA} \ + --platform-tag ${WHEEL_TAG} +mv -v wheelhouse/*.whl python-package/dist/ +# Make sure that libgomp.so is vendored in the wheel +python3 ops/docker_run.py \ + --container-id xgb-ci.$WHEEL_TAG \ + -- bash -c \ + "unzip -l python-package/dist/*.whl | grep libgomp || exit -1" + +echo "--- Upload Python wheel" +if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] +then + aws s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/experimental_build_with_rmm/ \ + --acl public-read --no-progress +fi diff --git a/ops/pipeline/build-cuda.sh b/ops/pipeline/build-cuda.sh new file mode 100755 index 000000000000..4ed82618da23 --- /dev/null +++ b/ops/pipeline/build-cuda.sh @@ -0,0 +1,79 @@ +#!/bin/bash + +set -euox pipefail + +WHEEL_TAG=manylinux_2_28_x86_64 + +source ops/pipeline/enforce-ci.sh + +echo "--- Build with CUDA" + +if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]] +then + arch_flag="-DGPU_COMPUTE_VER=75" +else + arch_flag="" +fi + +echo "--- Build libxgboost from the source" +set -x +# Work around https://github.com/NVIDIA/cccl/issues/1956 +# TODO(hcho3): Remove this once new CUDA version ships with CCCL 2.6.0+ +git clone https://github.com/NVIDIA/cccl.git -b v2.6.1 --quiet +python3 ops/docker_run.py \ + --container-id xgb-ci.gpu_build_rockylinux8 \ + -- ops/script/build_via_cmake.sh \ + -DCMAKE_PREFIX_PATH="/opt/grpc;/workspace/cccl" \ + -DUSE_CUDA=ON \ + -DUSE_OPENMP=ON \ + -DHIDE_CXX_SYMBOLS=ON \ + -DPLUGIN_FEDERATED=ON \ + -DUSE_NCCL=ON \ + -DUSE_NCCL_LIB_PATH=ON \ + -DNCCL_INCLUDE_DIR=/usr/include \ + -DUSE_DLOPEN_NCCL=ON \ + ${arch_flag} + +echo "--- Build binary wheel" +python3 ops/docker_run.py \ + --container-id xgb-ci.gpu_build_rockylinux8 \ + -- bash -c \ + "cd python-package && rm -rf dist/* && pip wheel --no-deps -v . 
--wheel-dir dist/" +python3 ops/script/rename_whl.py \ + --wheel-path python-package/dist/*.whl \ + --commit-hash ${GITHUB_SHA} \ + --platform-tag ${WHEEL_TAG} + +echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard" +python3 ops/docker_run.py \ + --container-id xgb-ci.manylinux_2_28_x86_64 \ + -- auditwheel repair \ + --plat ${WHEEL_TAG} python-package/dist/*.whl +python3 ops/script/rename_whl.py \ + --wheel-path wheelhouse/*.whl \ + --commit-hash ${GITHUB_SHA} \ + --platform-tag ${WHEEL_TAG} +mv -v wheelhouse/*.whl python-package/dist/ +# Make sure that libgomp.so is vendored in the wheel +python3 ops/docker_run.py \ + --container-id xgb-ci.manylinux_2_28_x86_64 \ + -- bash -c "unzip -l python-package/dist/*.whl | grep libgomp || exit -1" + +echo "--- Upload Python wheel" +if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] +then + aws s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/${BRANCH_NAME}/ \ + --acl public-read --no-progress + + # Generate the meta info which includes xgboost version and the commit info + python3 ops/docker_run.py \ + --container-id xgb-ci.gpu_build_rockylinux8 \ + -- python ops/script/format_wheel_meta.py \ + --wheel-path python-package/dist/*.whl \ + --commit-hash ${GITHUB_SHA} \ + --platform-tag ${WHEEL_TAG} \ + --meta-path python-package/dist/ + aws s3 cp python-package/dist/meta.json s3://xgboost-nightly-builds/${BRANCH_NAME}/ \ + --acl public-read --no-progress +fi +echo "-- Stash C++ test executable (testxgboost)" diff --git a/tests/ci_build/build_r_pkg_with_cuda.sh b/ops/pipeline/build-gpu-rpkg-impl.sh similarity index 73% rename from tests/ci_build/build_r_pkg_with_cuda.sh rename to ops/pipeline/build-gpu-rpkg-impl.sh index 78a2afc1cdf7..2815b8f448f1 100755 --- a/tests/ci_build/build_r_pkg_with_cuda.sh +++ b/ops/pipeline/build-gpu-rpkg-impl.sh @@ -1,8 +1,12 @@ #!/bin/bash -set -e -set -x -if [ "$#" -ne 1 ] +## Build XGBoost R package with GPU support and package it in a tarball. +## Users will be able to install it without having CTK installed +## (only a compatible NVIDIA driver is needed). + +set -euo pipefail + +if [[ "$#" -ne 1 ]] then echo "Build the R package tarball with CUDA code. 
Usage: $0 [commit hash]" exit 1 @@ -10,7 +14,7 @@ fi commit_hash="$1" -python tests/ci_build/test_r_package.py --task=pack +python3 ops/script/test_r_package.py --task=pack mv xgboost/ xgboost_rpack/ mkdir build diff --git a/tests/buildkite/build-gpu-rpkg.sh b/ops/pipeline/build-gpu-rpkg.sh similarity index 50% rename from tests/buildkite/build-gpu-rpkg.sh rename to ops/pipeline/build-gpu-rpkg.sh index 83bcd9eb9c7b..e85826f36a26 100755 --- a/tests/buildkite/build-gpu-rpkg.sh +++ b/ops/pipeline/build-gpu-rpkg.sh @@ -1,16 +1,14 @@ #!/bin/bash -set -euo pipefail +set -euox pipefail -source tests/buildkite/conftest.sh +source ops/pipeline/enforce-ci.sh echo "--- Build XGBoost R package with CUDA" - -tests/ci_build/ci_build.sh gpu_build_r_rockylinux8 \ - --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \ - --build-arg R_VERSION_ARG=${R_VERSION} \ - tests/ci_build/build_r_pkg_with_cuda.sh \ - ${BUILDKITE_COMMIT} +python3 ops/docker_run.py \ + --container-id xgb-ci.gpu_build_r_rockylinux8 \ + -- ops/pipeline/build-gpu-rpkg-impl.sh \ + ${GITHUB_SHA} if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] then diff --git a/ops/pipeline/build-jvm-doc-impl.sh b/ops/pipeline/build-jvm-doc-impl.sh new file mode 100755 index 000000000000..4e95f284e25c --- /dev/null +++ b/ops/pipeline/build-jvm-doc-impl.sh @@ -0,0 +1,43 @@ +#!/bin/bash +## Build docs for the JVM packages and package it in a tarball +## Note: Note: this script assumes that the user has already built libxgboost4j.so +## and place it in the lib/ directory. + +if [[ $# -ne 1 ]] +then + echo "Usage: $0 [branch name]" + exit 1 +fi + +set -euo pipefail + +branch_name=$1 + +# Copy in libxgboost4j.so +mkdir -p jvm-packages/xgboost4j/src/main/resources/lib/linux/x86_64/ +cp -v lib/libxgboost4j.so jvm-packages/xgboost4j/src/main/resources/lib/linux/x86_64/ + +cd jvm-packages/ +# Install JVM packages in local Maven repository +mvn --no-transfer-progress install -Pdocs +# Build Scaladocs +mvn --no-transfer-progress scala:doc -Pdocs +# Build Javadocs +mvn --no-transfer-progress javadoc:javadoc -Pdocs + +# Package JVM docs in a tarball +mkdir -p tmp/scaladocs +cp -rv xgboost4j/target/reports/apidocs/ ./tmp/javadocs/ +cp -rv xgboost4j/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j/ +cp -rv xgboost4j-spark/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j-spark/ +cp -rv xgboost4j-spark-gpu/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j-spark-gpu/ +cp -rv xgboost4j-flink/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j-flink/ + +cd tmp +tar cvjf ${branch_name}.tar.bz2 javadocs/ scaladocs/ +mv ${branch_name}.tar.bz2 .. +cd .. +rm -rfv tmp/ + +set +x +set +e diff --git a/ops/pipeline/build-jvm-doc.sh b/ops/pipeline/build-jvm-doc.sh new file mode 100755 index 000000000000..0c1afe46e212 --- /dev/null +++ b/ops/pipeline/build-jvm-doc.sh @@ -0,0 +1,19 @@ +#!/bin/bash +## Build docs for the JVM packages and package it in a tarball +## Note: Note: this script assumes that the user has already built libxgboost4j.so +## and place it in the lib/ directory. 
+ +set -euox pipefail + +source ops/pipeline/enforce-ci.sh + +echo "--- Build JVM packages doc" +python3 ops/docker_run.py \ + --container-id xgb-ci.jvm_gpu_build \ + -- ops/pipeline/build-jvm-doc-impl.sh ${BRANCH_NAME} +if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] +then + echo "--- Upload JVM packages doc" + aws s3 cp jvm-packages/${BRANCH_NAME}.tar.bz2 \ + s3://xgboost-docs/${BRANCH_NAME}.tar.bz2 --acl public-read --no-progress +fi diff --git a/ops/pipeline/build-jvm-gpu.sh b/ops/pipeline/build-jvm-gpu.sh new file mode 100755 index 000000000000..6bcd2a327553 --- /dev/null +++ b/ops/pipeline/build-jvm-gpu.sh @@ -0,0 +1,33 @@ +#!/bin/bash +## Build libxgboost4j.so with CUDA + +set -euo pipefail + +source ops/pipeline/enforce-ci.sh + +echo "--- Build libxgboost4j.so with CUDA" + +if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]] +then + arch_flag="-DGPU_COMPUTE_VER=75" +else + arch_flag="" +fi + +COMMAND=$( +cat <<-EOF +cd build-gpu/ && \ +cmake .. -DCMAKE_PREFIX_PATH=/workspace/cccl -GNinja -DUSE_CUDA=ON -DUSE_NCCL=ON \ + -DJVM_BINDINGS=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ${arch_flag} && \ + ninja +EOF +) + +set -x +mkdir -p build-gpu/ +# Work around https://github.com/NVIDIA/cccl/issues/1956 +# TODO(hcho3): Remove this once new CUDA version ships with CCCL 2.6.0+ +git clone https://github.com/NVIDIA/cccl.git -b v2.6.1 --quiet --depth 1 +python3 ops/docker_run.py \ + --container-id xgb-ci.jvm_gpu_build \ + -- bash -c "${COMMAND}" diff --git a/ops/pipeline/build-jvm-macos-apple-silicon.sh b/ops/pipeline/build-jvm-macos-apple-silicon.sh new file mode 100755 index 000000000000..99ca20d7e1e3 --- /dev/null +++ b/ops/pipeline/build-jvm-macos-apple-silicon.sh @@ -0,0 +1,44 @@ +#!/bin/bash +## Build libxgboost4j.dylib targeting MacOS (Apple Silicon) + +set -euox pipefail + +source ops/pipeline/enforce-ci.sh + +# Display system info +echo "--- Display system information" +set -x +system_profiler SPSoftwareDataType +sysctl -n machdep.cpu.brand_string +uname -m +set +x + +brew install ninja libomp + +# Build XGBoost4J binary +echo "--- Build libxgboost4j.dylib" +set -x +mkdir build +pushd build +export JAVA_HOME=$(/usr/libexec/java_home) +cmake .. 
-GNinja -DJVM_BINDINGS=ON -DUSE_OPENMP=ON -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15 +ninja -v +popd +rm -rf build +otool -L lib/libxgboost.dylib +set +x + +echo "--- Upload libxgboost4j.dylib" +set -x +pushd lib +libname=libxgboost4j_m1_${GITHUB_SHA}.dylib +mv -v libxgboost4j.dylib ${libname} + +if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] +then + aws s3 cp ${libname} \ + s3://xgboost-nightly-builds/${BRANCH_NAME}/libxgboost4j/ \ + --acl public-read --no-progress +fi +popd +set +x diff --git a/tests/buildkite/build-jvm-macos-m1.sh b/ops/pipeline/build-jvm-macos-intel.sh old mode 100644 new mode 100755 similarity index 80% rename from tests/buildkite/build-jvm-macos-m1.sh rename to ops/pipeline/build-jvm-macos-intel.sh index 1d2e5e8703bc..ecf480d3c063 --- a/tests/buildkite/build-jvm-macos-m1.sh +++ b/ops/pipeline/build-jvm-macos-intel.sh @@ -1,8 +1,9 @@ #!/bin/bash +## Build libxgboost4j.dylib targeting MacOS (Intel) -set -euo pipefail +set -euox pipefail -source tests/buildkite/conftest.sh +source ops/pipeline/enforce-ci.sh # Display system info echo "--- Display system information" @@ -12,6 +13,8 @@ sysctl -n machdep.cpu.brand_string uname -m set +x +brew install ninja libomp + # Build XGBoost4J binary echo "--- Build libxgboost4j.dylib" set -x @@ -28,9 +31,9 @@ set +x echo "--- Upload libxgboost4j.dylib" set -x pushd lib -libname=libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib +libname=libxgboost4j_intel_${GITHUB_SHA}.dylib mv -v libxgboost4j.dylib ${libname} -buildkite-agent artifact upload ${libname} + if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] then aws s3 cp ${libname} \ diff --git a/tests/buildkite/build-jvm-linux-arm64-manylinux2014.sh b/ops/pipeline/build-jvm-manylinux2014.sh old mode 100644 new mode 100755 similarity index 62% rename from tests/buildkite/build-jvm-linux-arm64-manylinux2014.sh rename to ops/pipeline/build-jvm-manylinux2014.sh index e7fec780b956..93fa03d2eb0b --- a/tests/buildkite/build-jvm-linux-arm64-manylinux2014.sh +++ b/ops/pipeline/build-jvm-manylinux2014.sh @@ -1,25 +1,34 @@ #!/bin/bash +## Build libxgboost4j.so targeting glibc 2.17 systems -set -euo pipefail +set -euox pipefail -source tests/buildkite/conftest.sh +source ops/pipeline/enforce-ci.sh -command_wrapper="tests/ci_build/ci_build.sh jvm_manylinux2014_aarch64" +if [ $# -ne 1 ]; then + echo "Usage: $0 {x86_64,aarch64}" + exit 1 +fi + +arch=$1 + +image="xgb-ci.manylinux2014_${arch}" # Build XGBoost4J binary echo "--- Build libxgboost4j.so (targeting glibc 2.17)" set -x mkdir build -$command_wrapper bash -c \ +python3 ops/docker_run.py \ + --container-id ${image} \ + -- bash -c \ "cd build && cmake .. 
-DJVM_BINDINGS=ON -DUSE_OPENMP=ON && make -j$(nproc)" ldd lib/libxgboost4j.so objdump -T lib/libxgboost4j.so | grep GLIBC_ | sed 's/.*GLIBC_\([.0-9]*\).*/\1/g' | sort -Vu echo "--- Upload libxgboost4j.so" pushd lib -libname=libxgboost4j_linux_arm64_${BUILDKITE_COMMIT}.so +libname=libxgboost4j_linux_${arch}_${GITHUB_SHA}.so mv -v libxgboost4j.so ${libname} -buildkite-agent artifact upload ${libname} if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] then aws s3 cp ${libname} \ diff --git a/tests/buildkite/build-manylinux2014.sh b/ops/pipeline/build-manylinux2014.sh similarity index 59% rename from tests/buildkite/build-manylinux2014.sh rename to ops/pipeline/build-manylinux2014.sh index 426d32b5c361..7802fa555187 100755 --- a/tests/buildkite/build-manylinux2014.sh +++ b/ops/pipeline/build-manylinux2014.sh @@ -1,6 +1,8 @@ #!/bin/bash -set -euo pipefail +set -euox pipefail + +source ops/pipeline/enforce-ci.sh if [ $# -ne 1 ]; then echo "Usage: $0 {x86_64,aarch64}" @@ -9,24 +11,28 @@ fi arch=$1 -source tests/buildkite/conftest.sh - WHEEL_TAG="manylinux2014_${arch}" -command_wrapper="tests/ci_build/ci_build.sh ${WHEEL_TAG}" +image="xgb-ci.$WHEEL_TAG" + python_bin="/opt/python/cp310-cp310/bin/python" echo "--- Build binary wheel for ${WHEEL_TAG}" # Patch to add warning about manylinux2014 variant -patch -p0 < tests/buildkite/remove_nccl_dep.patch -patch -p0 < tests/buildkite/manylinux2014_warning.patch -$command_wrapper bash -c \ +patch -p0 < ops/patch/remove_nccl_dep.patch +patch -p0 < ops/patch/manylinux2014_warning.patch +python3 ops/docker_run.py \ + --container-id ${image} \ + -- bash -c \ "cd python-package && ${python_bin} -m pip wheel --no-deps -v . --wheel-dir dist/" -git checkout python-package/pyproject.toml python-package/xgboost/core.py # discard the patch +git checkout python-package/pyproject.toml python-package/xgboost/core.py + # discard the patch -$command_wrapper auditwheel repair --plat ${WHEEL_TAG} python-package/dist/*.whl -$command_wrapper ${python_bin} tests/ci_build/rename_whl.py \ +python3 ops/docker_run.py \ + --container-id ${image} \ + -- auditwheel repair --plat ${WHEEL_TAG} python-package/dist/*.whl +python3 ops/script/rename_whl.py \ --wheel-path wheelhouse/*.whl \ - --commit-hash ${BUILDKITE_COMMIT} \ + --commit-hash ${GITHUB_SHA} \ --platform-tag ${WHEEL_TAG} rm -rf python-package/dist/ mkdir python-package/dist/ @@ -34,25 +40,25 @@ mv -v wheelhouse/*.whl python-package/dist/ echo "--- Build binary wheel for ${WHEEL_TAG} (CPU only)" # Patch to rename pkg to xgboost-cpu -patch -p0 < tests/buildkite/remove_nccl_dep.patch -patch -p0 < tests/buildkite/cpu_only_pypkg.patch -$command_wrapper bash -c \ +patch -p0 < ops/patch/remove_nccl_dep.patch +patch -p0 < ops/patch/cpu_only_pypkg.patch +python3 ops/docker_run.py \ + --container-id ${image} \ + -- bash -c \ "cd python-package && ${python_bin} -m pip wheel --no-deps -v . 
--wheel-dir dist/" git checkout python-package/pyproject.toml # discard the patch -$command_wrapper auditwheel repair --plat ${WHEEL_TAG} python-package/dist/xgboost_cpu-*.whl -$command_wrapper ${python_bin} tests/ci_build/rename_whl.py \ +python3 ops/docker_run.py \ + --container-id ${image} \ + -- auditwheel repair --plat ${WHEEL_TAG} python-package/dist/xgboost_cpu-*.whl +python3 ops/script/rename_whl.py \ --wheel-path wheelhouse/xgboost_cpu-*.whl \ - --commit-hash ${BUILDKITE_COMMIT} \ + --commit-hash ${GITHUB_SHA} \ --platform-tag ${WHEEL_TAG} rm -v python-package/dist/xgboost_cpu-*.whl mv -v wheelhouse/xgboost_cpu-*.whl python-package/dist/ echo "--- Upload Python wheel" -for wheel in python-package/dist/*.whl -do - buildkite-agent artifact upload "${wheel}" -done if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] then for wheel in python-package/dist/*.whl diff --git a/tests/ci_build/build_python_wheels.sh b/ops/pipeline/build-python-wheels-macos.sh old mode 100644 new mode 100755 similarity index 94% rename from tests/ci_build/build_python_wheels.sh rename to ops/pipeline/build-python-wheels-macos.sh index d9927905cf83..697514c0c3ad --- a/tests/ci_build/build_python_wheels.sh +++ b/ops/pipeline/build-python-wheels-macos.sh @@ -1,7 +1,6 @@ #!/bin/bash -set -e -set -x +set -euox pipefail if [[ $# -ne 2 ]]; then echo "Usage: $0 [platform_id] [commit ID]" @@ -31,7 +30,6 @@ if [[ "$platform_id" == macosx_* ]]; then # Set up environment variables to configure cibuildwheel export CIBW_BUILD=cp${cpython_ver}-${platform_id} export CIBW_ARCHS=${cibw_archs} - export CIBW_ENVIRONMENT=${setup_env_var} export CIBW_TEST_SKIP='*-macosx_arm64' export CIBW_BUILD_VERBOSITY=3 else @@ -44,7 +42,7 @@ export CIBW_REPAIR_WHEEL_COMMAND_MACOS="delocate-wheel --require-archs {delocate python -m pip install cibuildwheel python -m cibuildwheel python-package --output-dir wheelhouse -python tests/ci_build/rename_whl.py \ +python ops/script/rename_whl.py \ --wheel-path wheelhouse/*.whl \ --commit-hash ${commit_id} \ --platform-tag ${wheel_tag} diff --git a/ops/pipeline/build-test-jvm-packages-impl.sh b/ops/pipeline/build-test-jvm-packages-impl.sh new file mode 100755 index 000000000000..3290bf0f17c9 --- /dev/null +++ b/ops/pipeline/build-test-jvm-packages-impl.sh @@ -0,0 +1,98 @@ +#!/bin/bash +## Build and test JVM packages. +## +## Note. This script takes in all inputs via environment variables. + +INPUT_DOC=$( +cat <<-EOF +Inputs + - SCALA_VERSION: Scala version, either 2.12 or 2.13 (Required) + - USE_CUDA: Set to 1 to enable CUDA + - SKIP_NATIVE_BUILD: Set to 1 to have the JVM packages use an externally provided + libxgboost4j.so. (Usually Maven will invoke create_jni.py to + build it from scratch.) When using this option, make sure to + place libxgboost4j.so in lib/ directory. +EOF +) + +set -euo pipefail + +for arg in "SCALA_VERSION" +do + if [[ -z "${!arg:-}" ]] + then + echo -e "Error: $arg must be set.\n${INPUT_DOC}" + exit 1 + fi +done + +set -x + +# Set Scala version +if [[ "${SCALA_VERSION}" == "2.12" || "${SCALA_VERSION}" == "2.13" ]] +then + python ops/script/change_scala_version.py --scala-version ${SCALA_VERSION} --purge-artifacts +else + echo "Error: SCALA_VERSION must be either 2.12 or 2.13" + exit 2 +fi + +# If SKIP_NATIVE_BUILD is set, copy in libxgboost4j.so from lib/ +# Also copy in other files needed for testing. (Usually create_jni.py would perform this +# step, but we need to do it manually here.) 
+if [[ "${SKIP_NATIVE_BUILD:-}" == "1" ]] +then + echo "Using externally provided libxgboost4j.so. Locating one from lib/..." + mkdir -p jvm-packages/xgboost4j/src/main/resources/lib/linux/x86_64/ + cp -v lib/libxgboost4j.so jvm-packages/xgboost4j/src/main/resources/lib/linux/x86_64/ + mkdir -p jvm-packages/xgboost4j/src/test/resources + mkdir -p jvm-packages/xgboost4j-spark/src/test/resources + mkdir -p jvm-packages/xgboost4j-spark-gpu/src/test/resources + + # Generate machine.txt.* files from the CLI regression demo + # TODO(hcho3): Remove once CLI is removed + pushd demo/CLI/regression + python3 mapfeat.py + python3 mknfold.py machine.txt 1 + popd + + cp -v demo/data/agaricus.* \ + jvm-packages/xgboost4j/src/test/resources + cp -v demo/CLI/regression/machine.txt.t* demo/data/agaricus.* \ + jvm-packages/xgboost4j-spark/src/test/resources + cp -v demo/data/veterans_lung_cancer.csv \ + jvm-packages/xgboost4j-spark/src/test/resources/rank.train.csv \ + jvm-packages/xgboost4j-spark-gpu/src/test/resources +fi + +cd jvm-packages/ + +# Ensure that XGBoost4J-Spark is compatible with multiple versions of Spark +if [[ "${USE_CUDA:-}" != "1" && "${SCALA_VERSION}" == "2.12" ]] +then + for spark_version in 3.1.3 3.2.4 3.3.4 3.4.3 + do + mvn --no-transfer-progress clean package -Dspark.version=${spark_version} \ + -pl xgboost4j,xgboost4j-spark + done +fi + +set +x +mvn_options="" +if [[ "${USE_CUDA:-}" == "1" ]] +then + mvn_options="${mvn_options} -Pgpu" +fi +if [[ "${SKIP_NATIVE_BUILD:-}" == "1" ]] +then + mvn_options="${mvn_options} -Dskip.native.build=true" +fi +set -x + +mvn --no-transfer-progress clean install ${mvn_options} + +# Integration tests +if [[ "${USE_CUDA:-}" != "1" ]] +then + mvn --no-transfer-progress test -pl xgboost4j-example +fi diff --git a/ops/pipeline/build-test-jvm-packages.sh b/ops/pipeline/build-test-jvm-packages.sh new file mode 100755 index 000000000000..1feddf2bff98 --- /dev/null +++ b/ops/pipeline/build-test-jvm-packages.sh @@ -0,0 +1,30 @@ +#!/bin/bash +## Build and test JVM packages. +## +## Note. This script takes in all inputs via environment variables. + +INPUT_DOC=$( +cat <<-EOF +Inputs + - SCALA_VERSION: Scala version, either 2.12 or 2.13 (Required) +EOF +) + +set -euo pipefail + +source ops/pipeline/enforce-ci.sh + +for arg in "SCALA_VERSION" +do + if [[ -z "${!arg:-}" ]] + then + echo -e "Error: $arg must be set.\n${INPUT_DOC}" + exit 1 + fi +done + +set -x + +python3 ops/docker_run.py --container-id xgb-ci.jvm \ + --run-args "-e SCALA_VERSION=${SCALA_VERSION}" \ + -- ops/pipeline/build-test-jvm-packages-impl.sh diff --git a/ops/pipeline/build-win64-gpu.ps1 b/ops/pipeline/build-win64-gpu.ps1 new file mode 100644 index 000000000000..76cc955059b8 --- /dev/null +++ b/ops/pipeline/build-win64-gpu.ps1 @@ -0,0 +1,46 @@ +$ErrorActionPreference = "Stop" + +. ops/pipeline/enforce-ci.ps1 + +Write-Host "--- Build libxgboost on Windows with CUDA" + +nvcc --version +if ( $is_release_branch -eq 0 ) { + $arch_flag = "-DGPU_COMPUTE_VER=75" +} else { + $arch_flag = "" +} + +# Work around https://github.com/NVIDIA/cccl/issues/1956 +# TODO(hcho3): Remove this once new CUDA version ships with CCCL 2.6.0+ +git clone https://github.com/NVIDIA/cccl.git -b v2.6.1 --quiet +mkdir build +cd build +cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON ` + -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DBUILD_DEPRECATED_CLI=ON ` + -DCMAKE_PREFIX_PATH="$(Get-Location)/../cccl" ${arch_flag} +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } +cmake --build . 
--config Release -- /m /nodeReuse:false ` + "/consoleloggerparameters:ShowCommandLine;Verbosity=minimal" +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } + +Write-Host "--- Build binary wheel" +cd ../python-package +conda activate +pip install --user -v "pip>=23" +pip --version +pip wheel --no-deps -v . --wheel-dir dist/ +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } +python ../ops/script/rename_whl.py ` + --wheel-path (Get-ChildItem dist/*.whl | Select-Object -Expand FullName) ` + --commit-hash $Env:GITHUB_SHA ` + --platform-tag win_amd64 +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } + +Write-Host "--- Upload Python wheel" +cd .. +if ( $is_release_branch -eq 1 ) { + aws s3 cp (Get-ChildItem python-package/dist/*.whl | Select-Object -Expand FullName) ` + s3://xgboost-nightly-builds/$Env:BRANCH_NAME/ --acl public-read --no-progress + if ($LASTEXITCODE -ne 0) { throw "Last command failed" } +} diff --git a/ops/pipeline/deploy-jvm-packages-impl.sh b/ops/pipeline/deploy-jvm-packages-impl.sh new file mode 100755 index 000000000000..36fd23a583d6 --- /dev/null +++ b/ops/pipeline/deploy-jvm-packages-impl.sh @@ -0,0 +1,45 @@ +#!/bin/bash +## Deploy JVM packages to xgboost-maven-repo S3 bucket + +set -euox pipefail + +if [[ "$#" -lt 1 ]] +then + echo "Usage: $0 {cpu,gpu}" + exit 1 +fi + +variant="$1" + +maven_options="-DskipTests -Dmaven.test.skip=true -Dskip.native.build=true" +case "$variant" in + cpu) + # CPU variant + for scala_version in 2.12 2.13 + do + python ops/script/change_scala_version.py --scala-version ${scala_version} --purge-artifacts + pushd jvm-packages + mvn --no-transfer-progress deploy -Pdefault,release-to-s3 ${maven_options} + mvn clean + mvn clean -Pdefault,release-to-s3 + popd + done + ;; + gpu) + # GPU variant + for scala_version in 2.12 2.13 + do + python ops/script/change_scala_version.py --scala-version ${scala_version} --purge-artifacts + pushd jvm-packages + mvn --no-transfer-progress install -Pgpu ${maven_options} + mvn --no-transfer-progress deploy -Pgpu,release-to-s3 -pl xgboost4j-spark-gpu ${maven_options} + mvn clean + mvn clean -Pgpu,release-to-s3 + popd + done + ;; + *) + echo "Unrecognized argument: $variant" + exit 2 + ;; +esac diff --git a/ops/pipeline/deploy-jvm-packages.sh b/ops/pipeline/deploy-jvm-packages.sh new file mode 100755 index 000000000000..866b6dded393 --- /dev/null +++ b/ops/pipeline/deploy-jvm-packages.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +set -euox pipefail + +source ops/pipeline/enforce-ci.sh + +if [[ "$#" -lt 2 ]] +then + echo "Usage: $0 {cpu,gpu} {container_id}" + exit 1 +fi + +variant="$1" +container_id="$2" + +# if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] +# then + echo "--- Deploy JVM packages to xgboost-maven-repo S3 repo" + python3 ops/docker_run.py --container-id "${container_id}" \ + -- ops/pipeline/deploy-jvm-packages-impl.sh "${variant}" +# fi diff --git a/ops/pipeline/enforce-ci.ps1 b/ops/pipeline/enforce-ci.ps1 new file mode 100644 index 000000000000..0528472be6cb --- /dev/null +++ b/ops/pipeline/enforce-ci.ps1 @@ -0,0 +1,28 @@ +## Ensure that a script is running inside the CI. +## Usage: . ops/pipeline/enforce-ci.ps1 + +if ( -Not $Env:GITHUB_ACTION ) { + $script_name = (Split-Path -Path $PSCommandPath -Leaf) + Write-Host "$script_name is not meant to run locally; it should run inside GitHub Actions." + Write-Host "Please inspect the content of $script_name and locate the desired command manually." 
+  exit 1
+}
+
+if ( -Not $Env:BRANCH_NAME ) {
+  Write-Host "Make sure to define environment variable BRANCH_NAME."
+  exit 2
+}
+
+if ( $Env:GITHUB_BASE_REF ) {
+  $is_pull_request = 1
+} else {
+  $is_pull_request = 0
+}
+
+if ( ($Env:BRANCH_NAME -eq "master") -or ($Env:BRANCH_NAME -match "release_.+") ) {
+  $is_release_branch = 1
+  $enforce_daily_budget = 0
+} else {
+  $is_release_branch = 0
+  $enforce_daily_budget = 1
+}
diff --git a/ops/pipeline/enforce-ci.sh b/ops/pipeline/enforce-ci.sh
new file mode 100755
index 000000000000..eefb6450b98d
--- /dev/null
+++ b/ops/pipeline/enforce-ci.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+## Ensure that a script is running inside the CI.
+## Usage: source ops/pipeline/enforce-ci.sh
+
+set -euo pipefail
+
+if [[ -z ${GITHUB_ACTION:-} ]]
+then
+  echo "$0 is not meant to run locally; it should run inside GitHub Actions."
+  echo "Please inspect the content of $0 and locate the desired command manually."
+  exit 1
+fi
+
+if [[ -z ${BRANCH_NAME:-} ]]
+then
+  echo "Make sure to define environment variable BRANCH_NAME."
+  exit 2
+fi
+
+if [[ -n ${GITHUB_BASE_REF:-} ]]
+then
+  is_pull_request=1
+else
+  is_pull_request=0
+fi
+
+if [[ $BRANCH_NAME == "master" || $BRANCH_NAME == "release_"* || $BRANCH_NAME == "federated-secure" ]]
+then
+  is_release_branch=1
+  enforce_daily_budget=0
+else
+  is_release_branch=0
+  enforce_daily_budget=1
+fi
+
+if [[ -n ${DISABLE_RELEASE:-} ]]
+then
+  is_release_branch=0
+fi
diff --git a/ops/pipeline/run-clang-tidy.sh b/ops/pipeline/run-clang-tidy.sh
new file mode 100755
index 000000000000..a9ff039ee4ca
--- /dev/null
+++ b/ops/pipeline/run-clang-tidy.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+set -euox pipefail
+
+echo "--- Run clang-tidy"
+
+source ops/pipeline/enforce-ci.sh
+
+python3 ops/docker_run.py \
+  --container-id xgb-ci.clang_tidy \
+  -- python3 ops/script/run_clang_tidy.py --cuda-archs 75
diff --git a/ops/pipeline/test-cpp-gpu.sh b/ops/pipeline/test-cpp-gpu.sh
new file mode 100755
index 000000000000..b66162d66a50
--- /dev/null
+++ b/ops/pipeline/test-cpp-gpu.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+set -euox pipefail
+
+source ops/pipeline/enforce-ci.sh
+
+if [[ "$#" -lt 1 ]]
+then
+  echo "Usage: $0 {gpu,gpu-rmm,mgpu}"
+  exit 1
+fi
+arg=$1
+
+case "${arg}" in
+  gpu)
+    echo "--- Run Google Tests, using a single GPU"
+    python3 ops/docker_run.py --container-id xgb-ci.gpu --use-gpus \
+      -- nvidia-smi
+    python3 ops/docker_run.py --container-id xgb-ci.gpu --use-gpus \
+      -- build/testxgboost
+    ;;
+
+  gpu-rmm)
+    echo "--- Run Google Tests, using a single GPU, RMM enabled"
+    python3 ops/docker_run.py --container-id xgb-ci.gpu --use-gpus \
+      -- nvidia-smi
+    python3 ops/docker_run.py --container-id xgb-ci.gpu --use-gpus \
+      -- build/testxgboost --use-rmm-pool
+    ;;
+
+  mgpu)
+    echo "--- Run Google Tests, using multiple GPUs"
+    python3 ops/docker_run.py --container-id xgb-ci.gpu --use-gpus \
+      -- nvidia-smi
+    python3 ops/docker_run.py --container-id xgb-ci.gpu --use-gpus \
+      --run-args='--shm-size=4g' \
+      -- build/testxgboost --gtest_filter=*MGPU*
+    ;;
+
+  *)
+    echo "Unrecognized arg: ${arg}"
+    exit 2
+    ;;
esac
diff --git a/ops/pipeline/test-jvm-gpu.sh b/ops/pipeline/test-jvm-gpu.sh
new file mode 100755
index 000000000000..272b55ad0d1a
--- /dev/null
+++ b/ops/pipeline/test-jvm-gpu.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+## Test JVM packages with CUDA. Note: this script assumes that
+## the user has already built libxgboost4j.so with CUDA support
+## and placed it in the lib/ directory.
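+##
+## A sketch of the two-step flow used in CI (the first step is expected to
+## leave libxgboost4j.so under lib/, matching the note above):
+##
+##   bash ops/pipeline/build-jvm-gpu.sh
+##   bash ops/pipeline/test-jvm-gpu.sh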
+ +set -euo pipefail + +# source ops/pipeline/enforce-ci.sh + +SCALA_VERSION=2.12 + +set -x + +python3 ops/docker_run.py --container-id xgb-ci.jvm_gpu_build --use-gpus \ + -- nvidia-smi +python3 ops/docker_run.py --container-id xgb-ci.jvm_gpu_build --use-gpus \ + --run-args "-e SCALA_VERSION=${SCALA_VERSION} -e USE_CUDA=1 -e SKIP_NATIVE_BUILD=1" \ + -- ops/pipeline/build-test-jvm-packages-impl.sh diff --git a/ops/pipeline/test-python-impl.sh b/ops/pipeline/test-python-impl.sh new file mode 100755 index 000000000000..be1cb410c96c --- /dev/null +++ b/ops/pipeline/test-python-impl.sh @@ -0,0 +1,73 @@ +#!/bin/bash + +set -eo pipefail + +if [[ "$#" -lt 1 ]] +then + echo "Usage: $0 {gpu|mgpu|cpu|cpu-arm64}" + exit 1 +fi + +suite="$1" + +# Cannot set -u before Conda env activation +case "$suite" in + gpu|mgpu) + source activate gpu_test + ;; + cpu) + source activate linux_cpu_test + ;; + cpu-arm64) + source activate aarch64_test + ;; + *) + echo "Unrecognized argument: $suite" + exit 1 + ;; +esac + +set -xu + +export PYSPARK_DRIVER_PYTHON=$(which python) +export PYSPARK_PYTHON=$(which python) +export SPARK_TESTING=1 + +pip install -v ./python-package/dist/*.whl + +case "$suite" in + gpu) + echo "-- Run Python tests, using a single GPU" + python -c 'from cupy.cuda import jitify; jitify._init_module()' + pytest -v -s -rxXs --fulltrace --durations=0 -m 'not mgpu' tests/python-gpu + ;; + mgpu) + echo "-- Run Python tests, using multiple GPUs" + python -c 'from cupy.cuda import jitify; jitify._init_module()' + pytest -v -s -rxXs --fulltrace --durations=0 -m 'mgpu' tests/python-gpu + pytest -v -s -rxXs --fulltrace --durations=0 -m 'mgpu' \ + tests/test_distributed/test_gpu_with_dask + pytest -v -s -rxXs --fulltrace --durations=0 -m 'mgpu' \ + tests/test_distributed/test_gpu_with_spark + pytest -v -s -rxXs --fulltrace --durations=0 -m 'mgpu' \ + tests/test_distributed/test_gpu_federated + ;; + cpu) + echo "-- Run Python tests (CPU)" + export RAY_OBJECT_STORE_ALLOW_SLOW_STORAGE=1 + pytest -v -s -rxXs --fulltrace --durations=0 tests/python + pytest -v -s -rxXs --fulltrace --durations=0 tests/test_distributed/test_with_dask + pytest -v -s -rxXs --fulltrace --durations=0 tests/test_distributed/test_with_spark + pytest -v -s -rxXs --fulltrace --durations=0 tests/test_distributed/test_federated + ;; + cpu-arm64) + echo "-- Run Python tests (CPU, ARM64)" + pytest -v -s -rxXs --fulltrace --durations=0 \ + tests/python/test_basic.py tests/python/test_basic_models.py \ + tests/python/test_model_compatibility.py + ;; + *) + echo "Unrecognized argument: $suite" + exit 1 + ;; +esac diff --git a/ops/pipeline/test-python.sh b/ops/pipeline/test-python.sh new file mode 100755 index 000000000000..507deb37d9c0 --- /dev/null +++ b/ops/pipeline/test-python.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -euo pipefail + +source ops/pipeline/enforce-ci.sh + +if [[ "$#" -lt 2 ]] +then + echo "Usage: $0 {gpu|mgpu|cpu|cpu-arm64} {container_id}" + exit 1 +fi + +suite="$1" +container_id="$2" + +if [[ "$suite" == "gpu" || "$suite" == "mgpu" ]] +then + gpu_option="--use-gpus" +else + gpu_option="" +fi + +python3 ops/docker_run.py --container-id "${container_id}" ${gpu_option} \ + --run-args='--shm-size=4g' \ + -- bash ops/pipeline/test-python-impl.sh "${suite}" diff --git a/ops/pipeline/test-win64-gpu.ps1 b/ops/pipeline/test-win64-gpu.ps1 new file mode 100644 index 000000000000..2416d53b3f85 --- /dev/null +++ b/ops/pipeline/test-win64-gpu.ps1 @@ -0,0 +1,28 @@ +$ErrorActionPreference = "Stop" + +. 
ops/pipeline/enforce-ci.ps1 + +Write-Host "--- Test XGBoost on Windows with CUDA" + +nvcc --version + +Write-Host "--- Run Google Tests" +build/testxgboost.exe +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } + +Write-Host "--- Set up Python env" +conda activate +$env_name = -join("win64_", (New-Guid).ToString().replace("-", "")) +mamba env create -n ${env_name} --file=ops/conda_env/win64_test.yml +conda activate ${env_name} +python -m pip install ` + (Get-ChildItem python-package/dist/*.whl | Select-Object -Expand FullName) +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } + +Write-Host "--- Run Python tests" +python -X faulthandler -m pytest -v -s -rxXs --fulltrace tests/python +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } +Write-Host "--- Run Python tests with GPU" +python -X faulthandler -m pytest -v -s -rxXs --fulltrace -m "(not slow) and (not mgpu)"` + tests/python-gpu +if ($LASTEXITCODE -ne 0) { throw "Last command failed" } diff --git a/tests/ci_build/build_via_cmake.sh b/ops/script/build_via_cmake.sh similarity index 56% rename from tests/ci_build/build_via_cmake.sh rename to ops/script/build_via_cmake.sh index 3238c41e1bcb..86e3677f4392 100755 --- a/tests/ci_build/build_via_cmake.sh +++ b/ops/script/build_via_cmake.sh @@ -1,9 +1,17 @@ -#!/usr/bin/env bash -set -e +#!/bin/bash -if [[ "$1" == --conda-env=* ]] +set -euo pipefail + +if [[ "$#" -lt 1 ]] +then + conda_env="" +else + conda_env="$1" +fi + +if [[ "${conda_env}" == --conda-env=* ]] then - conda_env=$(echo "$1" | sed 's/^--conda-env=//g' -) + conda_env=$(echo "${conda_env}" | sed 's/^--conda-env=//g' -) echo "Activating Conda environment ${conda_env}" shift 1 cmake_args="$@" @@ -26,7 +34,17 @@ mkdir build cd build # Disable CMAKE_COMPILE_WARNING_AS_ERROR option temporarily until # https://github.com/dmlc/xgboost/issues/10400 is fixed -cmake .. ${cmake_args} -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_VERBOSE_MAKEFILE=ON -DENABLE_ALL_WARNINGS=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF -GNinja ${cmake_prefix_flag} -DHIDE_CXX_SYMBOLS=ON -DBUILD_DEPRECATED_CLI=ON +set -x +cmake .. ${cmake_args} \ + -DGOOGLE_TEST=ON \ + -DUSE_DMLC_GTEST=ON \ + -DENABLE_ALL_WARNINGS=ON \ + -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ + -GNinja \ + ${cmake_prefix_flag} \ + -DHIDE_CXX_SYMBOLS=ON \ + -DBUILD_DEPRECATED_CLI=ON ninja clean time ninja -v cd .. 
+set +x diff --git a/dev/change_scala_version.py b/ops/script/change_scala_version.py similarity index 93% rename from dev/change_scala_version.py rename to ops/script/change_scala_version.py index c8a9b54ccf91..ed475a1f9582 100644 --- a/dev/change_scala_version.py +++ b/ops/script/change_scala_version.py @@ -4,7 +4,7 @@ import shutil -def main(args): +def main(args: argparse.Namespace) -> None: if args.scala_version == "2.12": scala_ver = "2.12" scala_patchver = "2.12.18" @@ -20,6 +20,9 @@ def main(args): if target.is_dir(): print(f"Removing {target}...") shutil.rmtree(target) + for target in pathlib.Path("jvm-packages/").glob("**/*.so"): + print(f"Removing {target}...") + target.unlink() # Update pom.xml for pom in pathlib.Path("jvm-packages/").glob("**/pom.xml"): diff --git a/tests/ci_build/change_version.py b/ops/script/change_version.py similarity index 100% rename from tests/ci_build/change_version.py rename to ops/script/change_version.py diff --git a/tests/ci_build/format_wheel_meta.py b/ops/script/format_wheel_meta.py similarity index 92% rename from tests/ci_build/format_wheel_meta.py rename to ops/script/format_wheel_meta.py index 9e7bad907687..a7def879905e 100644 --- a/tests/ci_build/format_wheel_meta.py +++ b/ops/script/format_wheel_meta.py @@ -2,18 +2,19 @@ Script to generate meta.json to store metadata for a nightly build of XGBoost Python package. """ + +import argparse import json import pathlib -from argparse import ArgumentParser -def main(args): +def main(args: argparse.Namespace) -> None: wheel_path = pathlib.Path(args.wheel_path).expanduser().resolve() if not wheel_path.exists(): raise ValueError(f"Wheel cannot be found at path {wheel_path}") if not wheel_path.is_file(): raise ValueError(f"Path {wheel_path} is not a valid file") - wheel_dir, wheel_name = wheel_path.parent, wheel_path.name + wheel_name = wheel_path.name meta_path = pathlib.Path(args.meta_path) if not meta_path.exists(): @@ -36,7 +37,7 @@ def main(args): if __name__ == "__main__": - parser = ArgumentParser( + parser = argparse.ArgumentParser( description="Format meta.json encoding the latest nightly version of the Python wheel" ) parser.add_argument( diff --git a/tests/ci_build/lint_cmake.sh b/ops/script/lint_cmake.sh old mode 100644 new mode 100755 similarity index 94% rename from tests/ci_build/lint_cmake.sh rename to ops/script/lint_cmake.sh index d67ecd0844ed..55aeb20e8fb2 --- a/tests/ci_build/lint_cmake.sh +++ b/ops/script/lint_cmake.sh @@ -1,6 +1,6 @@ #!/bin/bash -set -e +set -euo pipefail cmake_files=$( find . -name CMakeLists.txt -o -path "./cmake/*.cmake" \ diff --git a/tests/ci_build/lint_cpp.py b/ops/script/lint_cpp.py similarity index 86% rename from tests/ci_build/lint_cpp.py rename to ops/script/lint_cpp.py index d4775d6b6b3e..2d00b219ceab 100644 --- a/tests/ci_build/lint_cpp.py +++ b/ops/script/lint_cpp.py @@ -2,6 +2,7 @@ import os import re import sys +from typing import TextIO import cpplint from cpplint import _cpplint_state @@ -9,7 +10,7 @@ CXX_SUFFIX = set(["cc", "c", "cpp", "h", "cu", "hpp"]) -def filepath_enumerate(paths): +def filepath_enumerate(paths: list[str]) -> list[str]: """Enumerate the file paths of all subfiles of the list of paths""" out = [] for path in paths: @@ -22,7 +23,7 @@ def filepath_enumerate(paths): return out -def get_header_guard_dmlc(filename): +def get_header_guard_dmlc(filename: str) -> str: """Get Header Guard Convention for DMLC Projects. 
For headers in include, directly use the path @@ -54,11 +55,10 @@ def get_header_guard_dmlc(filename): class Lint: - def __init__(self): + def __init__(self) -> None: self.project_name = "xgboost" - self.cpp_header_map = {} - self.cpp_src_map = {} - self.python_map = {} + self.cpp_header_map: dict[str, dict[str, int]] = {} + self.cpp_src_map: dict[str, dict[str, int]] = {} self.pylint_cats = set(["error", "warning", "convention", "refactor"]) # setup cpp lint @@ -78,7 +78,7 @@ def __init__(self): cpplint._SetCountingStyle("toplevel") cpplint._line_length = 100 - def process_cpp(self, path, suffix): + def process_cpp(self, path: str, suffix: str) -> None: """Process a cpp file.""" _cpplint_state.ResetErrorCounts() cpplint.ProcessFile(str(path), _cpplint_state.verbose_level) @@ -91,7 +91,9 @@ def process_cpp(self, path, suffix): self.cpp_src_map[str(path)] = errors @staticmethod - def _print_summary_map(strm, result_map, ftype): + def _print_summary_map( + strm: TextIO, result_map: dict[str, dict[str, int]], ftype: str + ) -> int: """Print summary of certain result map.""" if len(result_map) == 0: return 0 @@ -105,7 +107,7 @@ def _print_summary_map(strm, result_map, ftype): ) return len(result_map) - npass - def print_summary(self, strm): + def print_summary(self, strm: TextIO) -> int: """Print summary of lint.""" nerr = 0 nerr += Lint._print_summary_map(strm, self.cpp_header_map, "cpp-header") @@ -122,7 +124,7 @@ def print_summary(self, strm): cpplint.GetHeaderGuardCPPVariable = get_header_guard_dmlc -def process(fname, allow_type): +def process(fname: str, allow_type: list[str]) -> None: """Process a file.""" fname = str(fname) arr = fname.rsplit(".", 1) @@ -132,13 +134,19 @@ def process(fname, allow_type): _HELPER.process_cpp(fname, arr[-1]) -def main(): +def main() -> None: parser = argparse.ArgumentParser(description="run cpp lint") parser.add_argument( "path", nargs="*", help="Path to traverse", - default=["src", "include", os.path.join("R-package", "src"), "python-package", "plugin/sycl"], + default=[ + "src", + "include", + os.path.join("R-package", "src"), + "python-package", + "plugin/sycl", + ], ) parser.add_argument( "--exclude_path", @@ -149,7 +157,7 @@ def main(): args = parser.parse_args() excluded_paths = filepath_enumerate(args.exclude_path) - allow_type = [] + allow_type: list[str] = [] allow_type += CXX_SUFFIX for path in args.path: diff --git a/tests/ci_build/lint_python.py b/ops/script/lint_python.py similarity index 95% rename from tests/ci_build/lint_python.py rename to ops/script/lint_python.py index e97b13f2c465..67343cc430ac 100644 --- a/tests/ci_build/lint_python.py +++ b/ops/script/lint_python.py @@ -16,8 +16,6 @@ class LintersPaths: BLACK = ( # core "python-package/", - # CI - "tests/ci_build/tidy.py", # tests "tests/python/test_config.py", "tests/python/test_callback.py", @@ -70,10 +68,7 @@ class LintersPaths: "demo/guide-python/update_process.py", "demo/aft_survival/aft_survival_viz_demo.py", # CI - "tests/ci_build/lint_python.py", - "tests/ci_build/test_r_package.py", - "tests/ci_build/test_utils.py", - "tests/ci_build/change_version.py", + "ops/", ) ISORT = ( @@ -83,12 +78,13 @@ class LintersPaths: "tests/test_distributed/", "tests/python/", "tests/python-gpu/", - "tests/ci_build/", # demo "demo/", # misc "dev/", "doc/", + # CI + "ops/", ) MYPY = ( @@ -130,11 +126,7 @@ class LintersPaths: "demo/guide-python/learning_to_rank.py", "demo/aft_survival/aft_survival_viz_demo.py", # CI - "tests/ci_build/tidy.py", - "tests/ci_build/lint_python.py", - 
"tests/ci_build/test_r_package.py", - "tests/ci_build/test_utils.py", - "tests/ci_build/change_version.py", + "ops/", ) diff --git a/tests/ci_build/lint_r.R b/ops/script/lint_r.R similarity index 100% rename from tests/ci_build/lint_r.R rename to ops/script/lint_r.R diff --git a/tests/ci_build/rename_whl.py b/ops/script/rename_whl.py similarity index 95% rename from tests/ci_build/rename_whl.py rename to ops/script/rename_whl.py index 500196190b3d..d4467720c738 100644 --- a/tests/ci_build/rename_whl.py +++ b/ops/script/rename_whl.py @@ -1,8 +1,8 @@ +import argparse import pathlib -from argparse import ArgumentParser -def main(args): +def main(args: argparse.Namespace) -> None: wheel_path = pathlib.Path(args.wheel_path).expanduser().resolve() if not wheel_path.exists(): raise ValueError(f"Wheel cannot be found at path {wheel_path}") @@ -43,7 +43,7 @@ def main(args): if __name__ == "__main__": - parser = ArgumentParser( + parser = argparse.ArgumentParser( description="Format a Python wheel's name using the git commit hash and platform tag" ) parser.add_argument( diff --git a/tests/ci_build/tidy.py b/ops/script/run_clang_tidy.py similarity index 97% rename from tests/ci_build/tidy.py rename to ops/script/run_clang_tidy.py index 13bbedc0b4b5..dca5d1069598 100755 --- a/tests/ci_build/tidy.py +++ b/ops/script/run_clang_tidy.py @@ -19,7 +19,9 @@ def call(args: list[str]) -> tuple[int, int, str, list[str]]: # `workspace` is a name used in the CI container. Normally we should keep the dir # as `xgboost`. matched = re.search( - "(workspace|xgboost)/.*(src|tests|include)/.*warning:", error_msg, re.MULTILINE + "(workspace|xgboost)/.*(ops|src|tests|include)/.*warning:", + error_msg, + re.MULTILINE, ) if matched is None: @@ -265,7 +267,7 @@ def test_tidy(args: argparse.Namespace) -> None: """ root_path = os.path.abspath(os.path.curdir) tidy_file = os.path.join(root_path, ".clang-tidy") - test_file_path = os.path.join(root_path, "tests", "ci_build", "test_tidy.cc") + test_file_path = os.path.join(root_path, "ops", "script", "test_tidy.cc") tidy_config = "--config-file=" + tidy_file if not args.tidy_version: @@ -274,8 +276,8 @@ def test_tidy(args: argparse.Namespace) -> None: tidy = "clang-tidy-" + str(args.tidy_version) cmd = [tidy, tidy_config, test_file_path] (proc_code, tidy_status, error_msg, _) = call(cmd) - assert proc_code == 0 - assert tidy_status == 1 + if proc_code != 0 or tidy_status != 1: + raise RuntimeError(error_msg) print("clang-tidy is working.") diff --git a/tests/ci_build/test_r_package.py b/ops/script/test_r_package.py similarity index 99% rename from tests/ci_build/test_r_package.py rename to ops/script/test_r_package.py index 5ca7fa69b21a..3ce886c1bc41 100644 --- a/tests/ci_build/test_r_package.py +++ b/ops/script/test_r_package.py @@ -42,7 +42,7 @@ def pkgroot(path: str) -> None: else: would_remove = output.stdout.decode("utf-8").strip().split("\n") - if would_remove and not all(f.find("tests/ci_build") != -1 for f in would_remove): + if would_remove and not all(f.find("ops") != -1 for f in would_remove): raise ValueError( "\n".join(would_remove) + "\nPlease cleanup the working git repository." 
) diff --git a/tests/ci_build/test_tidy.cc b/ops/script/test_tidy.cc similarity index 100% rename from tests/ci_build/test_tidy.cc rename to ops/script/test_tidy.cc diff --git a/tests/ci_build/test_utils.py b/ops/script/test_utils.py similarity index 100% rename from tests/ci_build/test_utils.py rename to ops/script/test_utils.py diff --git a/tests/buildkite/update-rapids.sh b/ops/script/update_rapids.sh similarity index 50% rename from tests/buildkite/update-rapids.sh rename to ops/script/update_rapids.sh index f6a2675bdfa9..d7958ce70d86 100755 --- a/tests/buildkite/update-rapids.sh +++ b/ops/script/update_rapids.sh @@ -7,7 +7,10 @@ echo "LATEST_RAPIDS_VERSION = $LATEST_RAPIDS_VERSION" DEV_RAPIDS_VERSION=$(date +%Y-%m-%d -d "20${LATEST_RAPIDS_VERSION//./-}-01 + 2 month" | cut -c3-7 | tr - .) echo "DEV_RAPIDS_VERSION = $DEV_RAPIDS_VERSION" -PARENT_PATH=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P ) +OPS_PATH=$( cd "$(dirname "${BASH_SOURCE[0]}")/.." ; pwd -P ) +CONTAINER_YAML="$OPS_PATH/docker/ci_container.yml" -sed -i "s/^RAPIDS_VERSION=[[:digit:]]\+\.[[:digit:]]\+/RAPIDS_VERSION=${LATEST_RAPIDS_VERSION}/" $PARENT_PATH/conftest.sh -sed -i "s/^DEV_RAPIDS_VERSION=[[:digit:]]\+\.[[:digit:]]\+/DEV_RAPIDS_VERSION=${DEV_RAPIDS_VERSION}/" $PARENT_PATH/conftest.sh +sed -i "s/\&rapids_version \"[[:digit:]]\+\.[[:digit:]]\+\"/\&rapids_version \"${LATEST_RAPIDS_VERSION}\"/" \ + "$CONTAINER_YAML" +sed -i "s/\&dev_rapids_version \"[[:digit:]]\+\.[[:digit:]]\+\"/\&dev_rapids_version \"${DEV_RAPIDS_VERSION}\"/" \ + "$CONTAINER_YAML" diff --git a/tests/ci_build/verify_link.sh b/ops/script/verify_link.sh similarity index 100% rename from tests/ci_build/verify_link.sh rename to ops/script/verify_link.sh diff --git a/ops/stash_artifacts.ps1 b/ops/stash_artifacts.ps1 new file mode 100644 index 000000000000..57a58d884226 --- /dev/null +++ b/ops/stash_artifacts.ps1 @@ -0,0 +1,47 @@ +[CmdletBinding()] +Param( + [Parameter( + Mandatory=$true, + Position=0, + ValueFromRemainingArguments=$true + )][string[]]$artifacts +) + +## Convenience wrapper for ops/stash_artifacts.py +## Meant to be used inside GitHub Actions + +$ENV_VAR_DOC = @' +Inputs + - COMMAND: Either "upload" or "download" + - KEY: Unique string to identify a group of artifacts +'@ + +$ErrorActionPreference = "Stop" + +. ops/pipeline/enforce-ci.ps1 + +foreach ($env in "COMMAND", "KEY", "GITHUB_REPOSITORY", "GITHUB_RUN_ID", + "RUNS_ON_S3_BUCKET_CACHE") { + $val = [Environment]::GetEnvironmentVariable($env) + if ($val -eq $null) { + Write-Host "Error: $env must be set.`n${ENV_VAR_DOC}" + exit 1 + } +} + +$artifact_stash_prefix = "cache/${Env:GITHUB_REPOSITORY}/stash/${Env:GITHUB_RUN_ID}" + +conda activate + +Write-Host @" +python ops/stash_artifacts.py ` + --command "${Env:COMMAND}" ` + --s3-bucket "${Env:RUNS_ON_S3_BUCKET_CACHE}" ` + --prefix "${artifact_stash_prefix}/${Env:KEY}" ` + -- $artifacts +"@ +python ops/stash_artifacts.py ` + --command "${Env:COMMAND}" ` + --s3-bucket "${Env:RUNS_ON_S3_BUCKET_CACHE}" ` + --prefix "${artifact_stash_prefix}/${Env:KEY}" ` + -- $artifacts diff --git a/ops/stash_artifacts.py b/ops/stash_artifacts.py new file mode 100644 index 000000000000..827e448ac49e --- /dev/null +++ b/ops/stash_artifacts.py @@ -0,0 +1,144 @@ +""" +Stash an artifact in an S3 bucket for later use + +Note. This script takes in all inputs via environment variables + except the path to the artifact(s). 
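+
+Illustrative direct invocation (bucket and prefix are example values; the
+wrappers ops/stash_artifacts.sh and ops/stash_artifacts.ps1 normally build
+this command from the COMMAND/KEY environment variables):
+
+    python3 ops/stash_artifacts.py \
+        --command upload \
+        --s3-bucket example-ci-bucket \
+        --prefix cache/myorg/xgboost/stash/12345/build-cuda \
+        -- build/testxgboost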
+""" + +import argparse +import os +import subprocess +from pathlib import Path +from urllib.parse import SplitResult, urlsplit, urlunsplit + + +def resolve(x: Path) -> Path: + return x.expanduser().resolve() + + +def path_equals(a: Path, b: Path) -> bool: + return resolve(a) == resolve(b) + + +def compute_s3_url(s3_bucket: str, prefix: str, artifact: Path) -> str: + filename = artifact.name + relative_path = resolve(artifact).relative_to(Path.cwd()) + if resolve(artifact.parent) == resolve(Path.cwd()): + full_prefix = prefix + else: + full_prefix = f"{prefix}/{str(relative_path.parent)}" + return f"s3://{s3_bucket}/{full_prefix}/{filename}" + + +def aws_s3_upload(src: Path, dest: str) -> None: + cli_args = ["aws", "s3", "cp", "--no-progress", str(src), dest] + print(" ".join(cli_args)) + subprocess.run( + cli_args, + check=True, + encoding="utf-8", + ) + + +def aws_s3_download(src: str, dest: Path) -> None: + cli_args = ["aws", "s3", "cp", "--no-progress", src, str(dest)] + print(" ".join(cli_args)) + subprocess.run( + cli_args, + check=True, + encoding="utf-8", + ) + + +def aws_s3_download_with_wildcard(src: str, dest: Path) -> None: + parsed_src = urlsplit(src) + src_dir = urlunsplit( + SplitResult( + scheme="s3", + netloc=parsed_src.netloc, + path=os.path.dirname(parsed_src.path), + query="", + fragment="", + ) + ) + dest_dir = dest.parent + src_glob = os.path.basename(parsed_src.path) + cli_args = [ + "aws", + "s3", + "cp", + "--recursive", + "--no-progress", + "--exclude", + "'*'", + "--include", + src_glob, + src_dir, + str(dest_dir), + ] + print(" ".join(cli_args)) + subprocess.run( + cli_args, + check=True, + encoding="utf-8", + ) + + +def upload(args: argparse.Namespace) -> None: + print(f"Uploading artifacts with prefix {args.prefix}...") + for artifact in args.artifacts: + artifact_path = Path(artifact) + s3_url = compute_s3_url(args.s3_bucket, args.prefix, artifact_path) + aws_s3_upload(artifact_path, s3_url) + + +def download(args: argparse.Namespace) -> None: + print(f"Downloading artifacts with prefix {args.prefix}...") + for artifact in args.artifacts: + artifact_path = Path(artifact) + print(f"mkdir -p {str(artifact_path.parent)}") + artifact_path.parent.mkdir(parents=True, exist_ok=True) + s3_url = compute_s3_url(args.s3_bucket, args.prefix, artifact_path) + if "*" in artifact: + aws_s3_download_with_wildcard(s3_url, artifact_path) + else: + aws_s3_download(s3_url, artifact_path) + + +if __name__ == "__main__": + # Ensure that the current working directory is the project root + if not (Path.cwd() / "ops").is_dir() or not path_equals( + Path(__file__).parent, Path.cwd() / "ops" + ): + x = Path(__file__).name + raise RuntimeError(f"Script {x} must be run at the project's root directory") + + parser = argparse.ArgumentParser() + parser.add_argument( + "--command", + type=str, + choices=["upload", "download"], + required=True, + help="Whether to upload or download the artifact (upload/download)", + ) + parser.add_argument( + "--s3-bucket", + type=str, + required=True, + help="Name of the S3 bucket to store the artifact", + ) + parser.add_argument( + "--prefix", + type=str, + required=True, + help=( + "Where the artifact would be stored. The artifact will be stored in " + "s3://[s3-bucket]/[prefix]." 
+ ), + ) + parser.add_argument("artifacts", type=str, nargs="+", metavar="artifact") + parsed_args = parser.parse_args() + if parsed_args.command == "upload": + upload(parsed_args) + elif parsed_args.command == "download": + download(parsed_args) diff --git a/ops/stash_artifacts.sh b/ops/stash_artifacts.sh new file mode 100755 index 000000000000..c2a16f42a26c --- /dev/null +++ b/ops/stash_artifacts.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +## Convenience wrapper for ops/stash_artifacts.py +## Meant to be used inside GitHub Actions + +ENV_VAR_DOC=$( +cat <<-EOF +Inputs + - COMMAND: Either "upload" or "download" + - KEY: Unique string to identify a group of artifacts +EOF +) + +set -euo pipefail + +source ops/pipeline/enforce-ci.sh + +if [ "$#" -lt 1 ]; then + echo "Usage: $0 [artifact] [artifact ...]" + exit 1 +fi + +for arg in "COMMAND" "KEY" "GITHUB_REPOSITORY" "GITHUB_RUN_ID" "RUNS_ON_S3_BUCKET_CACHE" +do + if [[ -z "${!arg:-}" ]] + then + echo -e "Error: $arg must be set.\n${ENV_VAR_DOC}" + exit 1 + fi +done + +artifact_stash_prefix="cache/${GITHUB_REPOSITORY}/stash/${GITHUB_RUN_ID}" + +set -x +python3 ops/stash_artifacts.py \ + --command "${COMMAND}" \ + --s3-bucket "${RUNS_ON_S3_BUCKET_CACHE}" \ + --prefix "${artifact_stash_prefix}/${KEY}" \ + -- "$@" diff --git a/tests/buildkite/build-containers.sh b/tests/buildkite/build-containers.sh deleted file mode 100755 index aa8f572483a3..000000000000 --- a/tests/buildkite/build-containers.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash - -set -euo pipefail -set -x - -if [ "$#" -lt 1 ] -then - echo "Usage: $0 [container to build]" - exit 1 -fi -container=$1 - -source tests/buildkite/conftest.sh - -echo "--- Build container ${container}" - -BUILD_ARGS="" - -case "${container}" in - cpu) - ;; - - gpu|gpu_build_rockylinux8) - BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION" - BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION" - BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION" - ;; - - gpu_dev_ver) - BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION" - BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION" - BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$DEV_RAPIDS_VERSION" - ;; - - jvm_gpu_build) - BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION" - BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION" - ;; - - *) - echo "Unrecognized container ID: ${container}" - exit 2 - ;; -esac - -# Run a no-op command. This will simply build the container and push it to the private registry -tests/ci_build/ci_build.sh ${container} ${BUILD_ARGS} bash diff --git a/tests/buildkite/build-cpu.sh b/tests/buildkite/build-cpu.sh deleted file mode 100755 index 11679d644de1..000000000000 --- a/tests/buildkite/build-cpu.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -echo "--- Build CPU code" - -source tests/buildkite/conftest.sh - -command_wrapper="tests/ci_build/ci_build.sh cpu" - -$command_wrapper rm -fv dmlc-core/include/dmlc/build_config_default.h - # This step is not necessary, but here we include it, to ensure that - # DMLC_CORE_USE_CMAKE flag is correctly propagated. We want to make sure that we use - # the configured header build/dmlc/build_config.h instead of - # include/dmlc/build_config_default.h. 
-echo "--- Build libxgboost from the source" -$command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH=/opt/grpc \ - -DPLUGIN_FEDERATED=ON -echo "--- Run Google Test" -$command_wrapper bash -c "cd build && ctest --extra-verbose" -echo "--- Stash XGBoost CLI executable" -buildkite-agent artifact upload ./xgboost - -# Sanitizer test -echo "--- Run Google Test with sanitizer enabled" -$command_wrapper tests/ci_build/build_via_cmake.sh -DUSE_SANITIZER=ON \ - -DENABLED_SANITIZERS="address;leak;undefined" -DCMAKE_BUILD_TYPE=Debug \ - -DSANITIZER_PATH=/usr/lib/x86_64-linux-gnu/ -CI_DOCKER_EXTRA_PARAMS_INIT="-e ASAN_SYMBOLIZER_PATH=/usr/bin/llvm-symbolizer "` - `"-e ASAN_OPTIONS=symbolize=1 "` - `"-e UBSAN_OPTIONS=print_stacktrace=1:log_path=ubsan_error.log "` - `"--cap-add SYS_PTRACE" \ - $command_wrapper bash -c "cd build && ctest --exclude-regex AllTestsInDMLCUnitTests "` - `"--extra-verbose" diff --git a/tests/buildkite/build-cuda-with-rmm.sh b/tests/buildkite/build-cuda-with-rmm.sh deleted file mode 100755 index 189c67cba449..000000000000 --- a/tests/buildkite/build-cuda-with-rmm.sh +++ /dev/null @@ -1,91 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -if [ "$#" -lt 1 ] -then - mode=stable - exit 1 -else - mode=$1 -fi - -WHEEL_TAG=manylinux_2_28_x86_64 - -source tests/buildkite/conftest.sh - - -case "${mode}" in - stable) - container_tag='gpu_build_rockylinux8' - rapids_version=$RAPIDS_VERSION - ;; - - dev) - container_tag='gpu_dev_ver' - rapids_version=$DEV_RAPIDS_VERSION - ;; - - *) - echo "Unrecognized mode ID: ${mode}" - exit 2 - ;; -esac - -echo "--- Build with CUDA ${CUDA_VERSION} with RMM" - -if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]] -then - arch_flag="-DGPU_COMPUTE_VER=75" -else - arch_flag="" -fi - -command_wrapper="tests/ci_build/ci_build.sh $container_tag --build-arg "` - `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "` - `"NCCL_VERSION_ARG=$NCCL_VERSION --build-arg "` - `"RAPIDS_VERSION_ARG=$rapids_version" - -echo "--- Build libxgboost from the source" -$command_wrapper tests/ci_build/build_via_cmake.sh \ - -DCMAKE_PREFIX_PATH="/opt/grpc;/opt/rmm;/opt/rmm/lib64/rapids/cmake" \ - -DUSE_CUDA=ON \ - -DUSE_OPENMP=ON \ - -DHIDE_CXX_SYMBOLS=ON \ - -DPLUGIN_FEDERATED=ON \ - -DPLUGIN_RMM=ON \ - -DUSE_NCCL=ON \ - -DUSE_NCCL_LIB_PATH=ON \ - -DNCCL_INCLUDE_DIR=/usr/include \ - -DUSE_DLOPEN_NCCL=ON \ - ${arch_flag} -echo "--- Build binary wheel" -$command_wrapper bash -c \ - "cd python-package && rm -rf dist/* && pip wheel --no-deps -v . 
--wheel-dir dist/" -$command_wrapper python tests/ci_build/rename_whl.py \ - --wheel-path python-package/dist/*.whl \ - --commit-hash ${BUILDKITE_COMMIT} \ - --platform-tag ${WHEEL_TAG} - -echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard" -tests/ci_build/ci_build.sh manylinux_2_28_x86_64 auditwheel repair \ - --plat ${WHEEL_TAG} python-package/dist/*.whl -$command_wrapper python tests/ci_build/rename_whl.py \ - --wheel-path wheelhouse/*.whl \ - --commit-hash ${BUILDKITE_COMMIT} \ - --platform-tag ${WHEEL_TAG} -mv -v wheelhouse/*.whl python-package/dist/ -# Make sure that libgomp.so is vendored in the wheel -tests/ci_build/ci_build.sh manylinux_2_28_x86_64 bash -c \ - "unzip -l python-package/dist/*.whl | grep libgomp || exit -1" - -echo "--- Upload Python wheel" -buildkite-agent artifact upload python-package/dist/*.whl -if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] -then - aws s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/experimental_build_with_rmm/ \ - --acl public-read --no-progress -fi - -echo "-- Stash C++ test executable (testxgboost)" -buildkite-agent artifact upload build/testxgboost diff --git a/tests/buildkite/build-cuda.sh b/tests/buildkite/build-cuda.sh deleted file mode 100755 index 03d2cc8a6a24..000000000000 --- a/tests/buildkite/build-cuda.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -WHEEL_TAG=manylinux_2_28_x86_64 - -source tests/buildkite/conftest.sh - -echo "--- Build with CUDA ${CUDA_VERSION}" - -if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]] -then - arch_flag="-DGPU_COMPUTE_VER=75" -else - arch_flag="" -fi - -command_wrapper="tests/ci_build/ci_build.sh gpu_build_rockylinux8 --build-arg "` - `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "` - `"NCCL_VERSION_ARG=$NCCL_VERSION --build-arg "` - `"RAPIDS_VERSION_ARG=$RAPIDS_VERSION" - -echo "--- Build libxgboost from the source" -$command_wrapper tests/ci_build/build_via_cmake.sh \ - -DCMAKE_PREFIX_PATH="/opt/grpc" \ - -DUSE_CUDA=ON \ - -DUSE_OPENMP=ON \ - -DHIDE_CXX_SYMBOLS=ON \ - -DPLUGIN_FEDERATED=ON \ - -DUSE_NCCL=ON \ - -DUSE_NCCL_LIB_PATH=ON \ - -DNCCL_INCLUDE_DIR=/usr/include \ - -DUSE_DLOPEN_NCCL=ON \ - ${arch_flag} -echo "--- Build binary wheel" -$command_wrapper bash -c \ - "cd python-package && rm -rf dist/* && pip wheel --no-deps -v . 
--wheel-dir dist/" -$command_wrapper python tests/ci_build/rename_whl.py \ - --wheel-path python-package/dist/*.whl \ - --commit-hash ${BUILDKITE_COMMIT} \ - --platform-tag ${WHEEL_TAG} - -echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard" -tests/ci_build/ci_build.sh manylinux_2_28_x86_64 auditwheel repair \ - --plat ${WHEEL_TAG} python-package/dist/*.whl -$command_wrapper python tests/ci_build/rename_whl.py \ - --wheel-path wheelhouse/*.whl \ - --commit-hash ${BUILDKITE_COMMIT} \ - --platform-tag ${WHEEL_TAG} -mv -v wheelhouse/*.whl python-package/dist/ -# Make sure that libgomp.so is vendored in the wheel -tests/ci_build/ci_build.sh manylinux_2_28_x86_64 bash -c \ - "unzip -l python-package/dist/*.whl | grep libgomp || exit -1" - -echo "--- Upload Python wheel" -buildkite-agent artifact upload python-package/dist/*.whl -if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] -then - aws s3 cp python-package/dist/*.whl s3://xgboost-nightly-builds/${BRANCH_NAME}/ \ - --acl public-read --no-progress - - # Generate the meta info which includes xgboost version and the commit info - $command_wrapper python tests/ci_build/format_wheel_meta.py \ - --wheel-path python-package/dist/*.whl \ - --commit-hash ${BUILDKITE_COMMIT} \ - --platform-tag ${WHEEL_TAG} \ - --meta-path python-package/dist/ - aws s3 cp python-package/dist/meta.json s3://xgboost-nightly-builds/${BRANCH_NAME}/ \ - --acl public-read --no-progress -fi -echo "-- Stash C++ test executable (testxgboost)" -buildkite-agent artifact upload build/testxgboost diff --git a/tests/buildkite/build-jvm-doc.sh b/tests/buildkite/build-jvm-doc.sh deleted file mode 100755 index d168eb8cc58d..000000000000 --- a/tests/buildkite/build-jvm-doc.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -source tests/buildkite/conftest.sh - -echo "--- Build JVM packages doc" -tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_doc.sh ${BRANCH_NAME} -if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] -then - echo "--- Upload JVM packages doc" - aws s3 cp jvm-packages/${BRANCH_NAME}.tar.bz2 \ - s3://xgboost-docs/${BRANCH_NAME}.tar.bz2 --acl public-read --no-progress -fi diff --git a/tests/buildkite/build-jvm-linux-x86_64-manylinux2014.sh b/tests/buildkite/build-jvm-linux-x86_64-manylinux2014.sh deleted file mode 100644 index 46a819a016d3..000000000000 --- a/tests/buildkite/build-jvm-linux-x86_64-manylinux2014.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -source tests/buildkite/conftest.sh - -command_wrapper="tests/ci_build/ci_build.sh jvm_manylinux2014_x86_64" - -# Build XGBoost4J binary -echo "--- Build libxgboost4j.so (targeting glibc 2.17)" -set -x -mkdir build -$command_wrapper bash -c \ - "cd build && cmake .. 
-GNinja -DJVM_BINDINGS=ON -DUSE_OPENMP=ON && ninja -v" -ldd lib/libxgboost4j.so -objdump -T lib/libxgboost4j.so | grep GLIBC_ | sed 's/.*GLIBC_\([.0-9]*\).*/\1/g' | sort -Vu - -echo "--- Upload libxgboost4j.so" -pushd lib -libname=libxgboost4j_linux_x86_64_${BUILDKITE_COMMIT}.so -mv -v libxgboost4j.so ${libname} -buildkite-agent artifact upload ${libname} -if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] -then - aws s3 cp ${libname} \ - s3://xgboost-nightly-builds/${BRANCH_NAME}/libxgboost4j/ \ - --acl public-read --no-progress -fi -popd diff --git a/tests/buildkite/build-jvm-packages-gpu.sh b/tests/buildkite/build-jvm-packages-gpu.sh deleted file mode 100755 index 76ffafbcfdd7..000000000000 --- a/tests/buildkite/build-jvm-packages-gpu.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -source tests/buildkite/conftest.sh - -echo "--- Build and test XGBoost JVM packages with CUDA" - -if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]] -then - arch_flag="-DGPU_COMPUTE_VER=75" -else - arch_flag="" -fi - -tests/ci_build/ci_build.sh jvm_gpu_build --use-gpus \ - --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \ - --build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \ - tests/ci_build/build_jvm_packages.sh \ - ${SPARK_VERSION} -Duse.cuda=ON ${arch_flag} diff --git a/tests/buildkite/build-jvm-packages.sh b/tests/buildkite/build-jvm-packages.sh deleted file mode 100755 index da4d1e9d8c8a..000000000000 --- a/tests/buildkite/build-jvm-packages.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -source tests/buildkite/conftest.sh - -echo "--- Build and test XGBoost JVM packages with Scala 2.12" -tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_packages.sh \ - ${SPARK_VERSION} - -echo "--- Build and test XGBoost JVM packages with Scala 2.13" - -tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_packages.sh \ - ${SPARK_VERSION} "" "" "true" diff --git a/tests/buildkite/build-win64-gpu.ps1 b/tests/buildkite/build-win64-gpu.ps1 deleted file mode 100644 index 9114d3237751..000000000000 --- a/tests/buildkite/build-win64-gpu.ps1 +++ /dev/null @@ -1,55 +0,0 @@ -$ErrorActionPreference = "Stop" - -. tests/buildkite/conftest.ps1 - -Write-Host "--- Build libxgboost on Windows with CUDA" - -nvcc --version -if ( $is_release_branch -eq 0 ) { - $arch_flag = "-DGPU_COMPUTE_VER=75" -} else { - $arch_flag = "" -} -mkdir build -cd build -cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON ` - -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DBUILD_DEPRECATED_CLI=ON ${arch_flag} -if ($LASTEXITCODE -ne 0) { throw "Last command failed" } -cmake --build . --config Release -- /m /nodeReuse:false ` - "/consoleloggerparameters:ShowCommandLine;Verbosity=minimal" -if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - -Write-Host "--- Build binary wheel" -cd ../python-package -conda activate -& pip install --user -v "pip>=23" -& pip --version -& pip wheel --no-deps -v . --wheel-dir dist/ -Get-ChildItem . -Filter dist/*.whl | -Foreach-Object { - & python ../tests/ci_build/rename_whl.py ` - --wheel-path $_.FullName ` - --commit-hash $Env:BUILDKITE_COMMIT ` - --platform-tag win_amd64 - if ($LASTEXITCODE -ne 0) { throw "Last command failed" } -} - -Write-Host "--- Upload Python wheel" -cd .. -Get-ChildItem . -Filter python-package/dist/*.whl | -Foreach-Object { - & buildkite-agent artifact upload python-package/dist/$_ - if ($LASTEXITCODE -ne 0) { throw "Last command failed" } -} -if ( $is_release_branch -eq 1 ) { - Get-ChildItem . 
-Filter python-package/dist/*.whl | - Foreach-Object { - & aws s3 cp python-package/dist/$_ s3://xgboost-nightly-builds/$Env:BUILDKITE_BRANCH/ ` - --acl public-read --no-progress - if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - } -} - -Write-Host "--- Stash C++ test executables" -& buildkite-agent artifact upload build/testxgboost.exe -& buildkite-agent artifact upload xgboost.exe diff --git a/tests/buildkite/conftest.ps1 b/tests/buildkite/conftest.ps1 deleted file mode 100644 index bd623caf0c03..000000000000 --- a/tests/buildkite/conftest.ps1 +++ /dev/null @@ -1,13 +0,0 @@ -if ( $Env:BUILDKITE_PULL_REQUEST -and ($Env:BUILDKITE_PULL_REQUEST -ne "false") ) { - $is_pull_request = 1 -} else { - $is_pull_request = 0 -} - -if ( ($Env:BUILDKITE_BRANCH -eq "master") -or ($Env:BUILDKITE_BRANCH -match "release_.+") ) { - $is_release_branch = 1 - $enforce_daily_budget = 0 -} else { - $is_release_branch = 0 - $enforce_daily_budget = 1 -} diff --git a/tests/buildkite/conftest.sh b/tests/buildkite/conftest.sh deleted file mode 100755 index 12f4c07ac6c9..000000000000 --- a/tests/buildkite/conftest.sh +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -function get_aws_secret { - if [[ $# -ne 1 ]] - then - echo "Usage: get_aws_secret [Name of secret]" - return 1 - fi - aws secretsmanager get-secret-value --secret-id $1 --output text --region us-west-2 --query SecretString -} - -function set_buildkite_env_vars_in_container { - # Pass all Buildkite-specific env vars to Docker containers. - # This is to be used with tests/ci_build/ci_build.sh - export CI_DOCKER_EXTRA_PARAMS_INIT="${CI_DOCKER_EXTRA_PARAMS_INIT:-} "` - `"--env BUILDKITE_ANALYTICS_TOKEN --env BUILDKITE_BUILD_ID --env BUILDKITE_BUILD_NUMBER "` - `"--env BUILDKITE_JOB_ID --env BUILDKITE_BRANCH --env BUILDKITE_COMMIT "` - `"--env BUILDKITE_MESSAGE --env BUILDKITE_BUILD_URL" -} - -set -x - -CUDA_VERSION=12.4.1 -NCCL_VERSION=2.23.4-1 -RAPIDS_VERSION=24.10 -DEV_RAPIDS_VERSION=24.12 -SPARK_VERSION=3.5.1 -JDK_VERSION=8 -R_VERSION=4.3.2 - -if [[ -z ${BUILDKITE:-} ]] -then - echo "$0 is not meant to run locally; it should run inside BuildKite." - echo "Please inspect the content of $0 and locate the desired command manually." 
- exit 1 -fi - -if [[ -n $BUILDKITE_PULL_REQUEST && $BUILDKITE_PULL_REQUEST != "false" ]] -then - is_pull_request=1 - BRANCH_NAME=PR-$BUILDKITE_PULL_REQUEST -else - is_pull_request=0 - BRANCH_NAME=$BUILDKITE_BRANCH -fi -export BRANCH_NAME=${BRANCH_NAME//\//-} - -if [[ $BRANCH_NAME == "master" || $BRANCH_NAME == "release_"* || $BRANCH_NAME == "federated-secure" ]] -then - is_release_branch=1 - enforce_daily_budget=0 -else - is_release_branch=0 - enforce_daily_budget=1 -fi - -if [[ -n ${DISABLE_RELEASE:-} ]] -then - is_release_branch=0 -fi - -set +x diff --git a/tests/buildkite/deploy-jvm-packages.sh b/tests/buildkite/deploy-jvm-packages.sh deleted file mode 100755 index 812a6c5cafec..000000000000 --- a/tests/buildkite/deploy-jvm-packages.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -source tests/buildkite/conftest.sh - -if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] -then - echo "--- Deploy JVM packages to xgboost-maven-repo S3 repo" - tests/ci_build/ci_build.sh jvm_gpu_build \ - --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \ - --build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \ - tests/ci_build/deploy_jvm_packages.sh ${SPARK_VERSION} -fi diff --git a/tests/buildkite/enforce_daily_budget.py b/tests/buildkite/enforce_daily_budget.py deleted file mode 100644 index af1b1ce484b8..000000000000 --- a/tests/buildkite/enforce_daily_budget.py +++ /dev/null @@ -1,14 +0,0 @@ -import json -import argparse - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--response", type=str, required=True) - args = parser.parse_args() - with open(args.response, "r") as f: - payload = f.read() - response = json.loads(payload) - if response["approved"]: - print(f"Testing approved. Reason: {response['reason']}") - else: - raise RuntimeError(f"Testing rejected. Reason: {response['reason']}") diff --git a/tests/buildkite/enforce_daily_budget.sh b/tests/buildkite/enforce_daily_budget.sh deleted file mode 100755 index 8212f07c1b24..000000000000 --- a/tests/buildkite/enforce_daily_budget.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -echo "--- Enforce daily budget" - -source tests/buildkite/conftest.sh - -if [[ $enforce_daily_budget == 0 ]] -then - echo "Automatically approving all test jobs for trunk branches" -else - aws lambda invoke --function-name XGBoostCICostWatcher --invocation-type RequestResponse --region us-west-2 response.json - python3 tests/buildkite/enforce_daily_budget.py --response response.json -fi diff --git a/tests/buildkite/infrastructure/README.md b/tests/buildkite/infrastructure/README.md deleted file mode 100644 index cc3e552e70ff..000000000000 --- a/tests/buildkite/infrastructure/README.md +++ /dev/null @@ -1,106 +0,0 @@ -BuildKite CI Infrastructure -=========================== - -# Worker image builder (`worker-image-pipeline/`) - -Use EC2 Image Builder to build machine images in a deterministic fashion. -The machine images are used to initialize workers in the CI/CD pipelines. - -## Editing bootstrap scripts - -Currently, we create two pipelines for machine images: one for Linux workers and another -for Windows workers. -You can edit the bootstrap scripts to change how the worker machines are initialized. 
-
-* `linux-amd64-gpu-bootstrap.yml`: Bootstrap script for Linux worker machines
-* `windows-gpu-bootstrap.yml`: Bootstrap script for Windows worker machines
-
-## Creating and running Image Builder pipelines
-
-Run the following commands to create and run pipelines in the EC2 Image Builder service:
-```bash
-python worker-image-pipeline/create_worker_image_pipelines.py --aws-region us-west-2
-python worker-image-pipeline/run_pipelines.py --aws-region us-west-2
-```
-Go to the AWS CloudFormation console and verify the existence of two CloudFormation stacks:
-* `buildkite-windows-gpu-worker`
-* `buildkite-linux-amd64-gpu-worker`
-
-Then go to the EC2 Image Builder console to check the status of the image builds. You may
-want to inspect the log output should a build fail.
-Once the new machine images are done building, see the next section to deploy the new
-images to the worker machines.
-
-# Elastic CI Stack for AWS (`aws-stack-creator/`)
-
-Use EC2 Autoscaling groups to launch worker machines in EC2. BuildKite periodically sends
-messages to the Autoscaling groups to increase or decrease the number of workers according
-to the number of outstanding testing jobs.
-
-## Deploy an updated CI stack with new machine images
-
-First, edit `aws-stack-creator/metadata.py` to update the `AMI_ID` fields:
-```python
-AMI_ID = {
-    # Managed by XGBoost team
-    "linux-amd64-gpu": {
-        "us-west-2": "...",
-    },
-    "linux-amd64-mgpu": {
-        "us-west-2": "...",
-    },
-    "windows-gpu": {
-        "us-west-2": "...",
-    },
-    "windows-cpu": {
-        "us-west-2": "...",
-    },
-    # Managed by BuildKite
-    # from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml
-    "linux-amd64-cpu": {
-        "us-west-2": "...",
-    },
-    "pipeline-loader": {
-        "us-west-2": "...",
-    },
-    "linux-arm64-cpu": {
-        "us-west-2": "...",
-    },
-}
-```
-AMI IDs uniquely identify the machine images in the EC2 service.
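For the BuildKite-managed entries described below, the lookup can also be scripted instead of done by hand in a browser. A minimal sketch, assuming the `cfn-flip` (pip package cfn-flip) and `jq` command-line tools are available; the /tmp paths are arbitrary:

```bash
# Fetch BuildKite's Elastic CI Stack template and print the AMI mapping
# for us-west-2 (fields such as linuxamd64, linuxarm64).
curl -sL https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml \
  -o /tmp/aws-stack.yml
cfn-flip /tmp/aws-stack.yml /tmp/aws-stack.json   # convert YAML to JSON
jq '.Mappings.AWSRegion2AMI."us-west-2"' /tmp/aws-stack.json
```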
-Go to the EC2 Image Builder console to find the AMI IDs for the new machine images -(see the previous section), and update the following fields: - -* `AMI_ID["linux-amd64-gpu"]["us-west-2"]`: - Use the latest output from the `buildkite-linux-amd64-gpu-worker` pipeline -* `AMI_ID["linux-amd64-mgpu"]["us-west-2"]`: - Should be identical to `AMI_ID["linux-amd64-gpu"]["us-west-2"]` -* `AMI_ID["windows-gpu"]["us-west-2"]`: - Use the latest output from the `buildkite-windows-gpu-worker` pipeline -* `AMI_ID["windows-cpu"]["us-west-2"]`: - Should be identical to `AMI_ID["windows-gpu"]["us-west-2"]` - -Next, visit https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml -to look up the AMI IDs for the following fields: - -* `AMI_ID["linux-amd64-cpu"]["us-west-2"]`: Copy and paste the AMI ID from the field - `Mappings/AWSRegion2AMI/us-west-2/linuxamd64` -* `AMI_ID["pipeline-loader"]["us-west-2"]`: - Should be identical to `AMI_ID["linux-amd64-cpu"]["us-west-2"]` -* `AMI_ID["linux-arm64-cpu"]["us-west-2"]`: Copy and paste the AMI ID from the field - `Mappings/AWSRegion2AMI/us-west-2/linuxarm64` - -Finally, run the following commands to deploy the new machine images: -``` -python aws-stack-creator/create_stack.py --aws-region us-west-2 --agent-token AGENT_TOKEN -``` -Go to the AWS CloudFormation console and verify the existence of the following -CloudFormation stacks: -* `buildkite-pipeline-loader-autoscaling-group` -* `buildkite-linux-amd64-cpu-autoscaling-group` -* `buildkite-linux-amd64-gpu-autoscaling-group` -* `buildkite-linux-amd64-mgpu-autoscaling-group` -* `buildkite-linux-arm64-cpu-autoscaling-group` -* `buildkite-windows-cpu-autoscaling-group` -* `buildkite-windows-gpu-autoscaling-group` diff --git a/tests/buildkite/infrastructure/aws-stack-creator/agent-iam-policy-template.yml b/tests/buildkite/infrastructure/aws-stack-creator/agent-iam-policy-template.yml deleted file mode 100644 index 7f15b1fbcd4f..000000000000 --- a/tests/buildkite/infrastructure/aws-stack-creator/agent-iam-policy-template.yml +++ /dev/null @@ -1,32 +0,0 @@ ---- -AWSTemplateFormatVersion: "2010-09-09" -Description: "Buildkite agent's IAM policy" - -Resources: - BuildkiteAgentManagedPolicy: - Type: AWS::IAM::ManagedPolicy - Properties: - PolicyDocument: - { - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": [ - "s3:*", - "s3-object-lambda:*" - ], - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": "lambda:InvokeFunction", - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": "secretsmanager:GetSecretValue", - "Resource": "*" - } - ] - } diff --git a/tests/buildkite/infrastructure/aws-stack-creator/create_stack.py b/tests/buildkite/infrastructure/aws-stack-creator/create_stack.py deleted file mode 100644 index 8f8db348a073..000000000000 --- a/tests/buildkite/infrastructure/aws-stack-creator/create_stack.py +++ /dev/null @@ -1,127 +0,0 @@ -import argparse -import copy -import os -import re -import sys - -import boto3 -import botocore -from metadata import AMI_ID, COMMON_STACK_PARAMS, STACK_PARAMS - -current_dir = os.path.dirname(__file__) -sys.path.append(os.path.join(current_dir, "..")) - -from common_blocks.utils import create_or_update_stack, wait - -TEMPLATE_URL = "https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml" - - -def get_availability_zones(*, aws_region): - client = boto3.client("ec2", region_name=aws_region) - r = client.describe_availability_zones( - Filters=[ - {"Name": "region-name", "Values": [aws_region]}, - {"Name": "zone-type", 
"Values": ["availability-zone"]}, - ] - ) - return sorted([x["ZoneName"] for x in r["AvailabilityZones"]]) - - -def get_default_vpc(*, aws_region): - ec2 = boto3.resource("ec2", region_name=aws_region) - default_vpc_id = None - for x in ec2.vpcs.filter(Filters=[{"Name": "is-default", "Values": ["true"]}]): - return x - - # Create default VPC if not exist - client = boto3.client("ec2", region_name=aws_region) - r = client.create_default_vpc() - default_vpc_id = r["Vpc"]["VpcId"] - - return ec2.Vpc(default_vpc_id) - - -def format_params(args, *, stack_id, agent_iam_policy): - default_vpc = get_default_vpc(aws_region=args.aws_region) - azs = get_availability_zones(aws_region=args.aws_region) - # For each of the first two availability zones (AZs), choose the default subnet - subnets = [ - x.id - for x in default_vpc.subnets.filter( - Filters=[ - {"Name": "default-for-az", "Values": ["true"]}, - {"Name": "availability-zone", "Values": azs[:2]}, - ] - ) - ] - assert len(subnets) == 2 - - params = copy.deepcopy(STACK_PARAMS[stack_id]) - params["ImageId"] = AMI_ID[stack_id][args.aws_region] - params["BuildkiteQueue"] = stack_id - params["CostAllocationTagValue"] = f"buildkite-{stack_id}" - params["BuildkiteAgentToken"] = args.agent_token - params["VpcId"] = default_vpc.id - params["Subnets"] = ",".join(subnets) - params["ManagedPolicyARNs"] = agent_iam_policy - params.update(COMMON_STACK_PARAMS) - return [{"ParameterKey": k, "ParameterValue": v} for k, v in params.items()] - - -def get_full_stack_id(stack_id): - return f"buildkite-{stack_id}-autoscaling-group" - - -def create_agent_iam_policy(args, *, client): - policy_stack_name = "buildkite-agent-iam-policy" - print(f"Creating stack {policy_stack_name} for agent IAM policy...") - with open( - os.path.join(current_dir, "agent-iam-policy-template.yml"), - encoding="utf-8", - ) as f: - policy_template = f.read() - promise = create_or_update_stack( - args, client=client, stack_name=policy_stack_name, template_body=policy_template - ) - wait(promise, client=client) - - cf = boto3.resource("cloudformation", region_name=args.aws_region) - policy = cf.StackResource(policy_stack_name, "BuildkiteAgentManagedPolicy") - return policy.physical_resource_id - - -def main(args): - client = boto3.client("cloudformation", region_name=args.aws_region) - - agent_iam_policy = create_agent_iam_policy(args, client=client) - - promises = [] - - for stack_id in AMI_ID: - stack_id_full = get_full_stack_id(stack_id) - print(f"Creating elastic CI stack {stack_id_full}...") - - params = format_params( - args, stack_id=stack_id, agent_iam_policy=agent_iam_policy - ) - - promise = create_or_update_stack( - args, - client=client, - stack_name=stack_id_full, - template_url=TEMPLATE_URL, - params=params, - ) - promises.append(promise) - print(f"CI stack {stack_id_full} is in progress in the background") - - for promise in promises: - wait(promise, client=client) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--aws-region", type=str, required=True) - parser.add_argument("--agent-token", type=str, required=True) - args = parser.parse_args() - main(args) diff --git a/tests/buildkite/infrastructure/aws-stack-creator/metadata.py b/tests/buildkite/infrastructure/aws-stack-creator/metadata.py deleted file mode 100644 index 5012aa738854..000000000000 --- a/tests/buildkite/infrastructure/aws-stack-creator/metadata.py +++ /dev/null @@ -1,114 +0,0 @@ -AMI_ID = { - # Managed by XGBoost team - "linux-amd64-gpu": { - "us-west-2": 
"ami-0b4079c15bbbd0faf", - }, - "linux-amd64-mgpu": { - "us-west-2": "ami-0b4079c15bbbd0faf", - }, - "windows-gpu": { - "us-west-2": "ami-0123456bcf4cdfb82", - }, - "windows-cpu": { - "us-west-2": "ami-0123456bcf4cdfb82", - }, - # Managed by BuildKite - # from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml - "linux-amd64-cpu": { - "us-west-2": "ami-0083e0ae73c175ec6", - }, - "pipeline-loader": { - "us-west-2": "ami-0083e0ae73c175ec6", - }, - "linux-arm64-cpu": { - "us-west-2": "ami-0dbf1f9da54222f21", - }, -} - -STACK_PARAMS = { - "linux-amd64-gpu": { - "InstanceOperatingSystem": "linux", - "InstanceTypes": "g4dn.xlarge", - "AgentsPerInstance": "1", - "MinSize": "0", - "MaxSize": "8", - "OnDemandPercentage": "100", - "ScaleOutFactor": "1.0", - "ScaleInIdlePeriod": "60", # in seconds - }, - "linux-amd64-mgpu": { - "InstanceOperatingSystem": "linux", - "InstanceTypes": "g4dn.12xlarge", - "AgentsPerInstance": "1", - "MinSize": "0", - "MaxSize": "1", - "OnDemandPercentage": "100", - "ScaleOutFactor": "1.0", - "ScaleInIdlePeriod": "60", # in seconds - }, - "windows-gpu": { - "InstanceOperatingSystem": "windows", - "InstanceTypes": "g4dn.2xlarge", - "AgentsPerInstance": "1", - "MinSize": "0", - "MaxSize": "2", - "OnDemandPercentage": "100", - "ScaleOutFactor": "1.0", - "ScaleInIdlePeriod": "60", # in seconds - }, - "windows-cpu": { - "InstanceOperatingSystem": "windows", - "InstanceTypes": "c5a.2xlarge", - "AgentsPerInstance": "1", - "MinSize": "0", - "MaxSize": "2", - "OnDemandPercentage": "100", - "ScaleOutFactor": "1.0", - "ScaleInIdlePeriod": "60", # in seconds - }, - "linux-amd64-cpu": { - "InstanceOperatingSystem": "linux", - "InstanceTypes": "c5a.4xlarge", - "AgentsPerInstance": "1", - "MinSize": "0", - "MaxSize": "16", - "OnDemandPercentage": "100", - "ScaleOutFactor": "1.0", - "ScaleInIdlePeriod": "60", # in seconds - }, - "pipeline-loader": { - "InstanceOperatingSystem": "linux", - "InstanceTypes": "t3a.micro", - "AgentsPerInstance": "1", - "MinSize": "2", - "MaxSize": "2", - "OnDemandPercentage": "100", - "ScaleOutFactor": "1.0", - "ScaleInIdlePeriod": "60", # in seconds - }, - "linux-arm64-cpu": { - "InstanceOperatingSystem": "linux", - "InstanceTypes": "c6g.4xlarge", - "AgentsPerInstance": "1", - "MinSize": "0", - "MaxSize": "8", - "OnDemandPercentage": "100", - "ScaleOutFactor": "1.0", - "ScaleInIdlePeriod": "60", # in seconds - }, -} - -COMMON_STACK_PARAMS = { - "BuildkiteAgentTimestampLines": "false", - "BuildkiteWindowsAdministrator": "true", - "AssociatePublicIpAddress": "true", - "ScaleOutForWaitingJobs": "false", - "EnableCostAllocationTags": "true", - "CostAllocationTagName": "CreatedBy", - "ECRAccessPolicy": "full", - "EnableSecretsPlugin": "false", - "EnableECRPlugin": "false", - "EnableDockerLoginPlugin": "false", - "EnableDockerUserNamespaceRemap": "false", - "BuildkiteAgentExperiments": "normalised-upload-paths,resolve-commit-after-checkout", -} diff --git a/tests/buildkite/infrastructure/common_blocks/utils.py b/tests/buildkite/infrastructure/common_blocks/utils.py deleted file mode 100644 index 27a0835e8dc0..000000000000 --- a/tests/buildkite/infrastructure/common_blocks/utils.py +++ /dev/null @@ -1,97 +0,0 @@ -import re - -import boto3 -import botocore - - -def stack_exists(args, *, stack_name): - client = boto3.client("cloudformation", region_name=args.aws_region) - waiter = client.get_waiter("stack_exists") - try: - waiter.wait(StackName=stack_name, WaiterConfig={"MaxAttempts": 1}) - return True - except botocore.exceptions.WaiterError as e: - 
return False - - -def create_or_update_stack( - args, *, client, stack_name, template_url=None, template_body=None, params=None -): - kwargs = { - "StackName": stack_name, - "Capabilities": [ - "CAPABILITY_IAM", - "CAPABILITY_NAMED_IAM", - "CAPABILITY_AUTO_EXPAND", - ], - } - if template_url: - kwargs["TemplateURL"] = template_url - if template_body: - kwargs["TemplateBody"] = template_body - if params: - kwargs["Parameters"] = params - - if stack_exists(args, stack_name=stack_name): - print(f"Stack {stack_name} already exists. Updating...") - try: - response = client.update_stack(**kwargs) - return {"StackName": stack_name, "Action": "update"} - except botocore.exceptions.ClientError as e: - if e.response["Error"]["Code"] == "ValidationError" and re.search( - "No updates are to be performed", e.response["Error"]["Message"] - ): - print(f"No update was made to {stack_name}") - return {"StackName": stack_name, "Action": "noop"} - else: - raise e - else: - kwargs.update({"OnFailure": "ROLLBACK", "EnableTerminationProtection": False}) - response = client.create_stack(**kwargs) - return {"StackName": stack_name, "Action": "create"} - - -def replace_stack( - args, *, client, stack_name, template_url=None, template_body=None, params=None -): - """Delete an existing stack and create a new stack with identical name""" - - if not stack_exists(args, stack_name=stack_name): - raise ValueError(f"Stack {stack_name} does not exist") - r = client.delete_stack(StackName=stack_name) - delete_waiter = client.get_waiter("stack_delete_complete") - delete_waiter.wait(StackName=stack_name) - - kwargs = { - "StackName": stack_name, - "Capabilities": [ - "CAPABILITY_IAM", - "CAPABILITY_NAMED_IAM", - "CAPABILITY_AUTO_EXPAND", - ], - "OnFailure": "ROLLBACK", - "EnableTerminationProtection": False, - } - if template_url: - kwargs["TemplateURL"] = template_url - if template_body: - kwargs["TemplateBody"] = template_body - if params: - kwargs["Parameters"] = params - response = client.create_stack(**kwargs) - return {"StackName": stack_name, "Action": "create"} - - -def wait(promise, *, client): - stack_name = promise["StackName"] - print(f"Waiting for {stack_name}...") - if promise["Action"] == "create": - waiter = client.get_waiter("stack_create_complete") - waiter.wait(StackName=stack_name) - print(f"Finished creating stack {stack_name}") - elif promise["Action"] == "update": - waiter = client.get_waiter("stack_update_complete") - waiter.wait(StackName=stack_name) - print(f"Finished updating stack {stack_name}") - elif promise["Action"] != "noop": - raise ValueError(f"Invalid promise {promise}") diff --git a/tests/buildkite/infrastructure/requirements.txt b/tests/buildkite/infrastructure/requirements.txt deleted file mode 100644 index 3ce271ebbdd6..000000000000 --- a/tests/buildkite/infrastructure/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -boto3 -cfn_tools diff --git a/tests/buildkite/infrastructure/service-user/create_service_user.py b/tests/buildkite/infrastructure/service-user/create_service_user.py deleted file mode 100644 index ba08779bd159..000000000000 --- a/tests/buildkite/infrastructure/service-user/create_service_user.py +++ /dev/null @@ -1,44 +0,0 @@ -import argparse -import os - -import boto3 - -current_dir = os.path.dirname(__file__) - - -def main(args): - with open( - os.path.join(current_dir, "service-user-template.yml"), encoding="utf-8" - ) as f: - service_user_template = f.read() - - stack_id = "buildkite-elastic-ci-stack-service-user" - - print("Create a new IAM user with suitable 
permissions...") - client = boto3.client("cloudformation", region_name=args.aws_region) - response = client.create_stack( - StackName=stack_id, - TemplateBody=service_user_template, - Capabilities=[ - "CAPABILITY_IAM", - "CAPABILITY_NAMED_IAM", - ], - Parameters=[{"ParameterKey": "UserName", "ParameterValue": args.user_name}], - ) - waiter = client.get_waiter("stack_create_complete") - waiter.wait(StackName=stack_id) - user = boto3.resource("iam", region_name=args.aws_region).User(args.user_name) - key_pair = user.create_access_key_pair() - print("Finished creating an IAM users with suitable permissions.") - print(f"Access Key ID: {key_pair.access_key_id}") - print(f"Access Secret Access Key: {key_pair.secret_access_key}") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--aws-region", type=str, required=True) - parser.add_argument( - "--user-name", type=str, default="buildkite-elastic-ci-stack-user" - ) - args = parser.parse_args() - main(args) diff --git a/tests/buildkite/infrastructure/service-user/service-user-template.yml b/tests/buildkite/infrastructure/service-user/service-user-template.yml deleted file mode 100644 index 2077cfe7b148..000000000000 --- a/tests/buildkite/infrastructure/service-user/service-user-template.yml +++ /dev/null @@ -1,349 +0,0 @@ ---- -AWSTemplateFormatVersion: "2010-09-09" -Description: "Buildkite Elastic CI Stack CloudFormation service user" - -Parameters: - UserName: - Type: String - Default: buildkite-elastic-ci-stack-user - Description: Name of user to create - -Outputs: - UserNameOutput: - Value: !Ref CloudFormationServiceUser - UserArnOutput: - Value: !GetAtt CloudFormationServiceUser.Arn - -Resources: - CloudFormationServiceUser: - Type: AWS::IAM::User - Properties: - ManagedPolicyArns: - - !Ref SubstackCrudPolicy - - !Ref CrudPolicy - - !Ref ImageBuilderPolicy - UserName: !Ref UserName - - SubstackCrudPolicy: - Type: AWS::IAM::ManagedPolicy - Properties: - PolicyDocument: - { - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": "cloudformation:*", - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": [ - "serverlessrepo:GetApplication", - "serverlessrepo:GetCloudFormationTemplate", - "serverlessrepo:CreateCloudFormationTemplate" - ], - "Resource": "*" - } - ] - } - - CrudPolicy: - Type: AWS::IAM::ManagedPolicy - Properties: - PolicyDocument: - { - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": [ - "ec2:DescribeAccountAttributes", - "ec2:DescribeAvailabilityZones", - "ec2:DescribeInstances", - "ec2:DescribeInternetGateways", - "ec2:DescribeLaunchTemplateVersions", - "ec2:DescribeLaunchTemplates", - "ec2:DescribeNetworkInterfaces", - "ec2:DescribeRouteTables", - "ec2:DescribeSecurityGroups", - "ec2:DescribeSubnets", - "ec2:DescribeVpcs", - "ec2:CreateTags" - ], - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": [ - "ec2:CreateInternetGateway", - "ec2:AttachInternetGateway", - "ec2:DetachInternetGateway", - "ec2:DeleteInternetGateway" - ], - "Resource": "arn:aws:ec2:*:*:internet-gateway/*" - }, - { - "Effect": "Allow", - "Action": [ - "ec2:CreateLaunchTemplate", - "ec2:CreateLaunchTemplateVersion", - "ec2:DeleteLaunchTemplate" - ], - "Resource": "arn:aws:ec2:*:*:launch-template/*" - }, - { - "Effect": "Allow", - "Action": [ - "ec2:AssociateRouteTable", - "ec2:DisassociateRouteTable", - "ec2:CreateRoute", - "ec2:CreateRouteTable", - "ec2:DeleteRoute", - "ec2:DeleteRouteTable" - ], - "Resource": "arn:aws:ec2:*:*:route-table/*" - }, - { - 
"Effect": "Allow", - "Action": [ - "ec2:AuthorizeSecurityGroupIngress", - "ec2:RevokeSecurityGroupIngress", - "ec2:CreateSecurityGroup", - "ec2:DeleteSecurityGroup" - ], - "Resource": "arn:aws:ec2:*:*:security-group/*" - }, - { - "Effect": "Allow", - "Action": "ec2:RunInstances", - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": [ - "ec2:CreateSubnet", - "ec2:DeleteSubnet", - "ec2:AssociateRouteTable", - "ec2:DisassociateRouteTable" - ], - "Resource": "arn:aws:ec2:*:*:subnet/*" - }, - { - "Effect": "Allow", - "Action": [ - "ec2:CreateVpc", - "ec2:CreateSecurityGroup", - "ec2:ModifyVpcAttribute", - "ec2:AttachInternetGateway", - "ec2:DetachInternetGateway", - "ec2:CreateSubnet", - "ec2:CreateRouteTable", - "ec2:DeleteVpc" - ], - "Resource": "arn:aws:ec2:*:*:vpc/*" - }, - { - "Effect": "Allow", - "Action": [ - "ec2:CreateDefaultVpc", - "ec2:CreateDefaultSubnet" - ], - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": [ - "iam:CreateInstanceProfile", - "iam:GetInstanceProfile", - "iam:AddRoleToInstanceProfile", - "iam:RemoveRoleFromInstanceProfile", - "iam:DeleteInstanceProfile" - ], - "Resource": "arn:aws:iam::*:instance-profile/*" - }, - { - "Effect": "Allow", - "Action": [ - "kms:DescribeKey", - "kms:CreateGrant", - "kms:Decrypt", - "kms:Encrypt" - ], - "Resource": "arn:aws:kms:*:*:key/*" - }, - { - "Effect": "Allow", - "Action": [ - "lambda:CreateFunction", - "lambda:GetFunction", - "lambda:GetFunctionCodeSigningConfig", - "lambda:AddPermission", - "lambda:RemovePermission", - "lambda:DeleteFunction", - "lambda:InvokeFunction", - "lambda:TagResource" - ], - "Resource": "arn:aws:lambda:*:*:function:*" - }, - { - "Effect": "Allow", - "Action": [ - "logs:CreateLogGroup", - "logs:PutRetentionPolicy", - "logs:DeleteLogGroup" - ], - "Resource": "arn:aws:logs:*:*:log-group:*" - }, - { - "Effect": "Allow", - "Action": [ - "s3:GetObject", - "s3:CreateBucket", - "s3:PutBucketAcl", - "s3:PutBucketLogging", - "s3:PutBucketTagging", - "s3:PutBucketVersioning" - ], - "Resource": "arn:aws:s3:::*" - }, - { - "Effect": "Allow", - "Action": [ - "ssm:GetParameter", - "ssm:PutParameter", - "ssm:DeleteParameter" - ], - "Resource": "arn:aws:ssm:*:*:parameter/*" - }, - { - "Effect": "Allow", - "Action": [ - "iam:ListPolicies", - "iam:ListInstanceProfiles", - "iam:ListRoles", - "iam:ListPolicyVersions", - "iam:ListRolePolicies", - "iam:ListAttachedRolePolicies", - "iam:ListInstanceProfileTags", - "iam:ListRoleTags", - "iam:ListInstanceProfilesForRole", - "iam:GetPolicyVersion", - "iam:GetPolicy", - "iam:GetInstanceProfile", - "iam:GetRole", - "iam:GetRolePolicy", - "iam:TagPolicy", - "iam:UntagPolicy", - "iam:TagInstanceProfile", - "iam:UntagInstanceProfile", - "iam:TagRole", - "iam:UntagRole", - "iam:CreateRole", - "iam:PassRole", - "iam:DeleteRole", - "iam:UpdateRoleDescription", - "iam:UpdateRole", - "iam:AddRoleToInstanceProfile", - "iam:RemoveRoleFromInstanceProfile", - "iam:CreateInstanceProfile", - "iam:DeleteInstanceProfile", - "iam:DetachRolePolicy", - "iam:SetDefaultPolicyVersion", - "iam:AttachRolePolicy", - "iam:UpdateAssumeRolePolicy", - "iam:PutRolePermissionsBoundary", - "iam:DeleteRolePermissionsBoundary", - "iam:CreatePolicy", - "iam:DeletePolicyVersion", - "iam:DeletePolicy", - "iam:PutRolePolicy", - "iam:DeleteRolePolicy" - ], - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": [ - "autoscaling:DescribeLifecycleHookTypes", - "autoscaling:DescribeTerminationPolicyTypes", - "autoscaling:DescribePolicies", - "autoscaling:DescribeWarmPool", - 
"autoscaling:DescribeScalingActivities", - "autoscaling:DescribeScalingProcessTypes", - "autoscaling:DescribeScheduledActions", - "autoscaling:DescribeAutoScalingGroups", - "autoscaling:DescribeAutoScalingInstances", - "autoscaling:DescribeLifecycleHooks", - "autoscaling:SetDesiredCapacity", - "autoscaling:PutLifecycleHook", - "autoscaling:DeleteLifecycleHook", - "autoscaling:SetInstanceProtection", - "autoscaling:CreateAutoScalingGroup", - "autoscaling:EnableMetricsCollection", - "autoscaling:UpdateAutoScalingGroup", - "autoscaling:DeleteAutoScalingGroup", - "autoscaling:PutScalingPolicy", - "autoscaling:DeletePolicy", - "autoscaling:BatchPutScheduledUpdateGroupAction", - "autoscaling:PutScheduledUpdateGroupAction", - "autoscaling:DeleteScheduledAction", - "autoscaling:PutWarmPool", - "autoscaling:DeleteWarmPool", - "autoscaling:TerminateInstanceInAutoScalingGroup", - "autoscaling:AttachInstances" - ], - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": [ - "events:DescribeRule", - "events:PutRule", - "events:PutTargets", - "events:RemoveTargets", - "events:DeleteRule" - ], - "Resource": "arn:aws:events:*:*:rule/*" - } - ] - } - - ImageBuilderPolicy: - Type: AWS::IAM::ManagedPolicy - Properties: - PolicyDocument: - { - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": [ - "imagebuilder:CreateComponent", - "imagebuilder:GetComponent", - "imagebuilder:DeleteComponent", - "imagebuilder:CreateImageRecipe", - "imagebuilder:GetImageRecipe", - "imagebuilder:DeleteImageRecipe", - "imagebuilder:CreateImagePipeline", - "imagebuilder:GetImagePipeline", - "imagebuilder:DeleteImagePipeline", - "imagebuilder:CreateInfrastructureConfiguration", - "imagebuilder:GetInfrastructureConfiguration", - "imagebuilder:DeleteInfrastructureConfiguration", - "imagebuilder:CreateDistributionConfiguration", - "imagebuilder:GetDistributionConfiguration", - "imagebuilder:DeleteDistributionConfiguration", - "imagebuilder:TagResource", - "imagebuilder:StartImagePipelineExecution", - "ec2:DescribeImages", - "ec2:DescribeSnapshots", - "ec2:DescribeRegions", - "ec2:DescribeVolumes", - "ec2:DescribeKeyPairs", - "ec2:DescribeInstanceTypeOfferings" - ], - "Resource": "*" - } - ] - } diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/create_worker_image_pipelines.py b/tests/buildkite/infrastructure/worker-image-pipeline/create_worker_image_pipelines.py deleted file mode 100644 index 8051b991da51..000000000000 --- a/tests/buildkite/infrastructure/worker-image-pipeline/create_worker_image_pipelines.py +++ /dev/null @@ -1,85 +0,0 @@ -import argparse -import copy -import json -import os -import sys -from urllib.request import urlopen - -import boto3 -import cfn_flip -from metadata import IMAGE_PARAMS - -current_dir = os.path.dirname(__file__) -sys.path.append(os.path.join(current_dir, "..")) - -from common_blocks.utils import replace_stack, wait - -BUILDKITE_CF_TEMPLATE_URL = ( - "https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml" -) - - -def format_params(*, stack_id, aws_region, ami_mapping): - params = copy.deepcopy(IMAGE_PARAMS[stack_id]) - with open( - os.path.join(current_dir, params["BootstrapScript"]), - encoding="utf-8", - ) as f: - bootstrap_script = f.read() - params["BaseImageId"] = ami_mapping[aws_region][params["BaseImageId"]] - params["BootstrapScript"] = bootstrap_script - return [{"ParameterKey": k, "ParameterValue": v} for k, v in params.items()] - - -def get_ami_mapping(): - with urlopen(BUILDKITE_CF_TEMPLATE_URL) as response: - 
buildkite_cf_template = response.read().decode("utf-8") - cfn_obj = json.loads(cfn_flip.to_json(buildkite_cf_template)) - return cfn_obj["Mappings"]["AWSRegion2AMI"] - - -def get_full_stack_id(stack_id): - return f"buildkite-{stack_id}-worker" - - -def main(args): - with open( - os.path.join(current_dir, "ec2-image-builder-pipeline-template.yml"), - encoding="utf-8", - ) as f: - ec2_image_pipeline_template = f.read() - - ami_mapping = get_ami_mapping() - - client = boto3.client("cloudformation", region_name=args.aws_region) - promises = [] - - for stack_id in IMAGE_PARAMS: - stack_id_full = get_full_stack_id(stack_id) - print(f"Creating EC2 image builder stack {stack_id_full}...") - - params = format_params( - stack_id=stack_id, aws_region=args.aws_region, ami_mapping=ami_mapping - ) - - promise = replace_stack( - args, - client=client, - stack_name=stack_id_full, - template_body=ec2_image_pipeline_template, - params=params, - ) - promises.append(promise) - print( - f"EC2 image builder stack {stack_id_full} is in progress in the background" - ) - - for promise in promises: - wait(promise, client=client) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--aws-region", type=str, required=True) - args = parser.parse_args() - main(args) diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/ec2-image-builder-pipeline-template.yml b/tests/buildkite/infrastructure/worker-image-pipeline/ec2-image-builder-pipeline-template.yml deleted file mode 100644 index 8d3bafa72f08..000000000000 --- a/tests/buildkite/infrastructure/worker-image-pipeline/ec2-image-builder-pipeline-template.yml +++ /dev/null @@ -1,108 +0,0 @@ ---- -AWSTemplateFormatVersion: "2010-09-09" -Description: "EC2 Image Builder pipelines to build workers" - -Parameters: - BaseImageId: - Type: String - Description: Base AMI to build a new image on top of. - - BootstrapScript: - Type: String - Description: Content of AMI customization script - - InstanceType: - Type: String - Description: Instance type for the Image Builder instances. - - InstanceOperatingSystem: - Type: String - Description: The operating system to run on the instance - AllowedValues: - - Linux - - Windows - Default: "Linux" - - VolumeSize: - Type: Number - Description: Size of EBS volume, in GiBs - -Conditions: - IsInstanceWindows: - !Equals [ !Ref InstanceOperatingSystem, "Windows" ] - -Resources: - # IAM role for the image builder instance - InstanceRole: - Type: AWS::IAM::Role - Properties: - AssumeRolePolicyDocument: - Version: "2012-10-17" - Statement: - - Effect: "Allow" - Principal: - Service: "ec2.amazonaws.com" - Action: "sts:AssumeRole" - ManagedPolicyArns: - - arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore - - arn:aws:iam::aws:policy/EC2InstanceProfileForImageBuilder - - arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess - - InstanceProfile: - Type: AWS::IAM::InstanceProfile - Properties: - Roles: - - !Ref InstanceRole - - # Component that runs the bootstrap script - BootstrapComponent: - Type: AWS::ImageBuilder::Component - Properties: - Name: !Join ["-", [!Ref AWS::StackName, "bootstrap-component", !Select [2, !Split ['/', !Ref AWS::StackId]]]] - Platform: !Ref InstanceOperatingSystem - Version: "1.0.0" - Description: Execute a bootstrap script. 
- Data: !Ref BootstrapScript - - Recipe: - Type: AWS::ImageBuilder::ImageRecipe - Properties: - Name: !Join ["-", [!Ref AWS::StackName, "image", !Select [2, !Split ['/', !Ref AWS::StackId]]]] - Components: - - ComponentArn: !Ref BootstrapComponent - ParentImage: !Ref BaseImageId - BlockDeviceMappings: - - DeviceName: !If [IsInstanceWindows, "/dev/sda1", "/dev/xvda"] - Ebs: - DeleteOnTermination: true - Encrypted: false - VolumeSize: !Ref VolumeSize - VolumeType: gp2 - Version: "1.0.0" - - Infrastructure: - Type: AWS::ImageBuilder::InfrastructureConfiguration - Properties: - Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline-infrastructure", !Select [2, !Split ['/', !Ref AWS::StackId]]]] - InstanceProfileName: !Ref InstanceProfile - InstanceTypes: - - !Ref InstanceType - TerminateInstanceOnFailure: true - - # Copy to this region only - Distribution: - Type: AWS::ImageBuilder::DistributionConfiguration - Properties: - Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline-distribution-config", !Select [2, !Split ['/', !Ref AWS::StackId]]]] - Distributions: - - Region: !Ref AWS::Region - AmiDistributionConfiguration: {} - - # Composition of the above elements - Pipeline: - Type: AWS::ImageBuilder::ImagePipeline - Properties: - Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline", !Select [2, !Split ['/', !Ref AWS::StackId]]]] - DistributionConfigurationArn: !Ref Distribution - ImageRecipeArn: !Ref Recipe - InfrastructureConfigurationArn: !Ref Infrastructure diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/linux-amd64-gpu-bootstrap.yml b/tests/buildkite/infrastructure/worker-image-pipeline/linux-amd64-gpu-bootstrap.yml deleted file mode 100644 index 88403911cbc6..000000000000 --- a/tests/buildkite/infrastructure/worker-image-pipeline/linux-amd64-gpu-bootstrap.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: BuildKiteLinuxAMD64GPUBootstrap -description: Set up worker image for linux-amd64-gpu pipeline -schemaVersion: 1.0 - -phases: - - name: build - steps: - - name: SetupStep - action: ExecuteBash - inputs: - commands: - - | - yum groupinstall -y "Development tools" - yum install -y kernel-devel-$(uname -r) - dnf install -y kernel-modules-extra - aws s3 cp --recursive s3://ec2-linux-nvidia-drivers/latest/ . 
- chmod +x NVIDIA-Linux-x86_64*.run - ./NVIDIA-Linux-x86_64*.run --silent - - curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | tee /etc/yum.repos.d/nvidia-container-toolkit.repo - yum install -y nvidia-container-toolkit - yum clean expire-cache - nvidia-ctk runtime configure --runtime=docker - systemctl restart docker diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/metadata.py b/tests/buildkite/infrastructure/worker-image-pipeline/metadata.py deleted file mode 100644 index 37100209fe2e..000000000000 --- a/tests/buildkite/infrastructure/worker-image-pipeline/metadata.py +++ /dev/null @@ -1,18 +0,0 @@ -IMAGE_PARAMS = { - "linux-amd64-gpu": { - "BaseImageId": "linuxamd64", - # AMI ID is looked up from Buildkite's CloudFormation template - "BootstrapScript": "linux-amd64-gpu-bootstrap.yml", - "InstanceType": "g4dn.xlarge", - "InstanceOperatingSystem": "Linux", - "VolumeSize": "40", # in GiBs - }, - "windows-gpu": { - "BaseImageId": "windows", - # AMI ID is looked up from Buildkite's CloudFormation template - "BootstrapScript": "windows-gpu-bootstrap.yml", - "InstanceType": "g4dn.2xlarge", - "InstanceOperatingSystem": "Windows", - "VolumeSize": "120", # in GiBs - }, -} diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/run_pipelines.py b/tests/buildkite/infrastructure/worker-image-pipeline/run_pipelines.py deleted file mode 100644 index 9edb8b1a7c24..000000000000 --- a/tests/buildkite/infrastructure/worker-image-pipeline/run_pipelines.py +++ /dev/null @@ -1,22 +0,0 @@ -import argparse - -import boto3 -from create_worker_image_pipelines import get_full_stack_id -from metadata import IMAGE_PARAMS - - -def main(args): - cf = boto3.resource("cloudformation", region_name=args.aws_region) - builder_client = boto3.client("imagebuilder", region_name=args.aws_region) - for stack_id in IMAGE_PARAMS: - stack_id_full = get_full_stack_id(stack_id) - pipeline_arn = cf.Stack(stack_id_full).Resource("Pipeline").physical_resource_id - print(f"Running pipeline {pipeline_arn} to generate a new AMI...") - r = builder_client.start_image_pipeline_execution(imagePipelineArn=pipeline_arn) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--aws-region", type=str, required=True) - args = parser.parse_args() - main(args) diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/windows-gpu-bootstrap.yml b/tests/buildkite/infrastructure/worker-image-pipeline/windows-gpu-bootstrap.yml deleted file mode 100644 index 0348e28c8709..000000000000 --- a/tests/buildkite/infrastructure/worker-image-pipeline/windows-gpu-bootstrap.yml +++ /dev/null @@ -1,71 +0,0 @@ -name: BuildKiteWindowsGPUBootstrap -description: Set up worker image for windows-gpu pipeline -schemaVersion: 1.0 - -phases: - - name: build - steps: - - name: SetupStep - action: ExecutePowerShell - inputs: - commands: - - | - $ErrorActionPreference = "Stop" - - choco --version - choco feature enable -n=allowGlobalConfirmation - - # CMake 3.29.2 - Write-Host '>>> Installing CMake 3.29.2...' - choco install cmake --version 3.29.2 --installargs "ADD_CMAKE_TO_PATH=System" - if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - - # Notepad++ - Write-Host '>>> Installing Notepad++...' - choco install notepadplusplus - if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - - # Mambaforge - Write-Host '>>> Installing Mambaforge...' 
- choco install mambaforge /RegisterPython:1 /D:C:\tools\mambaforge - C:\tools\mambaforge\Scripts\conda.exe init --user --system - if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - . "C:\Windows\System32\WindowsPowerShell\v1.0\profile.ps1" - if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - conda config --set auto_activate_base false - - # Install Java 11 - Write-Host '>>> Installing Java 11...' - choco install openjdk11 - if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - - # Install Maven - Write-Host '>>> Installing Maven...' - choco install maven - if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - - # Install GraphViz - Write-Host '>>> Installing GraphViz...' - choco install graphviz - if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - - # Install Visual Studio 2022 Community - Write-Host '>>> Installing Visual Studio 2022 Community...' - choco install visualstudio2022community ` - --params "--wait --passive --norestart" - if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - choco install visualstudio2022-workload-nativedesktop --params ` - "--wait --passive --norestart --includeOptional" - if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - - # Install CUDA 12.4 - Write-Host '>>> Installing CUDA 12.4...' - choco install cuda --version=12.4.1.551 - if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - - # Install R - Write-Host '>>> Installing R...' - choco install r.project --version=4.3.2 - if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - choco install rtools --version=4.3.5550 - if ($LASTEXITCODE -ne 0) { throw "Last command failed" } diff --git a/tests/buildkite/pipeline-mac-m1.yml b/tests/buildkite/pipeline-mac-m1.yml deleted file mode 100644 index 57b1b1d12010..000000000000 --- a/tests/buildkite/pipeline-mac-m1.yml +++ /dev/null @@ -1,13 +0,0 @@ -steps: - - block: ":rocket: Run this test job" - if: build.pull_request.id != null || build.branch =~ /^dependabot\// - - label: ":macos: Build libxgboost4j.dylib for MacOS M1" - command: "tests/buildkite/build-jvm-macos-m1.sh" - key: mac-m1-jvm - agents: - queue: mac-mini-m1 - - label: ":macos: Build and Test XGBoost for MacOS M1 with Clang 11" - command: "tests/buildkite/test-macos-m1-clang11.sh" - key: mac-m1-appleclang11 - agents: - queue: mac-mini-m1 diff --git a/tests/buildkite/pipeline-mgpu.yml b/tests/buildkite/pipeline-mgpu.yml deleted file mode 100644 index cbb573c3682c..000000000000 --- a/tests/buildkite/pipeline-mgpu.yml +++ /dev/null @@ -1,48 +0,0 @@ -env: - DOCKER_CACHE_ECR_ID: "492475357299" - DOCKER_CACHE_ECR_REGION: "us-west-2" - DISABLE_RELEASE: "1" - # Skip uploading artifacts to S3 bucket - # Also, don't build all CUDA archs; just build sm_75 -steps: - - label: ":moneybag: Enforce daily budget" - command: "tests/buildkite/enforce_daily_budget.sh" - key: enforce-daily-budget - agents: - queue: pipeline-loader - - wait - - block: ":rocket: Run this test job" - if: build.pull_request.id != null || build.branch =~ /^dependabot\// - #### -------- CONTAINER BUILD -------- - - label: ":docker: Build containers" - commands: - - "tests/buildkite/build-containers.sh gpu" - - "tests/buildkite/build-containers.sh gpu_build_rockylinux8" - - "tests/buildkite/build-containers.sh jvm_gpu_build" - key: build-containers - agents: - queue: linux-amd64-cpu - - wait - #### -------- BUILD -------- - - label: ":console: Build CUDA" - command: "tests/buildkite/build-cuda.sh" - key: build-cuda - agents: - queue: linux-amd64-cpu - - label: ":console: Build and test JVM 
packages with CUDA" - command: "tests/buildkite/build-jvm-packages-gpu.sh" - key: build-jvm-packages-gpu - agents: - queue: linux-amd64-mgpu - - wait - #### -------- TEST -------- - - label: ":console: Run Google Tests" - command: "tests/buildkite/test-cpp-mgpu.sh" - key: test-cpp-mgpu - agents: - queue: linux-amd64-mgpu - - label: ":console: Test Python package, 4 GPUs" - command: "tests/buildkite/test-python-gpu.sh mgpu" - key: test-python-mgpu - agents: - queue: linux-amd64-mgpu diff --git a/tests/buildkite/pipeline-nightly.yml b/tests/buildkite/pipeline-nightly.yml deleted file mode 100644 index 4d84f93a54d4..000000000000 --- a/tests/buildkite/pipeline-nightly.yml +++ /dev/null @@ -1,43 +0,0 @@ -# Nightly CI pipeline, to test against dev versions of dependencies - -env: - DOCKER_CACHE_ECR_ID: "492475357299" - DOCKER_CACHE_ECR_REGION: "us-west-2" - DISABLE_RELEASE: "1" - # Skip uploading artifacts to S3 bucket - # Also, don't build all CUDA archs; just build sm_75 - USE_DEPS_DEV_VER: "1" - # Use dev versions of RAPIDS and other dependencies -steps: - #### -------- CONTAINER BUILD -------- - - label: ":docker: Build containers" - commands: - - "tests/buildkite/build-containers.sh gpu_build_rockylinux8" - - "tests/buildkite/build-containers.sh gpu_dev_ver" - key: build-containers - agents: - queue: linux-amd64-cpu - - wait - - - label: ":console: Build CUDA" - command: "tests/buildkite/build-cuda.sh" - key: build-cuda - agents: - queue: linux-amd64-cpu - - wait - - label: ":console: Build CUDA + RMM Nightly" - command: "tests/buildkite/build-cuda-with-rmm.sh dev" - key: build-cuda-rmm-nightly - agents: - queue: linux-amd64-cpu - - wait - - label: ":console: Test Python package, single GPU" - command: "tests/buildkite/test-python-gpu.sh gpu" - key: test-python-gpu - agents: - queue: linux-amd64-gpu - - label: ":console: Test Python package, 4 GPUs" - command: "tests/buildkite/test-python-gpu.sh mgpu" - key: test-python-mgpu - agents: - queue: linux-amd64-mgpu diff --git a/tests/buildkite/pipeline-win64.yml b/tests/buildkite/pipeline-win64.yml deleted file mode 100644 index 83a61981e716..000000000000 --- a/tests/buildkite/pipeline-win64.yml +++ /dev/null @@ -1,24 +0,0 @@ -steps: - - label: ":moneybag: Enforce daily budget" - command: "tests/buildkite/enforce_daily_budget.sh" - key: enforce-daily-budget - agents: - queue: pipeline-loader - - wait - - block: ":rocket: Run this test job" - if: build.pull_request.id != null || build.branch =~ /^dependabot\// - #### -------- BUILD -------- - - label: ":windows: Build XGBoost for Windows with CUDA" - command: "tests/buildkite/build-win64-gpu.ps1" - key: build-win64-gpu - agents: - queue: windows-cpu - - - wait - - #### -------- TEST -------- - - label: ":windows: Test XGBoost on Windows" - command: "tests/buildkite/test-win64-gpu.ps1" - key: test-win64-gpu - agents: - queue: windows-gpu diff --git a/tests/buildkite/pipeline.yml b/tests/buildkite/pipeline.yml deleted file mode 100644 index 6c1df33b84dd..000000000000 --- a/tests/buildkite/pipeline.yml +++ /dev/null @@ -1,113 +0,0 @@ -env: - DOCKER_CACHE_ECR_ID: "492475357299" - DOCKER_CACHE_ECR_REGION: "us-west-2" -steps: - - label: ":moneybag: Enforce daily budget" - command: "tests/buildkite/enforce_daily_budget.sh" - key: enforce-daily-budget - agents: - queue: pipeline-loader - - wait - - block: ":rocket: Run this test job" - if: build.pull_request.id != null || build.branch =~ /^dependabot\// - #### -------- CONTAINER BUILD -------- - - label: ":docker: Build containers" - commands: - - 
"tests/buildkite/build-containers.sh cpu" - - "tests/buildkite/build-containers.sh gpu" - - "tests/buildkite/build-containers.sh gpu_build_rockylinux8" - key: build-containers - agents: - queue: linux-amd64-cpu - - wait - #### -------- BUILD -------- - - label: ":console: Run clang-tidy" - command: "tests/buildkite/run-clang-tidy.sh" - key: run-clang-tidy - agents: - queue: linux-amd64-cpu - - label: ":console: Build CPU" - command: "tests/buildkite/build-cpu.sh" - key: build-cpu - agents: - queue: linux-amd64-cpu - - label: ":console: Build CPU ARM64 + manylinux_2_28_aarch64 wheel" - command: "tests/buildkite/build-cpu-arm64.sh" - key: build-cpu-arm64 - agents: - queue: linux-arm64-cpu - - label: ":console: Build CUDA + manylinux_2_28_x86_64 wheel" - command: "tests/buildkite/build-cuda.sh" - key: build-cuda - agents: - queue: linux-amd64-cpu - - label: ":console: Build CUDA with RMM" - command: "tests/buildkite/build-cuda-with-rmm.sh stable" - key: build-cuda-with-rmm - agents: - queue: linux-amd64-cpu - - label: ":console: Build R package with CUDA" - command: "tests/buildkite/build-gpu-rpkg.sh" - key: build-gpu-rpkg - agents: - queue: linux-amd64-cpu - - label: ":console: Build JVM packages" - timeout_in_minutes: 30 - command: "tests/buildkite/build-jvm-packages.sh" - key: build-jvm-packages - agents: - queue: linux-amd64-cpu - - label: ":console: Build libxgboost4j.so for Linux ARM64 (targeting glibc 2.17)" - command: "tests/buildkite/build-jvm-linux-arm64-manylinux2014.sh" - key: build-jvm-linux-arm64-manylinux2014 - agents: - queue: linux-arm64-cpu - - label: ":console: Build libxgboost4j.so for Linux x86_64 (targeting glibc 2.17)" - command: "tests/buildkite/build-jvm-linux-x86_64-manylinux2014.sh" - key: build-jvm-linux-x86_64-manylinux2014 - agents: - queue: linux-amd64-cpu - - label: ":console: Build JVM package doc" - command: "tests/buildkite/build-jvm-doc.sh" - key: build-jvm-doc - agents: - queue: linux-amd64-cpu - - label: ":console: Build manylinux2014_x86_64 wheel" - command: "tests/buildkite/build-manylinux2014.sh x86_64" - key: build-manylinux2014-x86_64 - agents: - queue: linux-amd64-cpu - - label: ":console: Build manylinux2014_aarch64 wheel" - command: "tests/buildkite/build-manylinux2014.sh aarch64" - key: build-manylinux2014-aarch64 - agents: - queue: linux-arm64-cpu - - wait - #### -------- TEST -------- - - label: ":console: Test Python package, CPU" - command: "tests/buildkite/test-python-cpu.sh" - key: test-python-cpu - agents: - queue: linux-amd64-cpu - - label: ":console: Test Python package, CPU ARM64" - command: "tests/buildkite/test-python-cpu-arm64.sh" - key: test-python-cpu-arm64 - agents: - queue: linux-arm64-cpu - - label: ":console: Test Python package, single GPU" - command: "tests/buildkite/test-python-gpu.sh gpu" - key: test-python-gpu - agents: - queue: linux-amd64-gpu - - label: ":console: Run Google Tests" - command: "tests/buildkite/test-cpp-gpu.sh" - key: test-cpp-gpu - agents: - queue: linux-amd64-gpu - - wait - #### -------- DEPLOY JVM -------- - - label: ":console: Deploy JVM packages" - command: "tests/buildkite/deploy-jvm-packages.sh" - key: deploy-jvm-packages - agents: - queue: linux-amd64-cpu diff --git a/tests/buildkite/run-clang-tidy.sh b/tests/buildkite/run-clang-tidy.sh deleted file mode 100755 index 95ff010c20f1..000000000000 --- a/tests/buildkite/run-clang-tidy.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -echo "--- Run clang-tidy" - -source tests/buildkite/conftest.sh - -tests/ci_build/ci_build.sh 
clang_tidy \ - --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \ - python3 tests/ci_build/tidy.py --cuda-archs 75 diff --git a/tests/buildkite/test-cpp-gpu.sh b/tests/buildkite/test-cpp-gpu.sh deleted file mode 100755 index d7197db2efce..000000000000 --- a/tests/buildkite/test-cpp-gpu.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -source tests/buildkite/conftest.sh - -echo "--- Run Google Tests with CUDA, using a GPU" -buildkite-agent artifact download "build/testxgboost" . --step build-cuda -chmod +x build/testxgboost -tests/ci_build/ci_build.sh gpu --use-gpus \ - --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \ - --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \ - --build-arg NCCL_VERSION_ARG=$NCCL_VERSION \ - build/testxgboost - -echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled" -rm -rfv build/ -buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm -chmod +x build/testxgboost -tests/ci_build/ci_build.sh gpu --use-gpus \ - --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \ - --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \ - --build-arg NCCL_VERSION_ARG=$NCCL_VERSION \ - build/testxgboost --use-rmm-pool diff --git a/tests/buildkite/test-cpp-mgpu.sh b/tests/buildkite/test-cpp-mgpu.sh deleted file mode 100755 index 65614b191d04..000000000000 --- a/tests/buildkite/test-cpp-mgpu.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -source tests/buildkite/conftest.sh - -# Allocate extra space in /dev/shm to enable NCCL -export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g' - -echo "--- Run Google Tests with CUDA, using multiple GPUs" -buildkite-agent artifact download "build/testxgboost" . --step build-cuda -chmod +x build/testxgboost -tests/ci_build/ci_build.sh gpu --use-gpus \ - --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \ - --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \ - --build-arg NCCL_VERSION_ARG=$NCCL_VERSION \ - build/testxgboost --gtest_filter=*MGPU* diff --git a/tests/buildkite/test-macos-m1-clang11.sh b/tests/buildkite/test-macos-m1-clang11.sh deleted file mode 100755 index 6824cb7b14b4..000000000000 --- a/tests/buildkite/test-macos-m1-clang11.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -source tests/buildkite/conftest.sh - -# Display system info -echo "--- Display system information" -set -x -system_profiler SPSoftwareDataType -sysctl -n machdep.cpu.brand_string -uname -m -set +x - -# Ensure that XGBoost can be built with Clang 11 -echo "--- Build and Test XGBoost with MacOS M1, Clang 11" -set -x -LLVM11_PATH=$(brew --prefix llvm\@11) -mkdir build -pushd build -cmake .. -GNinja -DCMAKE_C_COMPILER=${LLVM11_PATH}/bin/clang \ - -DCMAKE_CXX_COMPILER=${LLVM11_PATH}/bin/clang++ -DGOOGLE_TEST=ON \ - -DUSE_DMLC_GTEST=ON -ninja -v -./testxgboost diff --git a/tests/buildkite/test-python-cpu-arm64.sh b/tests/buildkite/test-python-cpu-arm64.sh deleted file mode 100755 index 68a428034073..000000000000 --- a/tests/buildkite/test-python-cpu-arm64.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -source tests/buildkite/conftest.sh - -echo "--- Test Python CPU ARM64" -buildkite-agent artifact download "python-package/dist/*.whl" . --step build-cpu-arm64 -buildkite-agent artifact download "xgboost" . 
--step build-cpu-arm64 -chmod +x ./xgboost -tests/ci_build/ci_build.sh aarch64 tests/ci_build/test_python.sh cpu-arm64 diff --git a/tests/buildkite/test-python-cpu.sh b/tests/buildkite/test-python-cpu.sh deleted file mode 100755 index 6c53dc2821bc..000000000000 --- a/tests/buildkite/test-python-cpu.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -echo "--- Test CPU code in Python env" - -source tests/buildkite/conftest.sh - -mkdir -pv python-package/dist -buildkite-agent artifact download "python-package/dist/*.whl" . --step build-cuda -buildkite-agent artifact download "xgboost" . --step build-cpu -chmod +x ./xgboost - -export BUILDKITE_ANALYTICS_TOKEN=$(get_aws_secret buildkite/test_analytics/cpu) -set_buildkite_env_vars_in_container -tests/ci_build/ci_build.sh cpu tests/ci_build/test_python.sh cpu diff --git a/tests/buildkite/test-python-gpu.sh b/tests/buildkite/test-python-gpu.sh deleted file mode 100755 index d7bd729a2e01..000000000000 --- a/tests/buildkite/test-python-gpu.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -if [ "$#" -lt 1 ] -then - suite='' - args='' -else - suite=$1 - shift 1 - args="$@" -fi - -source tests/buildkite/conftest.sh - -echo "--- Fetch build artifacts" -buildkite-agent artifact download "python-package/dist/*.whl" . --step build-cuda -buildkite-agent artifact download "build/testxgboost" . --step build-cuda -chmod +x build/testxgboost - -# Allocate extra space in /dev/shm to enable NCCL -export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g' - -if [[ -z "${USE_DEPS_DEV_VER-}" ]] -then - container_tag='gpu' - rapids_version=${RAPIDS_VERSION} -else - container_tag='gpu_dev_ver' - rapids_version=${DEV_RAPIDS_VERSION} -fi - -command_wrapper="tests/ci_build/ci_build.sh ${container_tag} --use-gpus --build-arg "` - `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "` - `"RAPIDS_VERSION_ARG=${rapids_version} --build-arg "` - `"NCCL_VERSION_ARG=$NCCL_VERSION" - -# Run specified test suite -case "$suite" in - gpu) - export BUILDKITE_ANALYTICS_TOKEN=$(get_aws_secret buildkite/test_analytics/gpu) - set_buildkite_env_vars_in_container - echo "--- Test XGBoost Python package, single GPU" - $command_wrapper tests/ci_build/test_python.sh $suite - ;; - - mgpu) - export BUILDKITE_ANALYTICS_TOKEN=$(get_aws_secret buildkite/test_analytics/mgpu) - set_buildkite_env_vars_in_container - echo "--- Test XGBoost Python package, 4 GPUs" - $command_wrapper tests/ci_build/test_python.sh $suite - ;; - - *) - echo "Usage: $0 {gpu|mgpu} [extra args to pass to pytest]" - exit 1 - ;; -esac diff --git a/tests/buildkite/test-win64-gpu.ps1 b/tests/buildkite/test-win64-gpu.ps1 deleted file mode 100644 index 95a51b50228d..000000000000 --- a/tests/buildkite/test-win64-gpu.ps1 +++ /dev/null @@ -1,39 +0,0 @@ -$ErrorActionPreference = "Stop" - -. tests/buildkite/conftest.ps1 - -Write-Host "--- Test XGBoost on Windows with CUDA" - -New-Item python-package/dist -ItemType Directory -ea 0 -New-Item build -ItemType Directory -ea 0 -buildkite-agent artifact download "python-package/dist/*.whl" . --step build-win64-gpu -if ($LASTEXITCODE -ne 0) { throw "Last command failed" } -buildkite-agent artifact download "build/testxgboost.exe" . --step build-win64-gpu -if ($LASTEXITCODE -ne 0) { throw "Last command failed" } -buildkite-agent artifact download "xgboost.exe" . 
--step build-win64-gpu -if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - -nvcc --version - -Write-Host "--- Run Google Tests" -& build/testxgboost.exe -if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - -Write-Host "--- Set up Python env" -conda activate -$env_name = -join("win64_", (New-Guid).ToString().replace("-", "")) -mamba env create -n ${env_name} --file=tests/ci_build/conda_env/win64_test.yml -conda activate ${env_name} -Get-ChildItem . -Filter python-package/dist/*.whl | -Foreach-Object { - & python -m pip install python-package/dist/$_ - if ($LASTEXITCODE -ne 0) { throw "Last command failed" } -} - -Write-Host "--- Run Python tests" -python -X faulthandler -m pytest -v -s -rxXs --fulltrace tests/python -if ($LASTEXITCODE -ne 0) { throw "Last command failed" } -Write-Host "--- Run Python tests with GPU" -python -X faulthandler -m pytest -v -s -rxXs --fulltrace -m "(not slow) and (not mgpu)"` - tests/python-gpu -if ($LASTEXITCODE -ne 0) { throw "Last command failed" } diff --git a/tests/ci_build/Dockerfile.gpu_dev_ver b/tests/ci_build/Dockerfile.gpu_dev_ver deleted file mode 100644 index d23c5e83c2c7..000000000000 --- a/tests/ci_build/Dockerfile.gpu_dev_ver +++ /dev/null @@ -1,54 +0,0 @@ -# Container to test XGBoost against dev versions of dependencies - -ARG CUDA_VERSION_ARG -FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu22.04 -ARG CUDA_VERSION_ARG -ARG RAPIDS_VERSION_ARG - # Should be first 4 digits of the dev version (e.g. 24.06) -ARG NCCL_VERSION_ARG - -# Environment -ENV DEBIAN_FRONTEND=noninteractive -SHELL ["/bin/bash", "-c"] # Use Bash as shell - -# Install all basic requirements -RUN \ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \ - apt-get update && \ - apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \ - # Python - wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \ - bash conda.sh -b -p /opt/miniforge - -ENV PATH=/opt/miniforge/bin:$PATH - -# Create new Conda environment with dev versions of cuDF, Dask, and cuPy -RUN \ - export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \ - export CUDA_SHORT_VER=$(echo "$CUDA_VERSION_ARG" | grep -o -E '[0-9]+\.[0-9]') && \ - mamba create -y -n gpu_test -c rapidsai-nightly -c conda-forge -c nvidia \ - python=3.10 "cudf=$RAPIDS_VERSION_ARG.*" "rmm=$RAPIDS_VERSION_ARG.*" cuda-version=$CUDA_SHORT_VER \ - "nccl>=${NCCL_SHORT_VER}" \ - dask \ - "dask-cuda=$RAPIDS_VERSION_ARG.*" "dask-cudf=$RAPIDS_VERSION_ARG.*" cupy \ - numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel \ - python-kubernetes urllib3 graphviz hypothesis loky \ - "pyspark>=3.4.0" cloudpickle cuda-python && \ - mamba clean --all --yes && \ - conda run --no-capture-output -n gpu_test pip install buildkite-test-collector - -ENV GOSU_VERSION=1.10 -ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ - -# Install lightweight sudo (not bound to TTY) -RUN set -ex; \ - wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \ - chmod +x /usr/local/bin/gosu && \ - gosu nobody true - -# Default entry-point to use if running locally -# It will preserve attributes of created files -COPY entrypoint.sh /scripts/ - -WORKDIR /workspace -ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/Dockerfile.jvm_manylinux2014_aarch64 b/tests/ci_build/Dockerfile.jvm_manylinux2014_aarch64 
deleted file mode 100644 index 52baff43bb6f..000000000000 --- a/tests/ci_build/Dockerfile.jvm_manylinux2014_aarch64 +++ /dev/null @@ -1,17 +0,0 @@ -FROM quay.io/pypa/manylinux2014_aarch64 - -RUN yum update -y && yum install -y java-1.8.0-openjdk-devel - -# Install lightweight sudo (not bound to TTY) -ENV GOSU_VERSION=1.10 -RUN set -ex; \ - curl -o /usr/local/bin/gosu -L "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-arm64" && \ - chmod +x /usr/local/bin/gosu && \ - gosu nobody true - -# Default entry-point to use if running locally -# It will preserve attributes of created files -COPY entrypoint.sh /scripts/ - -WORKDIR /workspace -ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/Dockerfile.jvm_manylinux2014_x86_64 b/tests/ci_build/Dockerfile.jvm_manylinux2014_x86_64 deleted file mode 100644 index 578b85618776..000000000000 --- a/tests/ci_build/Dockerfile.jvm_manylinux2014_x86_64 +++ /dev/null @@ -1,17 +0,0 @@ -FROM quay.io/pypa/manylinux2014_x86_64 - -RUN yum update -y && yum install -y java-1.8.0-openjdk-devel ninja-build - -# Install lightweight sudo (not bound to TTY) -ENV GOSU_VERSION=1.10 -RUN set -ex; \ - curl -o /usr/local/bin/gosu -L "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \ - chmod +x /usr/local/bin/gosu && \ - gosu nobody true - -# Default entry-point to use if running locally -# It will preserve attributes of created files -COPY entrypoint.sh /scripts/ - -WORKDIR /workspace -ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/build_jvm_doc.sh b/tests/ci_build/build_jvm_doc.sh deleted file mode 100755 index 01a91dd629b5..000000000000 --- a/tests/ci_build/build_jvm_doc.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash - -if [ $# -ne 1 ]; then - echo "Usage: $0 [branch name]" - exit 1 -fi - -set -e -set -x - -rm -rf build/ -cd jvm-packages - -branch_name=$1 - -# Install JVM packages in local Maven repository -mvn --no-transfer-progress install -DskipTests -# Build Scaladocs -mvn --no-transfer-progress scala:doc -DskipTests -# Build Javadocs -mvn --no-transfer-progress javadoc:javadoc -DskipTests - -# Package JVM docs in a tarball -mkdir -p tmp/scaladocs -cp -rv xgboost4j/target/reports/apidocs/ ./tmp/javadocs/ -cp -rv xgboost4j/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j/ -cp -rv xgboost4j-spark/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j-spark/ -cp -rv xgboost4j-flink/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j-flink/ - -cd tmp -tar cvjf ${branch_name}.tar.bz2 javadocs/ scaladocs/ -mv ${branch_name}.tar.bz2 .. -cd .. -rm -rfv tmp/ - -set +x -set +e diff --git a/tests/ci_build/build_jvm_packages.sh b/tests/ci_build/build_jvm_packages.sh deleted file mode 100755 index 99681f5ca43c..000000000000 --- a/tests/ci_build/build_jvm_packages.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash - -set -e -set -x - -spark_version=$1 -use_cuda=$2 -gpu_arch=$3 -use_scala213=$4 - -gpu_options="" -if [ "x$use_cuda" == "x-Duse.cuda=ON" ]; then - gpu_options="$use_cuda -Pgpu" -fi - -rm -rf build/ -cd jvm-packages - -if [ "x$gpu_arch" != "x" ]; then - export GPU_ARCH_FLAG=$gpu_arch -fi - -# Purge artifacts and set correct Scala version -pushd .. 
-if [ "x$use_scala213" != "x" ]; then - python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts -else - python dev/change_scala_version.py --scala-version 2.12 --purge-artifacts -fi -popd - -# Build and test XGBoost4j-spark against different spark versions only for CPU and scala=2.12 -if [ "x$gpu_options" == "x" ] && [ "x$use_scala213" == "x" ]; then - mvn --no-transfer-progress clean package -Dspark.version=3.1.3 -pl xgboost4j,xgboost4j-spark - mvn --no-transfer-progress clean package -Dspark.version=3.2.4 -pl xgboost4j,xgboost4j-spark - mvn --no-transfer-progress clean package -Dspark.version=3.3.4 -pl xgboost4j,xgboost4j-spark - mvn --no-transfer-progress clean package -Dspark.version=3.4.3 -pl xgboost4j,xgboost4j-spark -fi - -mvn --no-transfer-progress clean install -Dspark.version=${spark_version} $gpu_options - -# Integration tests -if [ "x$use_cuda" == "x" ]; then - mvn --no-transfer-progress test -pl xgboost4j-example -fi - -set +x -set +e diff --git a/tests/ci_build/ci_build.sh b/tests/ci_build/ci_build.sh deleted file mode 100755 index a2f2d6063160..000000000000 --- a/tests/ci_build/ci_build.sh +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env bash -# -# Execute command within a docker container -# -# Usage: ci_build.sh [--use-gpus] -# [--dockerfile ] [-it] -# [--build-arg ] -# -# CONTAINER_TYPE: Type of the docker container used the run the build: e.g., -# (cpu | gpu) -# -# --use-gpus: Whether to grant the container access to NVIDIA GPUs. -# -# DOCKERFILE_PATH: (Optional) Path to the Dockerfile used for docker build. If -# this optional value is not supplied (via the --dockerfile -# flag), will use Dockerfile.CONTAINER_TYPE in default -# -# BUILD_ARG: (Optional) an argument to be passed to docker build -# -# COMMAND: Command to be executed in the docker container -# -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -# Get the command line arguments. -CONTAINER_TYPE=$( echo "$1" | tr '[:upper:]' '[:lower:]' ) -shift 1 - -# Dockerfile to be used in docker build -DOCKERFILE_PATH="${SCRIPT_DIR}/Dockerfile.${CONTAINER_TYPE}" -DOCKER_CONTEXT_PATH="${SCRIPT_DIR}" - -GPU_FLAG='' -if [[ "$1" == "--use-gpus" ]]; then - echo "Using NVIDIA GPUs" - GPU_FLAG='--gpus all' - shift 1 -fi - -if [[ "$1" == "--dockerfile" ]]; then - DOCKERFILE_PATH="$2" - DOCKER_CONTEXT_PATH=$(dirname "${DOCKERFILE_PATH}") - echo "Using custom Dockerfile path: ${DOCKERFILE_PATH}" - echo "Using custom docker build context path: ${DOCKER_CONTEXT_PATH}" - shift 2 -fi - -if [[ -n "${CI_DOCKER_EXTRA_PARAMS_INIT}" ]] -then - IFS=' ' read -r -a CI_DOCKER_EXTRA_PARAMS <<< "${CI_DOCKER_EXTRA_PARAMS_INIT}" -fi - -if [[ "$1" == "-it" ]]; then - CI_DOCKER_EXTRA_PARAMS+=('-it') - shift 1 -fi - -while [[ "$1" == "--build-arg" ]]; do - CI_DOCKER_BUILD_ARG+=" $1" - CI_DOCKER_BUILD_ARG+=" $2" - shift 2 -done - -if [[ ! -f "${DOCKERFILE_PATH}" ]]; then - echo "Invalid Dockerfile path: \"${DOCKERFILE_PATH}\"" - exit 1 -fi - -COMMAND=("$@") - -# Validate command line arguments. -if [ "$#" -lt 1 ] || [ ! -e "${SCRIPT_DIR}/Dockerfile.${CONTAINER_TYPE}" ]; then - supported_container_types=$( ls -1 ${SCRIPT_DIR}/Dockerfile.* | \ - sed -n 's/.*Dockerfile\.\([^\/]*\)/\1/p' | tr '\n' ' ' ) - echo "Usage: $(basename $0) CONTAINER_TYPE COMMAND" - echo " CONTAINER_TYPE can be one of [${supported_container_types}]" - echo " COMMAND is a command (with arguments) to run inside" - echo " the container." - exit 1 -fi - -# Helper function to traverse directories up until given file is found. 
-function upsearch () { - test / == "$PWD" && return || \ - test -e "$1" && echo "$PWD" && return || \ - cd .. && upsearch "$1" -} - -# Set up WORKSPACE. Jenkins will set them for you or we pick -# reasonable defaults if you run it outside of Jenkins. -WORKSPACE="${WORKSPACE:-${SCRIPT_DIR}/../../}" - -# Determine the docker image name -DOCKER_IMG_NAME="xgb-ci.${CONTAINER_TYPE}" - -# Append cuda version if available -CUDA_VERSION=$(echo "${CI_DOCKER_BUILD_ARG}" | grep -o -E 'CUDA_VERSION_ARG=[0-9]+\.[0-9]+' | grep -o -E '[0-9]+\.[0-9]+') -# Append jdk version if available -JDK_VERSION=$(echo "${CI_DOCKER_BUILD_ARG}" | grep -o -E 'JDK_VERSION=[0-9]+' | grep -o -E '[0-9]+') -# Append cmake version if available -CMAKE_VERSION=$(echo "${CI_DOCKER_BUILD_ARG}" | grep -o -E 'CMAKE_VERSION=[0-9]+\.[0-9]+' | grep -o -E '[0-9]+\.[0-9]+') -# Append R version if available -USE_R35=$(echo "${CI_DOCKER_BUILD_ARG}" | grep -o -E 'USE_R35=[0-9]+' | grep -o -E '[0-9]+$') -if [[ ${USE_R35} == "1" ]]; then - USE_R35="_r35" -elif [[ ${USE_R35} == "0" ]]; then - USE_R35="_no_r35" -fi -DOCKER_IMG_NAME=$DOCKER_IMG_NAME$CUDA_VERSION$JDK_VERSION$CMAKE_VERSION$USE_R35 - -# Under Jenkins matrix build, the build tag may contain characters such as -# commas (,) and equal signs (=), which are not valid inside docker image names. -DOCKER_IMG_NAME=$(echo "${DOCKER_IMG_NAME}" | sed -e 's/=/_/g' -e 's/,/-/g') - -# Convert to all lower-case, as per requirement of Docker image names -DOCKER_IMG_NAME=$(echo "${DOCKER_IMG_NAME}" | tr '[:upper:]' '[:lower:]') - -# Bash on Ubuntu on Windows -UBUNTU_ON_WINDOWS=$([ -e /proc/version ] && grep -l Microsoft /proc/version || echo "") -# MSYS, Git Bash, etc. -MSYS=$([ -e /proc/version ] && grep -l MINGW /proc/version || echo "") - -if [[ -z "$UBUNTU_ON_WINDOWS" ]] && [[ -z "$MSYS" ]] && [[ ! "$OSTYPE" == "darwin"* ]]; then - USER_IDS="-e CI_BUILD_UID=$( id -u ) -e CI_BUILD_GID=$( id -g ) -e CI_BUILD_USER=$( id -un ) -e CI_BUILD_GROUP=$( id -gn ) -e CI_BUILD_HOME=${WORKSPACE}" -fi - -# Print arguments. -cat <=1.4.1 -- pandas -- matplotlib -- dask -- distributed -- python-graphviz -- pytest -- jsonschema -- hypothesis -- python-graphviz -- pip -- py-ubjson -- loky -- pyarrow diff --git a/tests/ci_build/deploy_jvm_packages.sh b/tests/ci_build/deploy_jvm_packages.sh deleted file mode 100755 index 2cb108c8bc6f..000000000000 --- a/tests/ci_build/deploy_jvm_packages.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -set -e -set -x - -if [ $# -ne 1 ]; then - echo "Usage: $0 [spark version]" - exit 1 -fi - -spark_version=$1 - -cd jvm-packages -rm -rf $(find . -name target) -rm -rf ../build/ - -## Deploy JVM packages to xgboost-maven-repo - -# Scala 2.12, CPU variant -mvn --no-transfer-progress deploy -Pdefault,release-to-s3 -Dspark.version=${spark_version} -DskipTests -Dmaven.test.skip=true -mvn clean -mvn clean -Pdefault,release-to-s3 - -# Scala 2.12, GPU variant -mvn --no-transfer-progress install -Pgpu -Dspark.version=${spark_version} -DskipTests -Dmaven.test.skip=true -mvn --no-transfer-progress deploy -Pgpu,release-to-s3 -pl xgboost4j-spark-gpu -Dspark.version=${spark_version} -DskipTests -Dmaven.test.skip=true - -# Scala 2.13, CPU variant -pushd .. 
-python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts -popd -mvn --no-transfer-progress deploy -Pdefault,release-to-s3 -Dspark.version=${spark_version} -DskipTests -Dmaven.test.skip=true -mvn clean -mvn clean -Pdefault,release-to-s3 - -# Scala 2.13, GPU variant -mvn --no-transfer-progress install -Pgpu -Dspark.version=${spark_version} -DskipTests -Dmaven.test.skip=true -mvn --no-transfer-progress deploy -Pgpu,release-to-s3 -pl xgboost4j-spark-gpu -Dspark.version=${spark_version} -DskipTests -Dmaven.test.skip=true - -set +x -set +e diff --git a/tests/ci_build/jenkins_tools.Groovy b/tests/ci_build/jenkins_tools.Groovy deleted file mode 100644 index 1bc2574c6ac0..000000000000 --- a/tests/ci_build/jenkins_tools.Groovy +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/groovy -// -*- mode: groovy -*- - -/* Utility functions for Jenkins */ - -// Command to run command inside a docker container -dockerRun = 'tests/ci_build/ci_build.sh' - - -/** - * Creates cmake and make builds - */ -def buildFactory(buildName, conf, restricted, build_func) { - def os = conf["os"] - def device = conf["withGpu"] ? (conf["multiGpu"] ? "mgpu" : "gpu") : "cpu" - def restricted_flag = restricted ? "restricted" : "unrestricted" - def nodeReq = "${os} && ${device} && ${restricted_flag}" - def dockerTarget = conf["withGpu"] ? "gpu" : "cpu" - [ ("${buildName}") : { build_func("${buildName}", conf, nodeReq, dockerTarget) } - ] -} - -def cmakeOptions(conf) { - return ([ - conf["withGpu"] ? '-DUSE_CUDA=ON' : '-DUSE_CUDA=OFF', - conf["withNccl"] ? '-DUSE_NCCL=ON' : '-DUSE_NCCL=OFF', - conf["withOmp"] ? '-DOPEN_MP:BOOL=ON' : ''] - ).join(" ") -} - -def getBuildName(conf) { - def gpuLabel = conf['withGpu'] ? ( (conf['multiGpu'] ? "_mgpu" : "") + "_cuda" + conf['cudaVersion'] + (conf['withNccl'] ? "_nccl" : "_nonccl")) : "_cpu" - def ompLabel = conf['withOmp'] ? "_omp" : "" - def pyLabel = "_py${conf['pythonVersion']}" - return "${conf['os']}${gpuLabel}${ompLabel}${pyLabel}" -} - -return this diff --git a/tests/ci_build/test_python.sh b/tests/ci_build/test_python.sh deleted file mode 100755 index a1a023046e5b..000000000000 --- a/tests/ci_build/test_python.sh +++ /dev/null @@ -1,111 +0,0 @@ -#!/bin/bash -set -e - -if [ "$#" -lt 1 ] -then - suite='' - args='' -else - suite=$1 - shift 1 - args="$@" -fi - -# Install XGBoost Python package -function install_xgboost { - wheel_found=0 - pip install --upgrade pip --user - for file in python-package/dist/*.whl - do - if [ -e "${file}" ] - then - pip install --user "${file}" - wheel_found=1 - break # need just one - fi - done - if [ "$wheel_found" -eq 0 ] - then - pushd . - cd python-package - pip install --user -v . 
- popd - fi -} - -function setup_pyspark_envs { - export PYSPARK_DRIVER_PYTHON=`which python` - export PYSPARK_PYTHON=`which python` - export SPARK_TESTING=1 -} - -function unset_pyspark_envs { - unset PYSPARK_DRIVER_PYTHON - unset PYSPARK_PYTHON - unset SPARK_TESTING -} - -function uninstall_xgboost { - pip uninstall -y xgboost -} - -# Run specified test suite -case "$suite" in - gpu) - source activate gpu_test - set -x - install_xgboost - setup_pyspark_envs - python -c 'from cupy.cuda import jitify; jitify._init_module()' - pytest -v -s -rxXs --fulltrace --durations=0 -m "not mgpu" ${args} tests/python-gpu - unset_pyspark_envs - uninstall_xgboost - set +x - ;; - - mgpu) - source activate gpu_test - set -x - install_xgboost - setup_pyspark_envs - python -c 'from cupy.cuda import jitify; jitify._init_module()' - pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/python-gpu - pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_with_dask - pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_with_spark - pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_federated - unset_pyspark_envs - uninstall_xgboost - set +x - ;; - - cpu) - source activate linux_cpu_test - set -x - install_xgboost - export RAY_OBJECT_STORE_ALLOW_SLOW_STORAGE=1 - setup_pyspark_envs - pytest -v -s -rxXs --fulltrace --durations=0 ${args} tests/python - pytest -v -s -rxXs --fulltrace --durations=0 ${args} tests/test_distributed/test_with_dask - pytest -v -s -rxXs --fulltrace --durations=0 ${args} tests/test_distributed/test_with_spark - pytest -v -s -rxXs --fulltrace --durations=0 ${args} tests/test_distributed/test_federated - unset_pyspark_envs - uninstall_xgboost - set +x - ;; - - cpu-arm64) - source activate aarch64_test - set -x - install_xgboost - setup_pyspark_envs - pytest -v -s -rxXs --fulltrace --durations=0 ${args} tests/python/test_basic.py tests/python/test_basic_models.py tests/python/test_model_compatibility.py - unset_pyspark_envs - uninstall_xgboost - set +x - ;; - - *) - echo "Usage: $0 {gpu|mgpu|cpu|cpu-arm64} [extra args to pass to pytest]" - exit 1 - ;; -esac
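Reviewer notes on the removed CI scripts follow. Each code block below is an illustrative sketch written for this review, not text recovered from the diff; any helper name or example value that does not appear in the diff above is an assumption.

The deleted tests/ci_build/Dockerfile.gpu_dev_ver derives short version strings before creating the conda environment: the NCCL build suffix is cut off, and only major.minor is kept from the CUDA version. A minimal sketch of that derivation, runnable outside Docker, with example input values (assumptions, not pinned anywhere in the diff):

    #!/usr/bin/env bash
    # Sketch of the version shortening done in the deleted Dockerfile.gpu_dev_ver.
    NCCL_VERSION_ARG="2.23.4-1"   # example value (assumption)
    CUDA_VERSION_ARG="12.4.1"     # example value (assumption)
    # Drop the "-1" build suffix: 2.23.4-1 -> 2.23.4
    NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1)
    # Keep only major.minor: 12.4.1 -> 12.4
    CUDA_SHORT_VER=$(echo "$CUDA_VERSION_ARG" | grep -o -E '[0-9]+\.[0-9]')
    echo "nccl>=${NCCL_SHORT_VER} cuda-version=${CUDA_SHORT_VER}"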
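The deleted tests/ci_build/build_jvm_packages.sh hard-codes four Spark releases and repeats the same mvn invocation for each when exercising the CPU, Scala 2.12 build. A loop keeps the version list in one place; this sketch assumes the module selection (-pl xgboost4j,xgboost4j-spark) stays as in the original:

    #!/usr/bin/env bash
    set -euo pipefail
    # Sketch: build xgboost4j and xgboost4j-spark against several Spark releases,
    # mirroring the version list hard-coded in the deleted script.
    spark_versions=(3.1.3 3.2.4 3.3.4 3.4.3)
    for v in "${spark_versions[@]}"; do
      mvn --no-transfer-progress clean package \
        -Dspark.version="$v" -pl xgboost4j,xgboost4j-spark
    done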
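The deleted tests/ci_build/ci_build.sh composes the Docker image name by scraping version numbers out of the accumulated --build-arg string with grep and then sanitizing the result for Docker's naming rules. The same scheme as a standalone function; make_image_name is a hypothetical helper introduced here, not part of the original script:

    #!/usr/bin/env bash
    # Sketch: derive a CI image name from '--build-arg KEY=VALUE' text,
    # following the naming logic of the deleted ci_build.sh.
    make_image_name() {   # hypothetical helper (assumption)
      local container_type=$1 build_args=$2
      local cuda jdk cmake name
      cuda=$(grep -oE 'CUDA_VERSION_ARG=[0-9]+\.[0-9]+' <<<"$build_args" | grep -oE '[0-9]+\.[0-9]+' || true)
      jdk=$(grep -oE 'JDK_VERSION=[0-9]+' <<<"$build_args" | grep -oE '[0-9]+' || true)
      cmake=$(grep -oE 'CMAKE_VERSION=[0-9]+\.[0-9]+' <<<"$build_args" | grep -oE '[0-9]+\.[0-9]+' || true)
      name="xgb-ci.${container_type}${cuda}${jdk}${cmake}"
      # Image names must be lower-case; '=' and ',' are not valid in them either.
      sed -e 's/=/_/g' -e 's/,/-/g' <<<"$name" | tr '[:upper:]' '[:lower:]'
    }

    make_image_name gpu "--build-arg CUDA_VERSION_ARG=12.4"   # prints: xgb-ci.gpu12.4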
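The upsearch helper in the same script climbs the directory tree by recursing after cd .., which changes the caller's working directory as a side effect. A side-effect-free variant; the iterative form is my change, while the search-upward behavior matches the original:

    #!/usr/bin/env bash
    # Sketch: find the nearest ancestor directory containing a marker file,
    # without cd-ing around as the deleted upsearch() did.
    upsearch() {
      local dir
      dir=$(pwd)
      while [[ "$dir" != "/" ]]; do
        if [[ -e "$dir/$1" ]]; then
          echo "$dir"
          return 0
        fi
        dir=$(dirname "$dir")
      done
      return 1
    }

    # Usage: locate the enclosing checkout, falling back to the current directory.
    workspace=$(upsearch .git) || workspace=$PWD
    echo "workspace: $workspace"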
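The deleted tests/ci_build/deploy_jvm_packages.sh runs the same deploy sequence once per Scala version, rewriting the POMs with dev/change_scala_version.py in between. Purely as an illustration, the two passes could be folded into a loop; this sketch drops the interleaved mvn clean calls of the original, assumes it runs from jvm-packages/ like the body of the original, and is not a drop-in replacement:

    #!/usr/bin/env bash
    set -euo pipefail
    spark_version=$1
    # Sketch: deploy CPU and GPU variants for each supported Scala version.
    for scala in 2.12 2.13; do
      (cd .. && python dev/change_scala_version.py --scala-version "$scala" --purge-artifacts)
      # CPU variant
      mvn --no-transfer-progress deploy -Pdefault,release-to-s3 \
        -Dspark.version="${spark_version}" -DskipTests -Dmaven.test.skip=true
      # GPU variant: install everything, then deploy only xgboost4j-spark-gpu
      mvn --no-transfer-progress install -Pgpu \
        -Dspark.version="${spark_version}" -DskipTests -Dmaven.test.skip=true
      mvn --no-transfer-progress deploy -Pgpu,release-to-s3 -pl xgboost4j-spark-gpu \
        -Dspark.version="${spark_version}" -DskipTests -Dmaven.test.skip=true
    done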
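Finally, install_xgboost in the removed test_python.sh prefers a pre-built wheel from python-package/dist/ and falls back to a source install when none exists. The same idea condensed with nullglob; that simplification, and dropping the original's pip self-upgrade, are mine:

    #!/usr/bin/env bash
    set -euo pipefail
    # Sketch: install XGBoost from a freshly built wheel if present, else from source.
    shopt -s nullglob
    wheels=(python-package/dist/*.whl)
    if ((${#wheels[@]})); then
      pip install --user "${wheels[0]}"   # one wheel is enough
    else
      pip install --user -v ./python-package
    fi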