Cache Triton compilation artifacts during CI. #1
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# integration-tests.in.yml is used to generate integration-tests.yml by
# expanding yaml anchors, because github actions don't support them
# (https://github.com/actions/runner/issues/1182).  pre-commit will do this for
# you automatically.
name: Integration Tests

# Events that start this workflow.
on:
  workflow_dispatch:
  pull_request:
    # Name your branch dev-foo to get CI runs for it.
    branches:
      - main
      - 'dev-**'
  merge_group:
    branches:
      - main
      - 'dev-**'
    types:
      - checks_requested
  push:
    branches:
      - main
# One concurrency group per ref: a newer run for the same ref cancels the
# in-progress one, except on `main`, where runs are never cancelled. The
# comparison must name `main` (the branch `push` triggers on above); comparing
# against `master` made the expression always true, so runs on main were
# cancelled as well.
concurrency:
  group: ${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
# Give the workflow token read-only access to all scopes.
permissions: read-all

# Environment variables exported to every job in this workflow. Individual
# test commands may override them inline (e.g. TRITON_DISABLE_LINE_INFO=0 for
# test_line_info.py).
env:
  TRITON_BUILD_WITH_CLANG_LLD: 'TRUE'
  TRITON_USE_ASSERT_ENABLED_LLVM: 'TRUE'
  TRITON_DISABLE_LINE_INFO: '1'
jobs:
  # Decides whether the integration tests should run for this event and
  # publishes the runner matrices consumed by the test jobs below.
  #
  # NOTE(review): enable_integration is only set for `pull_request` and `push`
  # events, so `workflow_dispatch` and `merge_group` runs leave both matrices
  # empty and the test jobs are skipped -- confirm that this is intended.
  Runner-Preparation:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    outputs:
      matrix-CUDA: ${{ steps.set-matrix.outputs.matrix-CUDA }}
      matrix-HIP: ${{ steps.set-matrix.outputs.matrix-HIP }}
    steps:
      - name: Decide pre-submit integration test enablement
        # Always enable integration tests for pre-submit pull requests.
        if: github.event_name == 'pull_request'
        run: |
          echo "enable_integration=true" >> "$GITHUB_ENV"
      - name: Checkout post-submit commits
        if: github.event_name == 'push'
        uses: actions/checkout@v4
        with:
          # Only fetch two commits to check the latest changed files.
          fetch-depth: 2
      - name: Detect post-submit changed files
        id: detect-change
        if: github.event_name == 'push'
        # NOTE(review): third-party action referenced by a mutable tag;
        # consider pinning it to a full commit SHA.
        uses: tj-actions/changed-files@v44
        with:
          # Monitor hash or version files in the cmake/ directory for changes.
          files: |
            cmake/*.txt
      - name: Decide post-submit integration test enablement
        # Only enable integration tests for post-submit when build dependency changes.
        if: github.event_name == 'push' && steps.detect-change.outputs.any_changed == 'true'
        run: |
          echo "enable_integration=true" >> "$GITHUB_ENV"
      - name: Prepare runner matrix
        id: set-matrix
        if: env.enable_integration == 'true'
        # Step outputs are written to the $GITHUB_OUTPUT file; the
        # `::set-output` workflow command is deprecated.
        run: |
          if [ x"${{ github.repository }}" == x"openai/triton" ]; then
            echo 'matrix-CUDA=[["self-hosted", "A100"], ["self-hosted", "H100"]]' >> "$GITHUB_OUTPUT"
            echo 'matrix-HIP=[["self-hosted", "gfx90a"]]' >> "$GITHUB_OUTPUT"
          else
            echo 'matrix-CUDA=["ubuntu-latest"]' >> "$GITHUB_OUTPUT"
            echo 'matrix-HIP=["ubuntu-latest"]' >> "$GITHUB_OUTPUT"
          fi

  # Runs the repository's pre-commit hooks over all files.
  pre-commit:
    name: pre-commit (code formatting)
    needs: Runner-Preparation
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: 'pip'
      - name: Compute hash of pre-commit config
        id: cache-key
        # Keep only the digest: sha256sum also prints the file name, which
        # would otherwise end up (with embedded spaces) in the cache key.
        run: |
          echo "pre_commit_hash=$(sha256sum .pre-commit-config.yaml | cut -d ' ' -f 1)" >> "$GITHUB_OUTPUT"
        shell: bash
      - name: Cache pre-commit's cache dir
        uses: actions/cache@v4
        with:
          # Note that we cannot use environment variables here given there is
          # no shell to interpret them in the paths.
          path: |
            ~/.cache/pre-commit
          key: ${{ runner.os }}-${{ steps.cache-key.outputs.pre_commit_hash }}
      - name: Check pre-commit
        run: |
          python3 -m pip install --upgrade pre-commit
          # TODO: ignore the first yapf failure until https://github.com/google/yapf/issues/1164 is fixed
          python3 -m pre_commit run --all-files --verbose yapf &> /dev/null || true
          # If first run of yapf worked and made changes reset the tree to the original state
          git reset --hard
          python3 -m pre_commit run --all-files --verbose

  # Builds Triton and runs the lit, Python, interpreter, C++ and Proton tests
  # on each runner of the CUDA matrix.
  Integration-Tests:
    needs: Runner-Preparation
    # Skip the job when Runner-Preparation published no matrix; evaluating
    # fromJson on an empty string would otherwise fail the job.
    if: needs.Runner-Preparation.outputs.matrix-CUDA != ''
    runs-on: ${{ matrix.runner }}
    timeout-minutes: 30
    strategy:
      matrix:
        runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-CUDA)}}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: "true"
      - &compute-cache-keys-step
        name: Compute cache keys
        id: cache-key
        run: |
          echo "llvm=$(cat cmake/llvm-hash.txt | cut -c 1-8)" >> "$GITHUB_OUTPUT"
          echo "pybind11=$(cat cmake/pybind11-version.txt)" >> "$GITHUB_OUTPUT"
          echo "nvidia=$(cat cmake/nvidia-toolchain-version.txt)" >> "$GITHUB_OUTPUT"
          echo "datetime=$(date -u -Iseconds)" >> "$GITHUB_OUTPUT"
        shell: bash
      - &cache-build-dependencies-step
        name: Cache build dependencies
        uses: actions/cache@v4
        with:
          # Note that we cannot use environment variables here given there is
          # no shell to interpret them in the paths.
          path: |
            ~/.triton/llvm
            ~/.triton/nvidia
            ~/.triton/pybind11
          key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-pybind11-${{ steps.cache-key.outputs.pybind11 }}
      # Cache ~/.triton/cache because the vast majority of unit test time is
      # spent compiling.  Triton won't (well, should not) use these cached files
      # if something internal to Triton changes, because Triton's internal
      # source code is part of the cache key.
      #
      # Similarly, cache ~/.cache/ccache to speed up compilation.
      #
      # On branch `main` we always start from an empty cache, i.e. we skip the
      # "restore" step.  This is to prevent the caches from accumulating stale
      # files over time.
      - &restore-build-artifacts-step
        name: Restore cache of ccache and Triton compilation artifacts
        if: github.ref != 'refs/heads/main'
        uses: actions/cache/restore@v4
        with:
          # One path per line: a single space-separated line is treated as
          # one (nonexistent) path.
          path: |
            ~/.triton/cache
            ~/.cache/ccache
          # Restore the most recent cache entry.
          restore-keys: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-
          # We expect this cache key never to hit and for us to fall back
          # unconditionally to the restore-key, so it doesn't actually matter
          # what we put here (so long as it doesn't hit an existing key).
          key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
      - &inspect-cache-directory-step
        name: Inspect cache directory
        run: |
          mkdir -p ~/.triton
          ls -alh ~/.triton
      - name: Update PATH
        run: |
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"
      - name: Install apt dependencies
        run: |
          sudo apt-get update -y
          sudo apt-get install -y ccache clang lld
      - name: Install pip dependencies
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install wheel cmake==3.24 ninja pytest-xdist lit
      - name: Install Triton
        env:
          TRITON_BUILD_PROTON: "true"
          TRITON_BUILD_WITH_CCACHE: "true"
          CUDA_HOME: "/usr/local/cuda"
        run: |
          echo "PATH is '$PATH'"
          cd python
          python3 -m pip install --no-build-isolation -vvv '.[tests]'
      - &run-lit-tests-step
        name: Run lit tests
        run: |
          cd python
          LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
          if [ ! -d "${LIT_TEST_DIR}" ]; then
            echo "Could not find '${LIT_TEST_DIR}'" ; exit 1
          fi
          lit -v "${LIT_TEST_DIR}"
      - name: Run python tests on CUDA
        run: |
          cd python/test/unit
          python3 -m pytest -vvv -n 8 --ignore=hopper/test_flashattention.py --ignore=runtime --ignore=language/test_line_info.py --ignore=language/test_subprocess.py
          python3 -m pytest -vvv -n 8 language/test_subprocess.py
          # Run runtime tests serially to avoid race condition with cache handling
          python3 -m pytest -vvv runtime/
          # Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
          TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -vvv language/test_line_info.py
          # Run hopper/test_flashattention.py separately to avoid out of gpu memory
          python3 -m pytest -vs hopper/test_flashattention.py
      - name: Run interpreter tests
        if: ${{matrix.runner[0] == 'self-hosted' && matrix.runner[1] == 'H100'}}
        env:
          TRITON_INTERPRET: "1"
        run: |
          cd python/test/unit
          python3 -m pytest -vvv -n 16 -m interpreter language/test_core.py language/test_standard.py \
            language/test_random.py language/test_block_pointer.py operators/test_flash_attention.py::test_op \
            --device cpu
      - &run-cpp-unittests-step
        name: Run C++ unittests
        run: |
          cd python
          cd "build/$(ls build | grep -i cmake)"
          ctest
      - name: Run Proton tests
        run: |
          # Values under `env:` are not shell-expanded, so a
          # "$LD_LIBRARY_PATH" written there would be exported literally.
          # Prepend the CUPTI directory here instead, keeping any inherited
          # value.
          export LD_LIBRARY_PATH="/usr/local/cuda/extras/CUPTI/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
          cd third_party/proton
          python3 -m pytest -vvv test
      # If we're on branch `main`, save the ccache Triton compilation artifacts
      # to the cache so they can be used by other (non-main) CI runs.
      #
      # (It wouldn't be a problem to save the cache on every run, because github
      # evicts cache entries LRU, but maybe this saves a bit of time in CI.)
      - &save-build-artifacts-step
        name: Save ccache and Triton compilation artifacts to cache
        if: github.ref == 'refs/heads/main'
        uses: actions/cache/save@v4
        with:
          # Must list the same paths as the restore step, one per line.
          path: |
            ~/.triton/cache
            ~/.cache/ccache
          key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
      - &inspect-cache-directories-step
        name: Inspect cache directories
        run: |
          mkdir -p ~/.triton
          ls -alh ~/.triton
          du -sh ~/.triton/**

          mkdir -p ~/.cache/ccache
          ls -alh ~/.cache/ccache
          du -sh ~/.cache/ccache

  # Builds Triton inside the ROCm container and runs the lit, Python and C++
  # tests on each runner of the HIP matrix.  Shared steps are aliases of the
  # anchors defined in Integration-Tests.
  Integration-Tests-AMD:
    needs: Runner-Preparation
    # Skip the job when Runner-Preparation published no matrix; evaluating
    # fromJson on an empty string would otherwise fail the job.
    if: needs.Runner-Preparation.outputs.matrix-HIP != ''
    runs-on: ${{ matrix.runner }}
    timeout-minutes: 30
    strategy:
      matrix:
        runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-HIP)}}
    container:
      image: rocm/pytorch:rocm6.0.2_ubuntu22.04_py3.10_pytorch_2.1.2
      options: --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: 'true'
      - *compute-cache-keys-step
      - *cache-build-dependencies-step
      - *restore-build-artifacts-step
      - *inspect-cache-directory-step
      - name: Update PATH
        run: |
          echo "/opt/rocm/llvm/bin" >> "$GITHUB_PATH"
      - name: Install pip dependencies
        run: |
          python3 -m pip install lit
      - name: Install Triton
        run: |
          echo "PATH is '$PATH'"
          pip uninstall -y triton
          cd python
          pip install -v -e .
      - *run-lit-tests-step
      - name: Run python tests on HIP
        run: |
          pytest --capture=tee-sys -rfs -vvv python/tutorials/06-fused-attention.py
          cd python/test/unit
          pytest --capture=tee-sys -rfs -vvv -n 32 language operators \
                 --ignore=language/test_conversions.py \
                 --ignore=language/test_line_info.py \
                 --ignore=operators/test_blocksparse.py

          # Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
          TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -vvv -n 8 language/test_line_info.py

          # Run runtime tests serially to avoid race condition with cache handling
          python3 -m pytest -vvv runtime

          # Run hopper tests
          python3 -m pytest -vs hopper/test_mixed_io.py \
                                hopper/test_tma_store_gemm.py
      - *run-cpp-unittests-step
      - *save-build-artifacts-step
      - *inspect-cache-directories-step