Skip to content

Commit

Permalink
Direct Driver HAL (#816)
Browse files Browse the repository at this point in the history
  • Loading branch information
makslevental authored Oct 19, 2024
1 parent 4c965a0 commit fad9629
Show file tree
Hide file tree
Showing 77 changed files with 8,432 additions and 357 deletions.
3 changes: 1 addition & 2 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
/compiler/ @MaheshRavishankar @nirvedhmeshram @yzhang93 @Abhishek-Varma @jtuyls

# Runtime
/runtime/ @nirvedhmeshram
/runtime/src/iree-amd-aie/aie_runtime @makslevental
/runtime/ @makslevental

# AIE Passes
/compiler/plugins/target/AMD-AIE/aie @makslevental
Expand Down
74 changes: 35 additions & 39 deletions .github/workflows/ci-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,10 @@ jobs:
git remote add origin $REPO_ADDRESS
git -c protocol.version=2 fetch --depth 1 origin $BRANCH_NAME
git reset --hard FETCH_HEAD
git -c submodule."third_party/torch-mlir".update=none -c submodule."third_party/stablehlo".update=none -c submodule."src/runtime_src/core/common/aiebu".update=none submodule update --init --recursive --depth 1 --single-branch -j 10
- name: Install deps
run: |
dnf install -y almalinux-release-devel epel-release
yum remove -y openssl-devel zlib-devel || true
yum install -y protobuf-devel protobuf-compiler tmate
git -c submodule."third_party/torch-mlir".update=none \
-c submodule."third_party/stablehlo".update=none \
-c submodule."third_party/XRT".update=none \
submodule update --init --recursive --depth 1 --single-branch -j 10
- name: Python deps
run: |
Expand All @@ -69,6 +66,11 @@ jobs:
key: ${{ env.CACHE_KEY }}
restore-keys: linux-build-test-cpp-

- name: Peano dep
run: |
bash build_tools/download_peano.sh
echo "PEANO_INSTALL_DIR=$PWD/llvm-aie" >> $GITHUB_ENV
- name: Build packages
run: |
export cache_dir="${{ env.CACHE_DIR }}"
Expand Down Expand Up @@ -147,60 +149,54 @@ jobs:
source .venv/bin/activate
pip install -r tests/requirements.txt
- name: Query device info
run: |
source .venv/bin/activate
echo "aie-metadata"
python build_tools/ci/amdxdna_driver_utils/amdxdna_ioctl.py --aie-metadata
echo "aie-version"
python build_tools/ci/amdxdna_driver_utils/amdxdna_ioctl.py --aie-version
echo "XRT_LITE_N_CORE_ROWS=$(python build_tools/ci/amdxdna_driver_utils/amdxdna_ioctl.py --num-rows)" >> $GITHUB_ENV
echo "XRT_LITE_N_CORE_COLS=$(python build_tools/ci/amdxdna_driver_utils/amdxdna_ioctl.py --num-cols)" >> $GITHUB_ENV
- name : E2E comparison of AIE to llvm-cpu
run: |
source .venv/bin/activate
source /opt/xilinx/xrt/setup.sh
python build_tools/ci/cpu_comparison/run.py \
test_aie_vs_cpu \
$PWD/iree-install \
$PWD/llvm-aie \
--xrt-dir /opt/xilinx/xrt \
--vitis-dir /opt/Xilinx/Vitis/2024.2 \
--reset-npu-between-runs -v
--reset-npu-between-runs -v \
--xrt_lite_n_core_rows=$XRT_LITE_N_CORE_ROWS \
--xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS
- name: E2E correctness matmul test
run: |
# Without this additional line an error like
#
# [XRT] ERROR: Failed to allocate host memory buffer (mmap(len=10616832, prot=3, flags=8193, offset=4294967296)
# failed (err=11): Resource temporarily unavailable), make sure host bank is enabled (see xbutil configure --host-mem)
# iree-amd-aie/runtime/src/iree-amd-aie/driver/xrt/direct_allocator.cc:179: RESOURCE_EXHAUSTED; could not allocate
# memory for buffer; while invoking C++ function matmul_test.generate_random_matrix; while calling import;
#
# might be observed when too much memory is allocated. This
# error was seen when running a bf16->f32 matmul with m=n=k=2304.
#
# This line was suggested at https://github.com/Xilinx/mlir-air/issues/566
#
# Note that this is only half of the fix. It is also necessary that
# the machine that CI is running on has permission to run this line.
#
# This permission can be added by adding the line
# ```
# %github ALL=(ALL) NOPASSWD: /usr/bin/prlimit *
# ```
#
# to the file /etc/sudoers.d/github, which can be done by running
# ```
# sudo visudo -f /etc/sudoers.d/github
# ```
# on the github CI machine.
# https://stackoverflow.com/a/17567422
# shim_xdna::bo::map_drm_bo does an mmap with MAP_LOCKED
# which can fail if the locked-memory limit is too low
sudo prlimit -lunlimited --pid $$
source .venv/bin/activate
source /opt/xilinx/xrt/setup.sh
bash build_tools/ci/run_matmul_test.sh \
test_matmuls \
iree-install \
$PWD/llvm-aie \
/opt/xilinx/xrt \
/opt/Xilinx/Vitis/2024.2
- name: Python tests
run: |
source .venv/bin/activate
source /opt/xilinx/xrt/setup.sh
pytest -v tests \
--capture=tee-sys \
--iree-install-dir=$PWD/iree-install \
--peano-install-dir=$PWD/llvm-aie
--peano-install-dir=$PWD/llvm-aie \
--xrt_lite_n_core_rows=$XRT_LITE_N_CORE_ROWS \
--xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS
- name: XRT-LITE tests
run: |
DEVICE_TEST_DIR="$PWD/iree-install/device_tests"
for t in $(ls $DEVICE_TEST_DIR); do
$DEVICE_TEST_DIR/$t --xrt_lite_n_core_rows=$XRT_LITE_N_CORE_ROWS --xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS
done
5 changes: 4 additions & 1 deletion .github/workflows/ci-macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,10 @@ jobs:
git remote add origin $REPO_ADDRESS
git -c protocol.version=2 fetch --depth 1 origin $BRANCH_NAME
git reset --hard FETCH_HEAD
git -c submodule."third_party/torch-mlir".update=none -c submodule."third_party/stablehlo".update=none -c submodule."src/runtime_src/core/common/aiebu".update=none submodule update --init --recursive --depth 1 --single-branch -j 10
git -c submodule."third_party/torch-mlir".update=none \
-c submodule."third_party/stablehlo".update=none \
-c submodule."third_party/XRT".update=none \
submodule update --init --recursive --depth 1 --single-branch -j 10
- uses: actions/setup-python@v4
with:
Expand Down
20 changes: 15 additions & 5 deletions .github/workflows/ci-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,10 @@ jobs:
git remote add origin $REPO_ADDRESS
git -c protocol.version=2 fetch --depth 1 origin $BRANCH_NAME
git reset --hard FETCH_HEAD
git -c submodule."third_party/torch-mlir".update=none -c submodule."third_party/stablehlo".update=none -c submodule."src/runtime_src/core/common/aiebu".update=none submodule update --init --recursive --depth 1 --single-branch -j 10
git -c submodule."third_party/torch-mlir".update=none \
-c submodule."third_party/stablehlo".update=none \
-c submodule."src/runtime_src/core/common/aiebu".update=none \
submodule update --init --recursive --depth 1 --single-branch -j 10
- name: Setup Cpp
uses: aminya/setup-cpp@v1
Expand All @@ -87,14 +90,18 @@ jobs:
key: ${{ env.CACHE_KEY }}
restore-keys: windows-build-test-cpp-

- name: Peano dep
run: |
.\build_tools\download_peano.ps1
Add-Content -Path $env:GITHUB_ENV -Value "PEANO_INSTALL_DIR=$PWD\llvm-aie"
- name: Build packages
run: |
$env:cache_dir = "${{ env.CACHE_DIR }}"
$env:CCACHE_COMPILERCHECK = "string:$(clang-cl.exe --version)"
.\build_tools\build_llvm.ps1
# Remove-Item -Path "$pwd\llvm-build" -Force
$env:llvm_install_dir = "$pwd\llvm-install"
echo $env:llvm_install_dir
.\build_tools.\build_test_cpp.ps1
- name: Create artifacts
Expand Down Expand Up @@ -170,6 +177,7 @@ jobs:
shell: bash
run: |
source .venv/Scripts/activate
export DEVICE_HAL=xrt
bash build_tools/ci/run_matmul_test.sh \
/c/test_matmuls \
$PWD/iree-install \
Expand All @@ -182,7 +190,8 @@ jobs:
python build_tools/ci/cpu_comparison/run.py \
/c/test_aie_vs_cpu \
$PWD/iree-install \
$PWD/llvm-aie -v
$PWD/llvm-aie -v \
--device-hal=xrt
- name: Python tests
run: |
Expand All @@ -191,5 +200,6 @@ jobs:
mkdir temp
pytest tests -sv `
--basetemp=$PWD\temp `
--iree-install-dir="$PWD/iree-install" `
--peano-install-dir="$PWD/llvm-aie"
--iree-install-dir="$PWD\iree-install" `
--peano-install-dir="$PWD\llvm-aie" `
--device-hal=xrt
Loading

0 comments on commit fad9629

Please sign in to comment.