diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml index b14c26c91..100ea0eca 100644 --- a/.github/workflows/ci-linux.yml +++ b/.github/workflows/ci-linux.yml @@ -70,9 +70,7 @@ jobs: - name: Create artifacts if: ${{ !cancelled() }} run: | - rm -f iree-install/bin/clang* - rm -f iree-install/bin/llvm-link* - tar cf iree-dist-linux.tar -C iree-install . -C ../iree-build tools/testing/e2e/iree-e2e-matmul-test + tar cf iree-dist-linux.tar -C iree-install . - name: Upload artifacts uses: actions/upload-artifact@v4 @@ -95,7 +93,7 @@ jobs: strategy: fail-fast: false matrix: - runs-on: [linux-phoenix-20240606, linux-phoenix-20240819] + runs-on: [linux-phoenix] runs-on: ${{ matrix.runs-on }} env: XILINXD_LICENSE_FILE: /opt/xilinx/Xilinx.lic @@ -152,6 +150,7 @@ jobs: # on the GitHub CI machine. sudo prlimit -lunlimited --pid $$ + source /opt/xilinx/xrt/setup.sh bash build_tools/ci/run_matmul_test.sh \ test_matmuls \ iree-install \ @@ -163,20 +162,14 @@ jobs: - name : Smoke E2E comparison flag test run: | source .venv/bin/activate - # install requirements - # TODO(newling) make requirements.txt file - pip install numpy source /opt/xilinx/xrt/setup.sh python3 build_tools/ci/cpu_comparison/run_test.py \ test_aie_vs_cpu \ iree-install \ $PWD/llvm-aie \ - /opt/xilinx/xrt \ - /opt/Xilinx/Vitis/2024.2 \ - --reset_npu_between_runs=0 \ - --test_set='Smoke' \ - --do_not_run_aie=1 \ - --verbose=0 + --xrt-dir /opt/xilinx/xrt \ + --test-set='Smoke' \ + --do-not-run-aie # Assert that output.log is empty (because verbose=0) if [ -s output.log ]; then @@ -190,23 +183,20 @@ jobs: - name : E2E comparison of AIE to llvm-cpu run: | source .venv/bin/activate - # install requirements - # TODO(newling) make requirements.txt file - pip install numpy source /opt/xilinx/xrt/setup.sh python3 build_tools/ci/cpu_comparison/run_test.py \ test_aie_vs_cpu \ - iree-install \ + $PWD/iree-install \ $PWD/llvm-aie \ - /opt/xilinx/xrt \ - /opt/Xilinx/Vitis/2024.2 + --xrt-dir 
/opt/xilinx/xrt \ + --vitis-dir /opt/Xilinx/Vitis/2024.2 \ + --reset-npu-between-runs -v - name: Printing IR from aie2xclbin run: | source .venv/bin/activate + source /opt/xilinx/xrt/setup.sh bash build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh \ iree-install \ print_ir_aie2xclbin_results \ - $PWD/llvm-aie \ - /opt/xilinx/xrt \ - /opt/Xilinx/Vitis/2024.2 + $PWD/llvm-aie diff --git a/.github/workflows/ci-windows.yml b/.github/workflows/ci-windows.yml index c299a73ed..4804d14eb 100644 --- a/.github/workflows/ci-windows.yml +++ b/.github/workflows/ci-windows.yml @@ -51,7 +51,7 @@ jobs: - name: Setup Cpp uses: aminya/setup-cpp@v1 with: - compiler: msvc + compiler: llvm vcvarsall: true cmake: true ninja: true @@ -86,9 +86,7 @@ jobs: - name: Create artifacts if: ${{ !cancelled() }} run: | - rm -f iree-install/bin/clang* - rm -f iree-install/bin/llvm-link* - tar cf iree-dist-windows.tar -C iree-install . -C ../iree-build tools/testing/e2e/iree-e2e-matmul-test.exe + tar cf iree-dist-windows.tar -C iree-install . 
- name: Upload artifacts uses: actions/upload-artifact@v4 @@ -104,3 +102,59 @@ jobs: with: path: ${{ env.CACHE_DIR }} key: windows-build-test-cpp-asserts-v1-${{ github.sha }}-${{ github.event.repository.updated_at }} + + test_windows: + name: E2E Test windows + runs-on: windows-phoenix + needs: build_and_ctest + strategy: + fail-fast: true + steps: + - name: "Checking out repository" # for test scripts + uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + with: + submodules: false # not required for testbench + + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + name: windows_x86_64_release_packages + + - name: Extract artifact + run: | + mkdir iree-install + tar -xf iree-dist-windows.tar -C iree-install + bash build_tools/download_peano.sh + + - name: Create venv and install dependencies + run: | + python -m venv .venv + source .venv/Scripts/activate + pip install -r tests/matmul/requirements.txt + + - name: E2E correctness matmul test + run: | + source .venv/Scripts/activate + export XILINX_XRT=/c/Xilinx/XRT + bash build_tools/ci/run_matmul_test.sh \ + /c/test_matmuls \ + $PWD/iree-install \ + $PWD/llvm-aie + + - name : E2E comparison of AIE to llvm-cpu + run: | + source .venv/Scripts/activate + export XILINX_XRT=/c/Xilinx/XRT + python build_tools/ci/cpu_comparison/run_test.py \ + /c/test_aie_vs_cpu \ + $PWD/iree-install \ + $PWD/llvm-aie -v + + - name: Printing IR from aie2xclbin + run: | + source .venv/Scripts/activate + export XILINX_XRT=/c/Xilinx/XRT + bash build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh \ + $PWD/iree-install \ + /c/print_ir_aie2xclbin_results \ + $PWD/llvm-aie diff --git a/.gitmodules b/.gitmodules index 11197c42f..235562936 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,7 +1,8 @@ [submodule "third_party/XRT"] path = third_party/XRT - url = https://github.com/Xilinx/XRT.git + url = https://github.com/nod-ai/XRT.git shallow = true + branch = iree-amd-aie-patches [submodule 
"third_party/mlir-air"] path = third_party/mlir-air url = https://github.com/nod-ai/mlir-air.git diff --git a/README.md b/README.md index 85a170671..e151aca19 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,7 @@ +[![CI Linux](https://github.com/nod-ai/iree-amd-aie/actions/workflows/ci-linux.yml/badge.svg)](https://github.com/nod-ai/iree-amd-aie/actions/workflows/ci-linux.yml) +[![CI Windows](https://github.com/nod-ai/iree-amd-aie/actions/workflows/ci-windows.yml/badge.svg)](https://github.com/nod-ai/iree-amd-aie/actions/workflows/ci-windows.yml) +[![CI MacOS](https://github.com/nod-ai/iree-amd-aie/actions/workflows/ci-macos.yml/badge.svg)](https://github.com/nod-ai/iree-amd-aie/actions/workflows/ci-macos.yml) + # AMD AIE Plugin for IREE This repository contains an early-phase IREE compiler and runtime plugin for diff --git a/build_tools/ci/build_test_cpp.sh b/build_tools/ci/build_test_cpp.sh index 53d190f0f..65c61332a 100644 --- a/build_tools/ci/build_test_cpp.sh +++ b/build_tools/ci/build_test_cpp.sh @@ -1,6 +1,6 @@ #!/bin/bash -set -eu -o errtrace +set -eux -o errtrace this_dir="$(cd $(dirname $0) && pwd)" repo_root="$(cd $this_dir/../.. 
&& pwd)" @@ -32,6 +32,9 @@ if [[ "$OSTYPE" == "linux-gnu"* ]]; then export CMAKE_TOOLCHAIN_FILE="$this_dir/linux_default_toolchain.cmake" export CC=clang export CXX=clang++ +elif [[ "$OSTYPE" == "msys"* ]]; then + export CC=clang-cl.exe + export CXX=clang-cl.exe fi export CCACHE_DIR="${cache_dir}/ccache" export CCACHE_MAXSIZE="700M" @@ -57,8 +60,6 @@ echo '{ cd $iree_dir CMAKE_ARGS="\ - -S $iree_dir \ - -B $build_dir \ -GNinja \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=$install_dir \ @@ -76,14 +77,24 @@ CMAKE_ARGS="\ -DIREE_INPUT_STABLEHLO=OFF \ -DIREE_INPUT_TORCH=OFF \ -DCMAKE_OBJECT_PATH_MAX=4096 \ - -DIREE_CMAKE_PLUGIN_PATHS=$PWD/../iree-amd-aie" + -DIREE_CMAKE_PLUGIN_PATHS=$repo_root" if [[ "$OSTYPE" != "darwin"* ]]; then - CMAKE_ARGS="$CMAKE_ARGS -DIREE_EXTERNAL_HAL_DRIVERS=xrt" + cmake $CMAKE_ARGS \ + -DCMAKE_EXE_LINKER_FLAGS_INIT="-fuse-ld=lld" \ + -DCMAKE_SHARED_LINKER_FLAGS_INIT="-fuse-ld=lld" \ + -DCMAKE_MODULE_LINKER_FLAGS_INIT="-fuse-ld=lld" \ + -DCMAKE_C_COMPILER="${CC}" \ + -DCMAKE_CXX_COMPILER="${CXX}" \ + -DLLVM_TARGET_ARCH=X86 \ + -DLLVM_TARGETS_TO_BUILD=X86 \ + -DIREE_EXTERNAL_HAL_DRIVERS=xrt \ + -S $iree_dir -B $build_dir +else + cmake $CMAKE_ARGS \ + -S $iree_dir -B $build_dir fi -cmake $CMAKE_ARGS - echo "Building all" echo "------------" cmake --build "$build_dir" -- -k 0 @@ -99,10 +110,14 @@ if [[ "$OSTYPE" == "linux-gnu"* ]]; then ctest --test-dir "$build_dir" -R amd-aie --output-on-failure -j elif [[ "$OSTYPE" == "darwin"* ]]; then ctest --test-dir "$build_dir" -R amd-aie -E "pack_peel_pipeline_matmul|conv_fill_spec_pad" --output-on-failure -j --repeat until-pass:5 -else +elif [[ "$OSTYPE" == "msys"* ]]; then # hack while windows is flaky to get past failing tests ctest --test-dir "$build_dir" -R amd-aie --output-on-failure -j --repeat until-pass:5 fi # Show ccache stats. 
ccache --show-stats + +rm -f "$install_dir"/bin/clang* +rm -f "$install_dir"/bin/llvm-link* +cp "$build_dir"/tools/testing/e2e/iree-e2e-matmul-test "$install_dir"/bin diff --git a/build_tools/ci/cpu_comparison/run_test.py b/build_tools/ci/cpu_comparison/run_test.py index 1cc3b53fa..3ff4a05a5 100755 --- a/build_tools/ci/cpu_comparison/run_test.py +++ b/build_tools/ci/cpu_comparison/run_test.py @@ -4,6 +4,7 @@ import argparse import os +import platform import re import subprocess import time @@ -46,6 +47,10 @@ def find_executable(install_dir: Path, executable_name): install_dir / "bin", install_dir / "tools", ] + + if platform.system() == "Windows": + executable_name += ".exe" + for directory in search_dirs: executable_path = directory / executable_name if executable_path.is_file(): @@ -55,22 +60,37 @@ def find_executable(install_dir: Path, executable_name): ) -def shell_out(cmd: list, workdir=None, verbose=False, raiseOnError=True): +def shell_out(cmd: list, workdir=None, verbose: int = 0, raise_on_error=True, env=None): if workdir is None: workdir = Path.cwd() + workdir = Path(workdir) + os.chdir(workdir) if not isinstance(cmd, list): cmd = [cmd] for i, c in enumerate(cmd): if isinstance(c, Path): cmd[i] = str(c) - env = os.environ + if env is None: + env = {} + + env = {**env, **os.environ} + if verbose: - _cmd = " ".join([f"{k}={v}" for k, v in env.items()]) + " " + " ".join(cmd) + _cmd = " ".join(cmd) + if verbose > 1: + _cmd = " ".join([f"{k}={v}" for k, v in env.items()]) + " " + _cmd print(f"Running the following command:\n{_cmd}") - handle = subprocess.run(cmd, capture_output=True, cwd=workdir, env=env) - stderr_decode = handle.stderr.decode("utf-8").strip() - stdout_decode = handle.stdout.decode("utf-8").strip() + handle = subprocess.Popen( + cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=env, + ) + stdout, stderr = handle.communicate() + stderr_decode = stderr.decode("utf-8").strip() + stdout_decode = 
stdout.decode("utf-8").strip() if verbose: if stdout_decode: print("Standard output from script:") @@ -78,11 +98,11 @@ def shell_out(cmd: list, workdir=None, verbose=False, raiseOnError=True): if stderr_decode: print("Standard error from script:") print(stderr_decode) - if not raiseOnError and handle.returncode != 0: + if not raise_on_error and handle.returncode != 0: print( f"Error executing script, error code was {handle.returncode}. Not raising an error." ) - if raiseOnError and handle.returncode != 0: + if raise_on_error and handle.returncode != 0: raise RuntimeError( f"Error executing script, error code was {handle.returncode}" ) @@ -246,6 +266,7 @@ def __init__( self.iree_run_exe = iree_run_exe self.return_on_fail = return_on_fail self.verbose = verbose + self.xdna_datetime = None self.reset_npu_between_runs = reset_npu_between_runs self.do_not_run_aie = do_not_run_aie self.additional_aie_compilation_flags = additional_aie_compilation_flags @@ -256,12 +277,6 @@ def __init__( self.xrt_hash = "undetermined" self.xrt_release = "undetermined" self.peano_commit_hash = "undetermined" - xrt_bin_dir = xrt_dir / "bin" - xrt_smi_exe = xrt_bin_dir / "xrt-smi" - if not xrt_smi_exe.exists(): - xrt_smi_exe = xrt_bin_dir / "xbutil" - if not xrt_smi_exe.exists(): - raise RuntimeError(f"Neither xrt-smi nor xbutil found in {xrt_bin_dir}") self.reset_npu_script = file_dir.parent / "reset_npu.sh" if reset_npu_between_runs and not self.reset_npu_script.exists(): @@ -269,6 +284,24 @@ def __init__( f"The file {self.reset_npu_script} does not exist, and reset_npu_script=True" ) + # Populated at runtime + self.failures = [] + + if not isinstance(self.verbose, bool) and not isinstance(self.verbose, int): + raise ValueError( + f"verbose must be a boolean or integer, not {type(verbose)}" + ) + + if not xrt_dir: + return + + xrt_bin_dir = xrt_dir / "bin" + xrt_smi_exe = xrt_bin_dir / "xrt-smi" + if not xrt_smi_exe.exists(): + xrt_smi_exe = xrt_bin_dir / "xbutil" + if not 
xrt_smi_exe.exists(): + raise RuntimeError(f"Neither xrt-smi nor xbutil found in {xrt_bin_dir}") + # Get the string output of the xrt-smi 'examine' command. Expect the # string to look something like: # @@ -317,11 +350,10 @@ def __init__( # Try and get the peano commit hash. This is a bit of a hack, if it fails # peano_commit_hash is left as "undetermined". - self.peano_commit_hash = "undetermined" peano_clang_path = peano_dir / "bin" / "clang" if peano_clang_path.exists(): _, clang_v_output = shell_out( - [peano_clang_path, "-v"], verbose=self.verbose, raiseOnError=False + [peano_clang_path, "-v"], verbose=self.verbose, raise_on_error=False ) peano_commit_hash = re.findall( r"clang version \d+\.\d+\.\d+ \(https://github.com/Xilinx/llvm-aie (\w+)\)", @@ -331,14 +363,6 @@ def __init__( if peano_commit_hash: self.peano_commit_hash = peano_commit_hash[0] - # Populated at runtime - self.failures = [] - - if not isinstance(self.verbose, bool) and not isinstance(self.verbose, int): - raise ValueError( - f"verbose must be a boolean or integer, not {type(verbose)}" - ) - def __str__(self): return dedent( f""" @@ -498,6 +522,7 @@ def aie_vs_llvm_cpu( return name = name_from_mlir_filename(test_file) + print(f"Running {name} test") input_args = generate_inputs(test_file, config.output_dir, seed) @@ -604,7 +629,7 @@ def run(self, config): generate_matmul_test(test_name, template_name, 128, 128, 256, "i32", "i32") aie_vs_llvm_cpu(config, test_name, tile_pipeline="pack-peel", rtol=0, atol=0) - if config.xdna_datetime and config.xdna_datetime < 20240819: + if config.xdna_datetime and config.xdna_datetime < 20240801: for name in [ "two_matmul_switching", "matmul_f32_8_8_4", @@ -630,9 +655,10 @@ def run(self, config): generate_matmul_test( test_name, template_name, 1024, 1024, 512, "bf16", "f32" ) - aie_vs_llvm_cpu( - config, test_name, tile_pipeline="pack-peel", use_ukernel=True - ) + if config.vitis_dir: + aie_vs_llvm_cpu( + config, test_name, tile_pipeline="pack-peel", 
use_ukernel=True + ) aie_vs_llvm_cpu( config, test_name, tile_pipeline="pack-peel", use_ukernel=False ) @@ -676,7 +702,7 @@ def run(self, config): ) -def getTestPartition(): +def get_test_partition(): return [ConvolutionSet(), MatmulSet(), SmokeSet()] @@ -741,9 +767,10 @@ def all_tests( verify_determinism() # Verify a very basic script runs before running the more complex tests - shell_out(["pwd"], verbose=config.verbose) + if platform.system() != "Windows": + shell_out(["pwd"], verbose=config.verbose) - partition = getTestPartition() + partition = get_test_partition() partition_names = [p.name for p in partition] map_to_partition = {p.name: p for p in partition} if "All" in test_set: @@ -757,8 +784,6 @@ def all_tests( partition = map_to_partition[test] partition.run(config) - # for p in partition: - if config.failures: # Convert the list of failed tests into a map: test name to the # number of failures (config.failures list may contain duplicates) @@ -783,55 +808,49 @@ def all_tests( parser.add_argument("output_dir", type=abs_path) parser.add_argument("iree_install_dir", type=abs_path) parser.add_argument("peano_install_dir", type=abs_path) - parser.add_argument("xrt_dir", type=abs_path) - parser.add_argument("vitis_dir", type=abs_path) + parser.add_argument("--xrt-dir", type=abs_path) + parser.add_argument("--vitis-dir", type=abs_path) # TODO(newling) make bool options boolean, not integer (tried but had issues) parser.add_argument( - "--return_on_fail", + "--return-on-fail", nargs="?", default=1, type=int, - help=( - "If 0, then the script will continue running even if a test fails, " - "enumerating all failures. Otherwise the script will exit on the first failure." + help=dedent( + """ + If 0, then the script will continue running even if a test fails, + enumerating all failures. Otherwise the script will exit on the first failure. 
+ """ ), ) - parser.add_argument( - "--verbose", - nargs="?", - default=1, - type=int, - help="If 0, then print statements are suppressed, otherwise they are printed.", - ) + parser.add_argument("-v", "--verbose", action="count", default=0) parser.add_argument( - "--reset_npu_between_runs", - nargs="?", - default=1, - type=int, + "--reset-npu-between-runs", + action="store_true", help=( - "If 0 then the NPU is not reset between runs, otherwise it is reset. " + "If passed then the NPU is reset between runs, otherwise it is not reset. " "Resetting between runs can in theory help avoid certain types of " "errors in parts of the stack which these tests are not designed to catch." ), ) parser.add_argument( - "--do_not_run_aie", - nargs="?", - default=0, - type=int, - help=( - "If 1, then the AIE backend will not be run. This is useful for " - "ensuring that everything up to the AIE run and numerical comparison " - "is working correctly, for example if you are not on a device with " - "working AIE HW and runtime." + "--do-not-run-aie", + action="store_true", + help=dedent( + """ + If passed, then the AIE backend will not be run. This is useful for + ensuring that everything up to the AIE run and numerical comparison + is working correctly, for example if you are not on a device with + working AIE HW and runtime. + """ ), ) - partition = getTestPartition() + partition = get_test_partition() partition_names = [p.name for p in partition] partition_names_and_all = partition_names + ["All"] help_string = ( @@ -840,19 +859,22 @@ def all_tests( ) parser.add_argument( - "--test_set", + "--test-set", type=str, help=help_string, default="All", ) parser.add_argument( - "--additional_aie_compilation_flags", + "--additional-aie-compilation-flags", type=str, - help=( - "Additional flags to pass to the AIE compiler, for all tests. 
" - "Example, do print the IR between passes during compilation you might have: " - ' --additional_aie_compilation_flags="--mlir-print-ir-before-all --mlir-print-ir-module-scope --aie2xclbin-print-ir-before-all --aie2xclbin-print-ir-module-scope"' + help=dedent( + """ + Additional flags to pass to the AIE compiler, for all tests. + Example, to print the IR between passes during compilation you might have: + --additional-aie-compilation-flags="--mlir-print-ir-before-all --mlir-print-ir-module-scope + --aie2xclbin-print-ir-before-all --aie2xclbin-print-ir-module-scope" + """ ), default="", ) diff --git a/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh b/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh index a0772efe4..edc70df08 100755 --- a/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh +++ b/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh @@ -9,11 +9,12 @@ set -euo pipefail # Check for the number of provided arguments -if [ "$#" -ne 2 ] && [ "$#" -ne 5 ]; then +if [ "$#" -ne 3 ] && [ "$#" -ne 5 ]; then echo -e "Illegal number of parameters: $#." \ "\n For 3 parameters:" \ "\n 1) " \ "\n 2) " \ + "\n 3) " \ "\n For 5 parameters:" \ "\n 1) " \ "\n 2) " \ @@ -35,9 +36,9 @@ OUTPUT=`realpath "${2}"` mkdir -p ${OUTPUT} # The CI case: -if [ "$#" -eq 2 ]; then - echo "Assuming that this is the 'CI case' as 2 parameters were provided." - PEANO=/opt/llvm-aie +if [ "$#" -eq 3 ]; then + echo "Assuming that this is the 'CI case' as 3 parameters were provided." 
+ PEANO="$3" XRT=/opt/xilinx/xrt VITIS=/opt/Xilinx/Vitis/2024.2 fi @@ -47,7 +48,6 @@ echo "xchesscc: $(find $VITIS -name xchesscc)" # The local set-paths-manually case: if [ "$#" -eq 5 ]; then - PEANO="$3" XRT="$4" VITIS="$5" fi @@ -85,16 +85,11 @@ fi if [ -d "${XRT}" ]; then XRT=`realpath "${XRT}"` -else - echo "XRT does not exist: ${XRT}" - exit 1 + source $XRT/setup.sh fi if [ -d "${VITIS}" ]; then VITIS=${VITIS} -else - echo "VITIS does not exist: ${VITIS}" - exit 1 fi # There might be a FileCheck program in the IREE_INSTALL_DIR. Check. @@ -108,8 +103,6 @@ else exit 1 fi -source $XRT/setup.sh - THIS="$(cd $(dirname $0) && pwd)" SOURCE_MLIR_FILE="${THIS}/linalg_matmul_f32.mlir" diff --git a/build_tools/ci/run_matmul_test.sh b/build_tools/ci/run_matmul_test.sh index c195f876f..ad7bead47 100755 --- a/build_tools/ci/run_matmul_test.sh +++ b/build_tools/ci/run_matmul_test.sh @@ -64,12 +64,19 @@ fi # Search for iree-compile and iree-e2e-matmul-test in the user provided directory. IREE_COMPILE_EXE="" TEST_RUNNER="" -for dir in "${IREE_INSTALL_DIR}" "${IREE_INSTALL_DIR}/bin" "${IREE_INSTALL_DIR}/tools"; do +for dir in "${IREE_INSTALL_DIR}" "${IREE_INSTALL_DIR}/bin" "${IREE_INSTALL_DIR}/tools" "${IREE_INSTALL_DIR}/tools/testing/e2e"; do + echo "Looking in $dir" if [ -f "${dir}/iree-compile" ]; then IREE_COMPILE_EXE="${dir}/iree-compile" fi - if [ -f "${dir}/testing/e2e/iree-e2e-matmul-test" ]; then - TEST_RUNNER="${dir}/testing/e2e/iree-e2e-matmul-test" + if [ -f "${dir}/iree-compile.exe" ]; then + IREE_COMPILE_EXE="${dir}/iree-compile.exe" + fi + if [ -f "${dir}/iree-e2e-matmul-test" ]; then + TEST_RUNNER="${dir}/iree-e2e-matmul-test" + fi + if [ -f "${dir}/iree-e2e-matmul-test.exe" ]; then + TEST_RUNNER="${dir}/iree-e2e-matmul-test.exe" fi done @@ -101,9 +108,8 @@ if [ -z "${4-}" ]; then else XRT_DIR=`realpath "$4"` fi -if [ ! -d "${XRT_DIR}" ]; then - echo "No directory '${XRT_DIR}' (argument 4) found." 
- exit 1 +if [ -d "$XRT_DIR" ]; then + source $XRT_DIR/setup.sh fi # Parameter 5) @@ -112,10 +118,6 @@ if [ -z "${5-}" ]; then else VITIS=`realpath "$5"` fi -if [ ! -d "${VITIS}" ]; then - echo "No directory '${VITIS}' (argument 5) found." - exit 1 -fi THIS_DIR="$(cd $(dirname $0) && pwd)" ROOT_DIR="$(cd $THIS_DIR/../.. && pwd)" @@ -127,7 +129,7 @@ if [ ! -f "${GENERATOR}" ]; then exit 1 fi -IREE_PYTHON3_EXECUTABLE="${IREE_PYTHON3_EXECUTABLE:-python3}" +IREE_PYTHON3_EXECUTABLE="${IREE_PYTHON3_EXECUTABLE:-python}" if [ -z "$IREE_PYTHON3_EXECUTABLE" ]; then echo "IREE_PYTHON3_EXECUTABLE is not set." exit 1 @@ -137,12 +139,12 @@ fi GITHUB_ACTIONS="${GITHUB_ACTIONS:-false}" -source $XRT_DIR/setup.sh # Circumvent xclbin security (no longer needed as of April 2024 XDNA driver) export XRT_HACK_UNSECURE_LOADING_XCLBIN=1 cd ${OUTPUT_DIR} +export MATMUL_TESTS_RUN=0 export MATMUL_TESTS_FAILS=0 ############################################################################### @@ -352,6 +354,8 @@ function run_matmul_test() { exit 1 fi + export MATMUL_TESTS_RUN=$(( $MATMUL_TESTS_RUN+1 )) + # Re-enable exit on failure: set -e @@ -398,19 +402,18 @@ function run_matmul_test() { --iree-amd-aie-show-invoked-commands" if [ $use_ukernel -ne 0 ]; then - compilation_flags="${compilation_flags} \ --iree-amdaie-enable-ukernels=all" fi + set +e + echo "**** Generating matmul .vmfb file for ${name} ****" ${IREE_COMPILE_EXE} "${matmul_ir}" \ ${compilation_flags} -o "${matmul_vmfb}" - compileResult=$? - # Handle cases other than when compilation is expected to, and does, succeed: if [ $expect_compile_failure -ne 0 ]; then if [ $compileResult -ne 0 ]; then @@ -418,22 +421,30 @@ function run_matmul_test() { return 0 else echo "Expected compilation failure, got compilation success." - exit 1 + export MATMUL_TESTS_FAILS=$(( $MATMUL_TESTS_FAILS+1 )) + return fi else if [ $compileResult -ne 0 ]; then echo "Expected compilation success, got compilation failure." 
- exit 1 + export MATMUL_TESTS_FAILS=$(( $MATMUL_TESTS_FAILS+1 )) + return fi fi # Renable exit on failure: - set -e echo "**** Generating calls .vmfb file for ${name} ****" ${IREE_COMPILE_EXE} "${calls_ir}" \ --iree-hal-target-backends=${target_backend} \ -o "${calls_vmfb}" + return_status=$? + if [ $return_status -ne 0 ]; then + echo "'${name}' matmul compile failed!" + export MATMUL_TESTS_FAILS=$(( $MATMUL_TESTS_FAILS+1 )) + return + fi + compiled_time=$(date +%s%3N) echo "**** Running '${name}' matmul tests ****" @@ -444,13 +455,11 @@ function run_matmul_test() { --device=${device} \ --max_elements_to_check=${max_elements_to_check}" - set +e - total_num_runs=$(( num_repeat_runs * num_corruption_repeat_runs)) echo "**** Running '${name}' matmul test ${total_num_runs} times (command ${COMMAND}) ****" for i in $(seq 1 $num_repeat_runs); do # Only reset NPU in CI to facilitate easier local testing without sudo access. - if [ "${GITHUB_ACTIONS}" = true ]; then + if [[ "$OSTYPE" == "linux-gnu"* ]] && [ "${GITHUB_ACTIONS}" = true ]; then echo "Reset NPU" bash $THIS_DIR/reset_npu.sh fi @@ -527,12 +536,14 @@ run_matmul_test \ --use_ukernel "0" \ --num_repeat_runs "2" -run_matmul_test \ - --name_prefix "ukern" \ - --lhs_rhs_type "bf16" \ - --acc_type "f32" \ - --m "256" --k "256" --n "256" \ - --use_ukernel "1" +if [ -d "$VITIS" ]; then + run_matmul_test \ + --name_prefix "ukern" \ + --lhs_rhs_type "bf16" \ + --acc_type "f32" \ + --m "256" --k "256" --n "256" \ + --use_ukernel "1" +fi # Disabled until the following issue is resolved: # https://github.com/Xilinx/llvm-aie/issues/102 @@ -801,6 +812,13 @@ run_matmul_test_on_shapes ${i32_shapes_small[@]} \ --acc_type "i32" \ --num_repeat_runs "10" +i32_shapes_medium=( + '1024x1024x1024' +) +if [ "$OSTYPE" != "msys" ]; then + i32_shapes_medium+=('1536x2048x1536') +fi + run_matmul_test_on_shapes ${i32_shapes_medium[@]} \ --name_prefix "medium" \ --lower_to_aie_pipeline "objectFifo" \ @@ -866,49 +884,58 @@ 
run_matmul_test_on_shapes ${bf16_i8_shapes_medium[@]} \ --acc_type "i32" \ --num_repeat_runs "2" -run_matmul_test_on_shapes ${bf16_ukernel_shapes_small[@]} \ - --name_prefix "small" \ - --lower_to_aie_pipeline "objectFifo" \ - --tile_pipeline "pack-peel" \ - --lhs_rhs_type "bf16" \ - --acc_type "f32" \ - --num_repeat_runs "2" \ - --use_ukernel "1" - -run_matmul_test_on_shapes ${bf16_ukernel_shapes_medium[@]} \ - --name_prefix "medium" \ - --lower_to_aie_pipeline "objectFifo" \ - --tile_pipeline "pack-peel" \ - --lhs_rhs_type "bf16" \ - --acc_type "f32" \ - --num_repeat_runs "2" \ - --use_ukernel "1" +if [ -d "$VITIS" ]; then + run_matmul_test_on_shapes ${bf16_ukernel_shapes_small[@]} \ + --name_prefix "small" \ + --lower_to_aie_pipeline "objectFifo" \ + --tile_pipeline "pack-peel" \ + --lhs_rhs_type "bf16" \ + --acc_type "f32" \ + --num_repeat_runs "2" \ + --use_ukernel "1" + + run_matmul_test_on_shapes ${bf16_ukernel_shapes_medium[@]} \ + --name_prefix "medium" \ + --lower_to_aie_pipeline "objectFifo" \ + --tile_pipeline "pack-peel" \ + --lhs_rhs_type "bf16" \ + --acc_type "f32" \ + --num_repeat_runs "2" \ + --use_ukernel "1" +fi ################################################################### # Chess tests ################################################################### -run_matmul_test \ - --name_prefix "chess_i32_matmul" \ - --lhs_rhs_type "i32" \ - --acc_type "i32" \ - --m "32" \ - --n "32" \ - --k "32" \ - --use_chess "1" \ - --num_repeat_runs "10" +if [ -d "$VITIS" ]; then + + run_matmul_test \ + --name_prefix "chess_i32_matmul" \ + --lhs_rhs_type "i32" \ + --acc_type "i32" \ + --m "32" \ + --n "32" \ + --k "32" \ + --use_chess "1" \ + --num_repeat_runs "10" + + run_matmul_test \ + --name_prefix "chess_bf16_ukernel" \ + --lhs_rhs_type "bf16" \ + --acc_type "f32" \ + --m "64" \ + --n "64" \ + --k "64" \ + --use_chess "1" \ + --num_repeat_runs "10" \ + --use_ukernel "1" -run_matmul_test \ - --name_prefix "chess_bf16_ukernel" \ - --lhs_rhs_type "bf16" \ - 
--acc_type "f32" \ - --m "64" \ - --n "64" \ - --k "64" \ - --use_chess "1" \ - --num_repeat_runs "10" \ - --use_ukernel "1" +fi + +echo "\n\n" +echo "$MATMUL_TESTS_RUN matmul tests run!" if [ $MATMUL_TESTS_FAILS -ne 0 ]; then echo "$MATMUL_TESTS_FAILS matmul tests failed! Scroll up and look for the 🦄 and 🐞..." exit 1 diff --git a/build_tools/download_peano.sh b/build_tools/download_peano.sh index 02b14a28e..70c8693f5 100644 --- a/build_tools/download_peano.sh +++ b/build_tools/download_peano.sh @@ -1,5 +1,5 @@ #!/bin/bash -RELEASE=19.0.0.2024072901+debfcac7 -pip download -q llvm_aie==$RELEASE -f https://github.com/Xilinx/llvm-aie/releases/expanded_assets/nightly -unzip -q llvm_aie*whl +RELEASE=19.0.0.2024082221+90abe71b +pip download llvm_aie==$RELEASE -f https://github.com/Xilinx/llvm-aie/releases/expanded_assets/nightly +unzip llvm_aie*whl diff --git a/cmake/iree_aie_bootgen.cmake b/cmake/iree_aie_bootgen.cmake index 526ed9a6f..a834c9427 100644 --- a/cmake/iree_aie_bootgen.cmake +++ b/cmake/iree_aie_bootgen.cmake @@ -26,28 +26,12 @@ replace_string_in_file("${_BOOTGEN_SOURCE_DIR}/main.cpp" file(GLOB _bootgen_sources "${_BOOTGEN_SOURCE_DIR}/*.c" "${_BOOTGEN_SOURCE_DIR}/*.cpp") add_library(iree-aie-bootgen STATIC ${_bootgen_sources}) -if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") +if(WIN32) target_compile_definitions(iree-aie-bootgen PUBLIC YY_NO_UNISTD_H) -elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU") - set(_bootgen_c_warning_ignores - -Wno-cast-qual - -Wno-covered-switch-default - -Wno-date-time - -Wno-deprecated-declarations - -Wno-deprecated-register - -Wno-dynamic-class-memaccess - -Wno-format - -Wno-implicit-fallthrough - -Wno-incompatible-function-pointer-types - -Wno-incompatible-pointer-types-discards-qualifiers - -Wno-misleading-indentation - -Wno-pointer-bool-conversion - -Wno-sign-compare - -Wno-tautological-overlap-compare - -Wno-unused) - set(_bootgen_cxx_warning_ignores - -Wno-deprecated-copy -Wno-non-virtual-dtor -Wno-overloaded-virtual - 
-Wno-register -Wno-reorder -Wno-suggest-override) +endif() +if(CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU") + set(_bootgen_c_warning_ignores -w) + set(_bootgen_cxx_warning_ignores -w -Wno-register) endif() target_compile_options(iree-aie-bootgen PRIVATE diff --git a/cmake/iree_aie_rt.cmake b/cmake/iree_aie_rt.cmake index 96b9791d2..dcdb7c204 100644 --- a/cmake/iree_aie_rt.cmake +++ b/cmake/iree_aie_rt.cmake @@ -254,9 +254,7 @@ set_target_properties( PROPERTIES COMPILE_OPTIONS "${_aie_runtime_compile_options}") target_compile_definitions(xaiengine PRIVATE ${XAIE_DEBUG} __AIECDO__ XAIE_FEATURE_ALL) if(CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU") - set(xaiengine_c_warning_ignores - -Wno-unused-but-set-variable - -Wno-incompatible-pointer-types) + set(xaiengine_c_warning_ignores -w) target_compile_options(xaiengine PRIVATE ${xaiengine_c_warning_ignores}) endif() # For diff --git a/cmake/iree_aie_xrt.cmake b/cmake/iree_aie_xrt.cmake index 0811fbda4..9606573f8 100644 --- a/cmake/iree_aie_xrt.cmake +++ b/cmake/iree_aie_xrt.cmake @@ -27,7 +27,8 @@ FetchContent_Declare( GIT_PROGRESS TRUE DOWNLOAD_NO_EXTRACT FALSE # prevents configure from rerunning all the time - URL_HASH MD5=84bc7c861606dc66bcfbeb660fcddfd2) + DOWNLOAD_EXTRACT_TIMESTAMP TRUE + URL_HASH MD5=84BC7C861606DC66BCFBEB660FCDDFD2) FetchContent_MakeAvailable(Boost) set(IREE_AIE_BOOST_LIBS any @@ -53,10 +54,6 @@ set(IREE_XRT_SOURCE_DIR "${IREE_AMD_AIE_SOURCE_DIR}/third_party/XRT/src") set(_xclbinutil_source_dir ${IREE_XRT_SOURCE_DIR}/runtime_src/tools/xclbinutil) -# remove ssl dep -replace_string_in_file(${_xclbinutil_source_dir}/XclBinUtilMain.cxx - "bValidateSignature == true" "false") - # transformcdo target if(NOT WIN32) replace_string_in_file(${_xclbinutil_source_dir}/aie-pdi-transform/src/CMakeLists.txt @@ -66,7 +63,7 @@ endif() # otherwise the various stois that read these will explode... 
# XRT/src/runtime_src/tools/xclbinutil/XclBinClass.cxx#L55 -file(READ ${IREE_XRT_SOURCE_DIR}/CMakeLists.txt _xrt_cmake_file_contents) +file(READ ${IREE_XRT_SOURCE_DIR}/CMake/settings.cmake _xrt_cmake_file_contents) string(REGEX MATCH "XRT_VERSION_MAJOR ([0-9]+)" XRT_VERSION_MAJOR ${_xrt_cmake_file_contents}) # note CMAKE_MATCH_0 is the whole match... set(XRT_VERSION_MAJOR ${CMAKE_MATCH_1}) @@ -87,24 +84,6 @@ configure_file(${IREE_XRT_SOURCE_DIR}/CMake/config/version.h.in ${IREE_XRT_SOURCE_DIR}/runtime_src/core/common/gen/version.h) configure_file(${IREE_XRT_SOURCE_DIR}/CMake/config/version.h.in ${IREE_XRT_SOURCE_DIR}/runtime_src/core/common/api/version.h) -replace_string_in_file(${IREE_XRT_SOURCE_DIR}/runtime_src/core/common/query.h - "#include " "#include ") - -set(_noop_xclbin_sig_cxx " -#include \"XclBinSignature.h\" -void signXclBinImage(const std::string& _fileOnDisk, - const std::string& _sPrivateKey, - const std::string& _sCertificate, - const std::string& _sDigestAlgorithm, - bool _bEnableDebugOutput) {} -void verifyXclBinImage(const std::string& _fileOnDisk, - const std::string& _sCertificate, - bool _bEnableDebugOutput) {} -void dumpSignatureFile(const std::string& _fileOnDisk, - const std::string& _signatureFile) {} -void getXclBinPKCSStats(const std::string& _xclBinFile, - XclBinPKCSImageStats& _xclBinPKCSImageStats) {}") -file(WRITE "${_xclbinutil_source_dir}/XclBinSignature.cxx" "${_noop_xclbin_sig_cxx}") file( GLOB @@ -151,20 +130,30 @@ target_include_directories(iree-aie-xclbinutil ${_xclbinutil_source_dir}) target_compile_options(iree-aie-xclbinutil PRIVATE - $<$:-fexceptions -frtti> - $<$:/EHsc /GR>) + $<$:-fexceptions -frtti -w> + $<$:/EHsc /GR /w>) set_target_properties(iree-aie-xclbinutil PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/tools") -iree_install_targets( + +# iree_install_targets has EXCLUDE_FROM_ALL +install( TARGETS iree-aie-xclbinutil + EXPORT IREEExported-Runtime COMPONENT IREETools-Runtime - EXPORT_SET Runtime -) + 
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) + # ############################################################################## # xrt_coreutil # ############################################################################## +message(STATUS "building XRT core libs") + +set(XRT_AIE_BUILD "yes") +set(XRT_ENABLE_AIE "yes") +set(XRT_NATIVE_BUILD "yes") +add_definitions(-DXRT_ENABLE_AIE -DXRT_AIE_BUILD) + # send xrt_coreutil to trash so it doesn't get installed set(XRT_INSTALL_LIB_DIR "$ENV{TMP}") set(XRT_INSTALL_BIN_DIR "$ENV{TMP}") @@ -180,8 +169,8 @@ set(_core_libs core_common_library_objects core_common_api_library_objects core_common_xdp_profile_objects - xrt_coreutil -) + xrt_coreutil) + foreach(_core_lib IN LISTS _core_libs) target_include_directories(${_core_lib} PUBLIC ${IREE_XRT_SOURCE_DIR}/runtime_src/core/include @@ -192,7 +181,13 @@ foreach(_core_lib IN LISTS _core_libs) target_compile_definitions(${_core_lib} PUBLIC -DBOOST_BIND_GLOBAL_PLACEHOLDERS) target_compile_options(${_core_lib} PRIVATE - $<$:-fexceptions -frtti> - $<$:/EHsc /GR>) + $<$:-fexceptions -frtti -w> + $<$:/EHsc /GR /w>) target_link_libraries(${_core_lib} PUBLIC $) endforeach() + +install( + TARGETS xrt_coreutil + EXPORT IREEExported-Runtime + COMPONENT IREETools-Runtime + LIBRARY DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp b/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp index d2a6e4e49..12769f2c9 100644 --- a/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp +++ b/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp @@ -840,14 +840,6 @@ static void configureAIEVecCommonLegalizations(ConversionTarget &target) { [](arith::SubFOp op) { return !isa(op.getType()); }); } -static void configureAIEVecV1Legalizations(ConversionTarget &target) { - target.addDynamicallyLegalOp( - [](arith::MulIOp op) { return !isa(op.getType()); }); - target.addDynamicallyLegalOp( - 
[](arith::MulFOp op) { return !isa(op.getType()); }); - target.addLegalDialect(); -} - static void configureAIEVecV2Legalizations(ConversionTarget &target) { target.addLegalOp(); target.addLegalOp(); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.cpp index c746c0877..a769a3e7c 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.cpp @@ -18,20 +18,6 @@ static const char kPackingConfigAttrName[] = "packing_config"; -namespace mlir::iree_compiler { - -/// Returns an `ArrayAttr` where each element is an `IntegerAttr` of 64-bit -/// integer type whose values is obtained from `values`. -static ArrayAttr getIndexArrayAttr(MLIRContext *context, - ArrayRef values) { - return ArrayAttr::get( - context, llvm::map_to_vector(values, [&](int64_t value) -> Attribute { - return IntegerAttr::get(IndexType::get(context), APInt(64, value)); - })); -} - -} // namespace mlir::iree_compiler - namespace mlir::iree_compiler::AMDAIE { //===----------------------------------------------------------------------===// @@ -84,11 +70,6 @@ namespace mlir::iree_compiler { // Helpers for forming `amdaie.packing_config_level` attribute. 
// ===----------------------------------------------------------------------===// -static AMDAIE::PermLevelAttr getPermLevelAttr( - MLIRContext *context, ArrayRef permLevelVal) { - return AMDAIE::PermLevelAttr::get(context, permLevelVal); -} - static AMDAIE::PermLevelsAttr getPermLevelsAttr( MLIRContext *context, ArrayRef> permLevelsVal) { SmallVector permLevels; diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp index 21afa7b97..455416f2d 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp @@ -7,11 +7,12 @@ #include "XCLBinGen.h" #include -#include #include #include #include #include +// ReSharper disable once CppUnusedIncludeDirective +#include #include #include "AMDAIETargets.h" @@ -179,7 +180,10 @@ FailureOr findVitis(std::optional &vitisDir, static FailureOr findAMDAIETool(std::string toolName, const Path &amdAIEInstallDir) { - Path toolBinExe = ""; +#if defined(_WIN32) + toolName += ".exe"; +#endif // _WIN32 + Path toolBinExe; if (!amdAIEInstallDir.empty()) { toolBinExe = amdAIEInstallDir / toolName; if (std::filesystem::exists(toolBinExe)) return toolBinExe; @@ -195,7 +199,7 @@ static FailureOr findAMDAIETool(std::string toolName, if (std::filesystem::exists(toolBinExe)) return toolBinExe; llvm::errs() << "Could not find " << toolName - << ". Check your --iree-amd-aie-install-dir flag"; + << ". Check your --iree-amd-aie-install-dir flag\n"; return failure(); } @@ -276,12 +280,21 @@ std::optional dumpStrToDisk(const std::string &payload, return {}; } -// Returns either: -// -- the output of running the tool, if run without failure, or -// -- an empty optional, if the tool fails to run. 
-static std::optional runTool( - const std::string &program, const std::vector &args, +bool hasEnding(std::string const &fullString, std::string const &ending) { + if (fullString.length() >= ending.length()) { + return fullString.compare(fullString.length() - ending.length(), + ending.length(), ending) == 0; + } + return false; +} + +LogicalResult runTool( + const std::string &program_, const std::vector &args, bool verbose, std::optional> env = std::nullopt) { + std::string program = program_; +#if defined(_WIN32) + if (!hasEnding(program_, ".exe")) program = program_ + ".exe"; +#endif // _WIN32 if (verbose) { llvm::outs() << "\nRun: "; if (env) @@ -294,14 +307,11 @@ static std::optional runTool( // Check that 'program' is a valid path, if not, fail immediately. if (!std::filesystem::exists(program)) { llvm::errs() << "Program " << program << " does not exist\n"; - return {}; + return failure(); } // Run the program, piping any output to a temporary file (we only want to // print to terminal if verbose is true). 
- std::string errMsg; - sys::ProcessStatistics stats; - std::optional optStats(stats); SmallVector pArgs = {program}; pArgs.append(args.begin(), args.end()); SmallVector temporaryPath; @@ -313,20 +323,35 @@ static std::optional runTool( if (errorCode) { llvm::errs() << "Failed to create temporary file: " << errorCode.message() << "\n"; - return {}; + return failure(); } } + SmallVector> redirects; +#ifdef _WIN32 + redirects = {{}, {}, {}}; + // Explicit type but this never actually constructs an ArrayRef + std::optional> envSmallVec = std::nullopt; +#else std::string temporaryPathStr = std::string(temporaryPath.begin(), temporaryPath.size()); StringRef temporaryPathRef(temporaryPathStr); - auto tp = std::optional(temporaryPathRef); llvm::SmallVector envSmallVec; if (env) envSmallVec.append(env->begin(), env->end()); + auto tp = std::optional(temporaryPathRef); + redirects = {tp, tp, tp}; +#endif + + bool executionFailed; + std::string errMsg; + sys::ProcessStatistics stats; + std::optional optStats(stats); int result = sys::ExecuteAndWait(program, pArgs, envSmallVec, - /* redirects */ {tp, tp, tp}, 0, 0, &errMsg, - nullptr, &optStats); + /* redirects */ redirects, + /*SecondsToWait*/ 0, /*MemoryLimit*/ 0, + &errMsg, &executionFailed, &optStats); +#ifndef _WIN32 auto maybeOutputFromFile = [&]() -> std::optional { std::ifstream t(temporaryPathRef.str()); std::stringstream buffer; @@ -340,9 +365,9 @@ static std::optional runTool( if (!maybeOutputFromFile) { llvm::errs() << "Failed to open temporary file " << temporaryPathRef.str() << "\n"; - return {}; } const std::string &outputFromFile = maybeOutputFromFile.value(); +#endif if (verbose) { float totalTime = std::chrono::duration_cast>( @@ -352,17 +377,21 @@ static std::optional runTool( llvm::outs() << "\n" << exitStatusStr << " in totalTime " << totalTime << " [s]. 
Exit code=" << result << "\n"; +#ifndef _WIN32 llvm::outs() << outputFromFile << "\n"; +#endif } - if (result != 0) { + if (result) { llvm::errs() << "Failed to run tool: " << program << ". Error: '" << errMsg - << "'\n" - << outputFromFile; - return {}; + << "'\n"; +#ifndef _WIN32 + llvm::errs() << outputFromFile; +#endif + return failure(); } - return outputFromFile; + return success(); } static LogicalResult assembleFileUsingChess( @@ -378,12 +407,7 @@ static LogicalResult assembleFileUsingChess( args.emplace_back("-o"); args.emplace_back(outputFile); std::vector env = makeChessEnv(vitisDir, npuVersion); - if (!runTool(xChessCCExe, args, verbose, env)) { - llvm::errs() << "Failed to assemble " << inputFile << " with chess"; - return failure(); - } - - return success(); + return runTool(xChessCCExe, args, verbose, env); } std::vector makePeanoOptArgs() { @@ -435,11 +459,7 @@ static LogicalResult assembleFileUsingPeano( args.emplace_back("-o"); args.emplace_back(outputFile); if (verbose) args.emplace_back("-v"); - if (!runTool((peanoDir / "bin" / "clang").string(), args, verbose)) { - llvm::errs() << "Failed to assemble " << outputFile << ".o with peano"; - return failure(); - } - return success(); + return runTool((peanoDir / "bin" / "clang").string(), args, verbose); } static_assert(std::is_same_v vitisDir, const std::string &targetArch, bool verbose, Path peanoDir, const std::string &npuVersion, const std::optional &ukernel) { @@ -583,49 +603,45 @@ static LogicalResult generateCoreElfFiles( chessArgs.emplace_back("-o"); chessArgs.emplace_back(elfFile.string()); std::vector env = makeChessEnv(*vitisDir, npuVersion); - if (!runTool(xChessCCExe, chessArgs, verbose, env)) { - llvm::errs() << "Failed to link with xbridge"; - return failure(); - } - } else { - Path ldscriptPath = tempDir / (elfFileName + ".ld"); - { - auto ldscriptOutput = - openOutputFile(ldscriptPath.string(), &errorMessage); - if (!ldscriptOutput) { - llvm::errs() << "Failed to open ldscript file 
because: " - << errorMessage; - return failure(); - } - if (failed(mlir::iree_compiler::AMDAIE::AIETranslateToLdScript( - deviceOp, ldscriptOutput->os(), col, row))) { - llvm::errs() << "failed to generate ld script for core (" << col - << "," << row << ")"; - return failure(); - } - ldscriptOutput->keep(); - } + return runTool(xChessCCExe, chessArgs, verbose, env); + } - std::string targetLower = StringRef(targetArch).lower(); - std::vector flags; - flags.emplace_back(objFile); - if (ukernel && (ukernel == "mm" || ukernel == "all")) { - flags.emplace_back(mmObjectFilePath->string()); + Path ldscriptPath = tempDir / (elfFileName + ".ld"); + { + auto ldscriptOutput = + openOutputFile(ldscriptPath.string(), &errorMessage); + if (!ldscriptOutput) { + llvm::errs() << "Failed to open ldscript file because: " + << errorMessage; + return failure(); } - flags.emplace_back("--target=" + targetLower + "-none-unknown-elf"); - flags.emplace_back("-Wl,--gc-sections"); - flags.emplace_back("-Wl,--orphan-handling=error"); - flags.emplace_back("-Wl,-T," + ldscriptPath.string()); - flags.emplace_back("-o"); - flags.emplace_back(elfFile.string()); - if (verbose) flags.emplace_back("-v"); - // we run clang (ie cc) so that libc, libm, crt0/1 paths are injected - // automatically into the ld.lld invocation - if (!runTool((peanoDir / "bin" / "clang").string(), flags, verbose)) { - llvm::errs() << "failed to link elf file for core(" << col << "," << row - << ")"; + if (failed(mlir::iree_compiler::AMDAIE::AIETranslateToLdScript( + deviceOp, ldscriptOutput->os(), col, row))) { + llvm::errs() << "failed to generate ld script for core (" << col << "," + << row << ")\n"; return failure(); } + ldscriptOutput->keep(); + } + + std::string targetLower = StringRef(targetArch).lower(); + std::vector flags; + flags.emplace_back(objFile); + if (ukernel && (ukernel == "mm" || ukernel == "all")) { + flags.emplace_back(mmObjectFilePath->string()); + } + flags.emplace_back("--target=" + targetLower + 
"-none-unknown-elf"); + flags.emplace_back("-Wl,--gc-sections"); + flags.emplace_back("-Wl,--orphan-handling=error"); + flags.emplace_back("-Wl,-T," + ldscriptPath.string()); + flags.emplace_back("-o"); + flags.emplace_back(elfFile.string()); + if (verbose) flags.emplace_back("-v"); + // we run clang (ie cc) so that libc, libm, crt0/1 paths are injected + // automatically into the ld.lld invocation + if (failed( + runTool((peanoDir / "bin" / "clang").string(), flags, verbose))) { + return failure(); } } return success(); @@ -637,7 +653,6 @@ static LogicalResult generateCDO(MLIRContext *context, AIE::DeviceOp deviceOp, const Path &tempDir) { auto copy = cast(deviceOp.getParentOp()->clone()); deviceOp = *copy.getOps().begin(); - std::string errorMessage; PassManager passManager(context, AIE::DeviceOp::getOperationName()); applyConfigToPassManager(passManager, printIRBeforeAll, printIRAfterAll, @@ -878,75 +893,68 @@ static LogicalResult generateXCLBin( FailureOr xclbinutilBin = findAMDAIETool("iree-aie-xclbinutil", amdAIEInstallDir); - { - if (inputXclbin) { - // Create aie_partition.json. 
- Path aieInputPartitionJsonFile = tempDir / "aie_input_partition.json"; - std::string inputPartArg = - "AIE_PARTITION:JSON:" + aieInputPartitionJsonFile.string(); - std::vector inputFlags{"--dump-section", inputPartArg, - "--force", "--input", *inputXclbin}; - - if (!succeeded(xclbinutilBin) || - !runTool(xclbinutilBin.value().string(), inputFlags, verbose)) { - llvm::errs() << "failed to execute xclbinutil"; - return failure(); - } - auto aieInputPartitionOut = - openInputFile(aieInputPartitionJsonFile.string(), &errorMessage); - if (!aieInputPartitionOut) { - llvm::errs() << "failed to open aie_input_partition.json because: " - << errorMessage; - return failure(); - } - Expected aieInputPartitionOutValue = - llvm::json::parse(aieInputPartitionOut->getBuffer()); - json::Array *aieInputPartionPDIs; - aieInputPartionPDIs = aieInputPartitionOutValue->getAsObject() - ->getObject("aie_partition") - ->getArray("PDIs"); - auto aiePartitionOut = - openInputFile(aiePartitionJsonFile.string(), &errorMessage); - if (!aiePartitionOut) { - llvm::errs() << "failed to open aie aie_input_partition.json for " - "output because: " - << errorMessage; - return failure(); - } - llvm::Expected aiePartitionOutValue = - llvm::json::parse(aiePartitionOut->getBuffer()); - json::Array *aiePartionPDIs; - aiePartionPDIs = aiePartitionOutValue->getAsObject() - ->getObject("aie_partition") - ->getArray("PDIs"); - aieInputPartionPDIs->insert(aieInputPartionPDIs->end(), - aiePartionPDIs->begin(), - aiePartionPDIs->end()); - // rewrite aie partion json file - if (auto maybeErr = - dumpStrToDisk(formatv("{0:2}", *aieInputPartitionOutValue), - aiePartitionJsonFile.string()); - maybeErr.has_value()) { - llvm::errs() - << "failed to dump to disk aie_input_partition.json because: " - << errorMessage; - return failure(); - } - flags.insert(flags.end(), {"--input", *inputXclbin}); - } else { - flags.insert(flags.end(), {"--add-replace-section", memArg}); - } - flags.insert(flags.end(), {"--add-kernel", 
kernelsJsonFile.string(), - "--add-replace-section", partArg, "--force", - "--output", std::string(Output)}); + if (failed(xclbinutilBin)) return failure(); - if (!succeeded(xclbinutilBin) || - !runTool(xclbinutilBin.value().string(), flags, verbose)) { + if (!inputXclbin) { + flags.insert(flags.end(), {"--add-replace-section", memArg}); + } else { + // Create aie_partition.json. + Path aieInputPartitionJsonFile = tempDir / "aie_input_partition.json"; + std::string inputPartArg = + "AIE_PARTITION:JSON:" + aieInputPartitionJsonFile.string(); + std::vector inputFlags{"--dump-section", inputPartArg, + "--force", "--input", *inputXclbin}; + + if (failed(runTool(xclbinutilBin.value().string(), inputFlags, verbose))) { llvm::errs() << "failed to execute xclbinutil"; return failure(); } + auto aieInputPartitionOut = + openInputFile(aieInputPartitionJsonFile.string(), &errorMessage); + if (!aieInputPartitionOut) { + llvm::errs() << "failed to open aie_input_partition.json because: " + << errorMessage; + return failure(); + } + Expected aieInputPartitionOutValue = + llvm::json::parse(aieInputPartitionOut->getBuffer()); + json::Array *aieInputPartionPDIs; + aieInputPartionPDIs = aieInputPartitionOutValue->getAsObject() + ->getObject("aie_partition") + ->getArray("PDIs"); + auto aiePartitionOut = + openInputFile(aiePartitionJsonFile.string(), &errorMessage); + if (!aiePartitionOut) { + llvm::errs() << "failed to open aie aie_input_partition.json for " + "output because: " + << errorMessage; + return failure(); + } + llvm::Expected aiePartitionOutValue = + llvm::json::parse(aiePartitionOut->getBuffer()); + json::Array *aiePartionPDIs; + aiePartionPDIs = aiePartitionOutValue->getAsObject() + ->getObject("aie_partition") + ->getArray("PDIs"); + aieInputPartionPDIs->insert(aieInputPartionPDIs->end(), + aiePartionPDIs->begin(), aiePartionPDIs->end()); + // rewrite aie partion json file + if (auto maybeErr = + dumpStrToDisk(formatv("{0:2}", *aieInputPartitionOutValue), + 
aiePartitionJsonFile.string()); + maybeErr.has_value()) { + llvm::errs() + << "failed to dump to disk aie_input_partition.json because: " + << errorMessage; + return failure(); + } + flags.insert(flags.end(), {"--input", *inputXclbin}); } - return success(); + flags.insert(flags.end(), {"--add-kernel", kernelsJsonFile.string(), + "--add-replace-section", partArg, "--force", + "--output", std::string(Output)}); + + return runTool(xclbinutilBin.value().string(), flags, verbose); } static std::string chesshack(const std::string &input) { @@ -1019,8 +1027,8 @@ struct RemoveAlignment2FromLLVMLoadPass static LogicalResult generateUnifiedObject( MLIRContext *context, AIE::DeviceOp deviceOp, const std::string &outputFile, bool printIRBeforeAll, bool printIRAfterAll, bool printIRModuleScope, - bool timing, bool useChess, bool verbose, Path tempDir, - std::optional vitisDir, const std::string &targetArch, Path peanoDir, + bool timing, bool useChess, bool verbose, Path &tempDir, + std::optional vitisDir, const std::string &targetArch, Path &peanoDir, const std::string &npuVersion) { assert(deviceOp->getParentOp() && isa(deviceOp->getParentOp()) && "DeviceOp must be in a module parent"); @@ -1045,13 +1053,16 @@ static LogicalResult generateUnifiedObject( llvm::outs() << "\n"; } - if (failed(pm.run(moduleOpCopy))) - return deviceOp.emitOpError("Failed to lower to LLVM"); + if (failed(pm.run(moduleOpCopy))) { + llvm::errs() << "Failed to lower to LLVM"; + return failure(); + } llvm::LLVMContext llvmContext; auto llvmModule = translateModuleToLLVMIR(moduleOpCopy, llvmContext); if (!llvmModule) { - return deviceOp.emitOpError("Failed to translate module to LLVMIR"); + llvm::errs() << "Failed to translate module to LLVMIR"; + return failure(); } std::string inputLLStr; @@ -1066,7 +1077,7 @@ static LogicalResult generateUnifiedObject( std::string inputLLChessHackedStr = chesshack(inputLLStr); FailureOr maybeVitisDir = findVitis(vitisDir, npuVersion); if (failed(maybeVitisDir)) 
return failure(); - FailureOr chessIntrinsicsObjFile = assembleStringUsingChess( + FailureOr chessIntrinsicsObjFile = assembleStringUsingChess( /*inputFileStr=*/inputLLChessHackedStr, /*inputFileName=*/"input.chesshacked.ll", /*outputFileName=*/outputFile, @@ -1098,18 +1109,18 @@ static LogicalResult generateUnifiedObject( std::vector peanoArgs = makePeanoOptArgs(); args.reserve(args.size() + peanoArgs.size()); args.insert(args.end(), peanoArgs.begin(), peanoArgs.end()); - if (!runTool(peanoOptBin.string(), args, verbose)) { + if (failed(runTool(peanoOptBin.string(), args, verbose))) { llvm::errs() << "Failed to optimize ll with peano"; return failure(); } - if (!runTool( + if (failed(runTool( peanoLLCBin.string(), {OptLLVMIRFile.string(), "-O2", "--march=" + StringRef(targetArch).lower(), "--function-sections", "--filetype=obj", "-o", std::string(outputFile)}, - verbose)) { - llvm::errs() << "Failed to assemble ll with peano"; + verbose))) { + llvm::errs() << "Failed to assemble ll with peano\n"; return failure(); } } @@ -1156,8 +1167,10 @@ LogicalResult aie2xclbin( PassManager pm(ctx, AIE::DeviceOp::getOperationName()); applyConfigToPassManager(pm, printIRBeforeAll, printIRAfterAll, printIRModuleScope, timing); - if (failed(pm.run(deviceOp))) - return deviceOp.emitOpError(": NPU Instruction pipeline failed"); + if (failed(pm.run(deviceOp))) { + llvm::errs() << ": NPU Instruction pipeline failed"; + return failure(); + } FailureOr> maybeNpuInstructions = getNpuInstructions(deviceOp); @@ -1171,32 +1184,47 @@ LogicalResult aie2xclbin( auto output = openOutputFile(outputNPU, &errorMessage); if (!output) { llvm::errs() << "Failed to open npu_instructions.txt for writing because: " - << errorMessage; + << errorMessage << "\n"; return failure(); } for (uint32_t w : npuInstructions) output->os() << llvm::format("%08X\n", w); output->keep(); - Path unifiedObj = Path(tempDir) / "input.o"; + Path tempDirPath{tempDir}; + tempDirPath.make_preferred(); + Path 
peanoDirPath{peanoDir}; + peanoDirPath.make_preferred(); + std::optional vitisDirPath{vitisDir}; + if (vitisDirPath) vitisDirPath->make_preferred(); + + Path unifiedObj = tempDirPath / "input.o"; if (failed(generateUnifiedObject( ctx, deviceOp, unifiedObj.string(), printIRBeforeAll, printIRAfterAll, - printIRModuleScope, timing, useChess, verbose, tempDir, vitisDir, - targetArch, peanoDir, npuVersion))) - return deviceOp.emitOpError("Failed to generate unified object"); + printIRModuleScope, timing, useChess, verbose, tempDirPath, + vitisDirPath, targetArch, peanoDirPath, npuVersion))) { + llvm::errs() << "Failed to generate unified object\n"; + return failure(); + } - if (failed(generateCoreElfFiles(deviceOp, unifiedObj.string(), tempDir, - useChess, vitisDir, targetArch, verbose, - peanoDir, npuVersion, ukernel))) - return deviceOp.emitOpError("Failed to generate core ELF file(s)"); + if (failed(generateCoreElfFiles(deviceOp, unifiedObj.string(), tempDirPath, + useChess, vitisDirPath, targetArch, verbose, + peanoDir, npuVersion, ukernel))) { + llvm::errs() << "Failed to generate core ELF file(s)\n"; + return failure(); + } if (failed(generateCDO(ctx, deviceOp, printIRBeforeAll, printIRAfterAll, - printIRModuleScope, timing, tempDir))) - return deviceOp.emitOpError("Failed to generate CDO"); + printIRModuleScope, timing, tempDirPath))) { + llvm::errs() << "Failed to generate CDO\n"; + return failure(); + } - if (failed(generateXCLBin(outputXCLBin, tempDir, xclBinKernelID, + if (failed(generateXCLBin(outputXCLBin, tempDirPath, xclBinKernelID, xclBinKernelName, xclBinInstanceName, - amdAIEInstallDir, verbose, InputXCLBin))) - return deviceOp.emitOpError("Failed to generate XCLBin"); + amdAIEInstallDir, verbose, InputXCLBin))) { + llvm::errs() << "Failed to generate XCLBin\n"; + return failure(); + } return success(); } diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELocalizeLogicalObjectFifo.cpp 
b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELocalizeLogicalObjectFifo.cpp index b3b3f8caa..6d7b3f7af 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELocalizeLogicalObjectFifo.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELocalizeLogicalObjectFifo.cpp @@ -52,11 +52,6 @@ scf::ForallOp getThreadMappedForallAncestor(Operation *op) { return getMappedForallAncestor(op); } -scf::ForallOp getThreadOrBlockMappedForallAncestor(Operation *op) { - return getMappedForallAncestor(op); -} - class AMDAIELocalizeLogicalObjectfifoPass : public impl::AMDAIELocalizeLogicalObjectfifoBase< AMDAIELocalizeLogicalObjectfifoPass> { diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEVectorization.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEVectorization.cpp index fa661b784..cec0c635b 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEVectorization.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEVectorization.cpp @@ -100,6 +100,7 @@ void AMDAIEVectorizationPass::runOnOperation() { RewritePatternSet vectorizationPatterns(funcOp.getContext()); vector::populateVectorReductionToContractPatterns(vectorizationPatterns); + vector::populateSinkVectorOpsPatterns(vectorizationPatterns); // Including this pattern prevents broadcasting in vector.transfer_read ops vector::populateVectorTransferPermutationMapLoweringPatterns( diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/create_reference_to_allocation.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/create_reference_to_allocation.mlir index 8bd685873..9145088dd 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/create_reference_to_allocation.mlir +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/create_reference_to_allocation.mlir @@ -1,4 +1,4 @@ -// RUN: iree-opt --split-input-file 
--pass-pipeline="builtin.module(func.func(iree-amdaie-create-reference-to-allocation, iree-hoist-statically-bound-allocations))" %s | FileCheck %s +// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-amdaie-create-reference-to-allocation, iree-codegen-hoist-statically-bound-allocations))" %s | FileCheck %s // CHECK-LABEL: func.func @single_alloc // CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<8x16xi32, 2 : i32> diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc index 44773bfdf..b59f7b725 100644 --- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc +++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc @@ -213,6 +213,10 @@ LogicalResult addElfToTile(const AMDAIEDeviceModel &deviceModel, const TileLoc &tileLoc, const Path &elfPath, bool aieSim) { auto devInst = const_cast(&deviceModel.devInst); + if (!std::filesystem::exists(elfPath)) { + llvm::errs() << "elf doesn't exist: " << elfPath.string() << "\n"; + return failure(); + } TRY_XAIE_API_LOGICAL_RESULT(XAie_LoadElf, devInst, tileLoc, elfPath.string().c_str(), /*loadSym*/ aieSim); diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h index 5f7623289..94aabf6d4 100644 --- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h +++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h @@ -102,7 +102,7 @@ BOTH_OSTREAM_OPS_FORALL_CDO_TYPES(OSTREAM_OP_DECL, BOTH_OSTREAM_OP) void initializeCDOGenerator(byte_ordering endianness, bool cdoDebug); /// Generates one of the aie_cdo*.bins. Takes a callback that makes the actual -/// calls to aie-rt but envelopes it with a prolog and an epilogue of calls to +/// calls to aie-rt but envelops it with a prolog and an epilogue of calls to /// cdo-driver that: /// /// 1. 
Starts the "cdo filestream" (literally just fopens a file) @@ -113,13 +113,13 @@ void initializeCDOGenerator(byte_ordering endianness, bool cdoDebug); /// CDO, checksum, etc. /// 5. Finishes the CDO(fcloses the file) /// -/// Note, all of the cdo APIs are simple and available at +/// Note, all the cdo APIs are simple and available at /// iree-amd-aie/third_party/bootgen/cdo-driver/cdo_driver.c LogicalResult generateCDOBinary(const std::filesystem::path &outputPath, const std::function &cb); /// "Loads" an elf which will be loaded to the program memory of a tile. Loads -/// is in quotes because where/how the elf is actaully loaded is determined by +/// is in quotes because where/how the elf is actually loaded is determined by /// the aie-rt backend; the CDO backend copies the elf byte by byte into the /// CDO. LogicalResult addElfToTile(const AMDAIEDeviceModel &deviceModel, @@ -131,7 +131,7 @@ LogicalResult resetUnResetCore(const AMDAIEDeviceModel &deviceModel, const TileLoc &tileLoc); /// Sets/programs locks with explicit initializers; note initialize here is a -/// misnomer because "unintialized" locks actually have their counters +/// misnomer because "uninitialized" locks actually have their counters /// initialized to zero anyway by the hardware. LogicalResult initializeLock(const AMDAIEDeviceModel &deviceModel, const Lock &lock); diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_runtime.h b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_runtime.h index c485bb497..774549862 100644 --- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_runtime.h +++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_runtime.h @@ -414,7 +414,7 @@ static_assert(XAIE_OK == 0); LLVM_DEBUG(llvm::dbgs().flush()); \ if (auto r = API(__VA_ARGS__)) \ llvm::report_fatal_error(llvm::Twine(#API " failed with ") + \ - to_string(r)); \ + to_string(r) + "\n"); \ } while (0) #define TRY_XAIE_API_LOGICAL_RESULT(API, ...) 
\ @@ -424,7 +424,7 @@ static_assert(XAIE_OK == 0); LLVM_DEBUG(llvm::dbgs() << "\n"); \ LLVM_DEBUG(llvm::dbgs().flush()); \ if (auto r = API(__VA_ARGS__)) { \ - llvm::errs() << #API " failed with " << r; \ + llvm::errs() << #API " failed with " << r << "\n"; \ return failure(); \ } \ } while (0) diff --git a/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc b/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc index 8ae44b877..582a789fa 100644 --- a/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc +++ b/runtime/src/iree-amd-aie/driver/xrt/native_executable.cc @@ -180,12 +180,23 @@ iree_status_t iree_hal_xrt_native_executable_create( std::unique_ptr xclbin; try { xclbin = std::make_unique(xclbinVector); - } catch (std::runtime_error& e) { + } catch (std::exception& e) { return iree_make_status(IREE_STATUS_INTERNAL, "XCLBIN load error: %s", e.what()); } - device->register_xclbin(*xclbin); - xrt::hw_context context(*device, xclbin->get_uuid()); + try { + device->register_xclbin(*xclbin); + } catch (std::exception& e) { + return iree_make_status(IREE_STATUS_INTERNAL, "XCLBIN register error: %s", + e.what()); + } + xrt::hw_context context; + try { + context = {*device, xclbin->get_uuid()}; + } catch (std::exception& e) { + return iree_make_status(IREE_STATUS_INTERNAL, + "xrt::hw_context context: %s", e.what()); + } uint32_t asm_instr_index = flatbuffers_uint32_vec_at(asm_instr_indices_vec, entry_ordinal); iree_amd_aie_hal_xrt_AsmInstDef_table_t asminst_def = @@ -202,7 +213,8 @@ iree_status_t iree_hal_xrt_native_executable_create( // the second argument to the kernel and we can use group id 1. int group_id = 1; instr = std::make_unique(*device, num_instr * sizeof(uint32_t), - XCL_BO_FLAGS_CACHEABLE, group_id); + XCL_BO_FLAGS_CACHEABLE, + kernel->group_id(group_id)); } catch (...) 
{ iree_hal_executable_destroy((iree_hal_executable_t*)executable); IREE_TRACE_ZONE_END(z0); @@ -267,8 +279,11 @@ static void iree_hal_xrt_native_executable_destroy( for (iree_host_size_t i = 0; i < executable->entry_point_count; ++i) { try { +#ifndef _WIN32 + // causes segmentation fault on windows delete executable->entry_points[i].kernel; delete executable->entry_points[i].instr; +#endif // TODO(jornt): deleting the xclbin here will result in a corrupted size // error in XRT. It looks like the xclbin needs to stay alive while the // device is alive if it has been registered. diff --git a/runtime/src/iree-amd-aie/driver/xrt/xrt_device.cc b/runtime/src/iree-amd-aie/driver/xrt/xrt_device.cc index 5e01954f3..0a2a199c1 100644 --- a/runtime/src/iree-amd-aie/driver/xrt/xrt_device.cc +++ b/runtime/src/iree-amd-aie/driver/xrt/xrt_device.cc @@ -58,13 +58,6 @@ void iree_hal_xrt_device_params_initialize( out_params->arena_block_size = 32 * 1024; } -const iree_hal_xrt_device_params_t* iree_hal_xrt_device_params( - const iree_hal_device_t* base_device) { - const iree_hal_xrt_device_t* device = - iree_hal_xrt_device_const_cast(base_device); - return &device->params; -} - static iree_status_t iree_hal_xrt_device_create_internal( iree_string_view_t identifier, xrt::device *xrt_device, const iree_hal_xrt_device_params_t* params, iree_allocator_t host_allocator, diff --git a/runtime/src/iree-amd-aie/driver/xrt/xrt_device.h b/runtime/src/iree-amd-aie/driver/xrt/xrt_device.h index 7c887610c..e55db9962 100644 --- a/runtime/src/iree-amd-aie/driver/xrt/xrt_device.h +++ b/runtime/src/iree-amd-aie/driver/xrt/xrt_device.h @@ -31,10 +31,6 @@ iree_status_t iree_hal_xrt_device_create( xrt::device* device, iree_allocator_t host_allocator, iree_hal_device_t** out_device); -// Returns the parameters used for creating the device. 
-const iree_hal_xrt_device_params_t* iree_hal_xrt_device_params( - const iree_hal_device_t* device); - #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc index b9c8aaacd..6dd8feb0a 100644 --- a/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc +++ b/runtime/src/iree-amd-aie/driver/xrt/xrt_driver.cc @@ -41,7 +41,7 @@ typedef struct iree_hal_xrt_driver_t { // Parameters used to control device behavior. iree_hal_xrt_device_params_t device_params; - xrt::device *device; + xrt::device* device; } iree_hal_xrt_driver_t; @@ -55,12 +55,6 @@ static iree_hal_xrt_driver_t* iree_hal_xrt_driver_cast( return (iree_hal_xrt_driver_t*)base_value; } -static const iree_hal_xrt_driver_t* iree_hal_xrt_driver_const_cast( - const iree_hal_driver_t* base_value) { - IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_xrt_driver_vtable); - return (const iree_hal_xrt_driver_t*)base_value; -} - static iree_status_t iree_hal_xrt_device_check_params( const iree_hal_xrt_device_params_t* params) { if (params->arena_block_size < 4096) { @@ -86,15 +80,25 @@ iree_status_t iree_hal_xrt_driver_create_internal( (char*)driver + iree_sizeof_struct(*driver)); driver->device_params = *device_params; - int device_count = xrt::system::enumerate_devices(); - if (IREE_UNLIKELY(device_count == 0)) { - return iree_make_status(IREE_STATUS_FAILED_PRECONDITION, - "No XRT devices found"); + try { + if (IREE_UNLIKELY(xrt::system::enumerate_devices() == 0)) { + return iree_make_status(IREE_STATUS_FAILED_PRECONDITION, + "No XRT devices found"); + } + } catch (std::exception& e) { + return iree_make_status(IREE_STATUS_INTERNAL, + "xrt::system::enumerate_devices failed: %s", + e.what()); } // Get handle to xrt device - global_device = xrt::device(0); - driver->device = &global_device; - *out_driver = (iree_hal_driver_t*)driver; + try { + global_device = xrt::device(0); + driver->device = &global_device; + } 
catch (std::exception& e) { + return iree_make_status(IREE_STATUS_INTERNAL, "xrt::device(0) failed: %s", + e.what()); + } + *out_driver = reinterpret_cast(driver); return iree_ok_status(); } @@ -122,13 +126,13 @@ static void iree_hal_xrt_driver_destroy(iree_hal_driver_t* base_driver) { iree_allocator_free(host_allocator, driver); IREE_TRACE_ZONE_END(z0); - return; } + static iree_status_t iree_hal_xrt_driver_dump_device_info( iree_hal_driver_t* base_driver, iree_hal_device_id_t device_id, iree_string_builder_t* builder) { iree_hal_xrt_driver_t* driver = iree_hal_xrt_driver_cast(base_driver); - xrt::device *device = driver->device; + xrt::device* device = driver->device; IREE_RETURN_IF_ERROR( iree_string_builder_append_cstring(builder, "\n- Platform:")); @@ -147,7 +151,7 @@ static iree_status_t iree_hal_xrt_driver_dump_device_info( // |out_device_info| must point to valid memory and additional data will be // appended to |buffer_ptr| and the new pointer is returned. static iree_status_t iree_hal_xrt_populate_device_info( - xrt::device *device, uint8_t* buffer_ptr, uint8_t** out_buffer_ptr, + xrt::device* device, uint8_t* buffer_ptr, uint8_t** out_buffer_ptr, iree_hal_device_info_t* out_device_info) { *out_buffer_ptr = buffer_ptr; @@ -183,7 +187,7 @@ static iree_status_t iree_hal_xrt_driver_query_available_devices( iree_host_size_t* out_device_info_count, iree_hal_device_info_t** out_device_infos) { iree_hal_xrt_driver_t* driver = iree_hal_xrt_driver_cast(base_driver); - xrt::device *device = driver->device; + xrt::device* device = driver->device; // Allocate the return infos and populate with the devices. 
iree_hal_device_info_t* device_infos = NULL; iree_host_size_t single_info_size = diff --git a/sync_deps.py b/sync_deps.py index 9f8444748..0d0e470c7 100644 --- a/sync_deps.py +++ b/sync_deps.py @@ -7,7 +7,7 @@ ### Update with: shark-workspace pin PINNED_VERSIONS = { - "iree": "5a48912c52f65ead960bec9bfde5a836d1b02ab2", + "iree": "60b65f30c932eaf967922785253a85a1aa14cebb", } ORIGINS = { diff --git a/tests/matmul/requirements.txt b/tests/matmul/requirements.txt index 62116914f..3f48cf4a4 100644 --- a/tests/matmul/requirements.txt +++ b/tests/matmul/requirements.txt @@ -1,3 +1,4 @@ PyYAML>=5.4.1 requests>=2.28.0 -enum_tools==0.6.4 \ No newline at end of file +enum_tools==0.6.4 +numpy<2 diff --git a/tests/samples/conv_pipeline_e2e.mlir b/tests/samples/conv_pipeline_e2e.mlir index 7c6957017..ffe8222f8 100644 --- a/tests/samples/conv_pipeline_e2e.mlir +++ b/tests/samples/conv_pipeline_e2e.mlir @@ -24,7 +24,7 @@ func.func @conv_2d_nhwc_hwcf_q(%arg0: tensor<2x14x14x32xi8>, %arg1: tensor<3x3x3 return %2 : tensor<2x12x12x64xi32> } -// CHECK-LABEL: hal.executable.export public @conv_2d_nhwc_hwcf_q_dispatch_0_conv_2d_nhwc_hwcf_q_2x12x12x64x3x3x32_i8xi8xi32xi32xi32 +// CHECK-LABEL: hal.executable.export public @conv_2d_nhwc_hwcf_q_dispatch_0_conv_2d_nhwc_hwcf_2x12x12x64x3x3x32_i8xi8xi32 // CHECK: aie.device(npu1_4col) // CHECK: aie.shim_dma_allocation // CHECK: aie.shim_dma_allocation diff --git a/third_party/XRT b/third_party/XRT index 8d070495d..a9fdf618c 160000 --- a/third_party/XRT +++ b/third_party/XRT @@ -1 +1 @@ -Subproject commit 8d070495d092a2e773f2360cbff4fa29138da67d +Subproject commit a9fdf618ceba32d28bbf6715a5ee627a51a74b24