From f21d029d181ebd65671efab1a58d904839177226 Mon Sep 17 00:00:00 2001 From: James Lin Date: Thu, 14 Sep 2023 10:32:31 -0500 Subject: [PATCH] Add more combined precision tosa tests (#637) Add the following passing element-wise tosa tests: - i32xi32_add_elem (lane=32) - i32xi32_mul_elem (lane=16) - i32xi32_sel (lane=16) - i32xi32_sub_elem (lane=32) Add the following passing combined precision element-wise tosa tests: - i8xi16_add_elem (lane=32) - i8xi16_sub_elem (lane=32) - i8xi32_add_elem (lane=32) - i8xi32_sub_elem (lane=32) - i16xi32_add_elem_v16 (lane=16) - i16xi32_sub_elem_v16 (lane=16) Add the following XFAIL combined precision element-wise tosa tests: - i16xi32_add_elem_v32 (lane=32) - i16xi32_sub_elem_v32 (lane=32) --- .../TOSA/i16xi32_add_elem_v16/defines.h | 4 ++ .../Dialect/TOSA/i16xi32_add_elem_v16/dut.cc | 20 ++++++ .../i16xi32_add_elem.mlir | 22 +++++++ .../TOSA/i16xi32_add_elem_v16/testbench.cc | 57 ++++++++++++++++++ .../TOSA/i16xi32_add_elem_v32/defines.h | 4 ++ .../i16xi32_add_elem.mlir | 23 +++++++ .../TOSA/i16xi32_add_elem_v32/testbench.cc | 57 ++++++++++++++++++ .../TOSA/i16xi32_sub_elem_v16/defines.h | 4 ++ .../Dialect/TOSA/i16xi32_sub_elem_v16/dut.cc | 20 ++++++ .../i16xi32_sub_elem.mlir | 22 +++++++ .../TOSA/i16xi32_sub_elem_v16/testbench.cc | 57 ++++++++++++++++++ .../TOSA/i16xi32_sub_elem_v32/defines.h | 4 ++ .../i16xi32_sub_elem.mlir | 23 +++++++ .../TOSA/i16xi32_sub_elem_v32/testbench.cc | 57 ++++++++++++++++++ .../Dialect/TOSA/i32xi32_add_elem/defines.h | 4 ++ .../Dialect/TOSA/i32xi32_add_elem/dut.cc | 20 ++++++ .../i32xi32_add_elem/i32xi32_add_elem.mlir | 19 ++++++ .../TOSA/i32xi32_add_elem/testbench.cc | 55 +++++++++++++++++ .../Dialect/TOSA/i32xi32_mul_elem/defines.h | 4 ++ .../Dialect/TOSA/i32xi32_mul_elem/dut.cc | 18 ++++++ .../i32xi32_mul_elem/i32xi32_mul_elem.mlir | 19 ++++++ .../TOSA/i32xi32_mul_elem/testbench.cc | 55 +++++++++++++++++ .../Dialect/TOSA/i32xi32_sel/defines.h | 4 ++ .../Dialect/TOSA/i32xi32_sel/dut.cc | 18 ++++++ .../Dialect/TOSA/i32xi32_sel/i32xi32_sel.mlir | 20 ++++++ .../Dialect/TOSA/i32xi32_sel/testbench.cc | 57 ++++++++++++++++++ .../Dialect/TOSA/i32xi32_sub_elem/defines.h | 4 ++ .../Dialect/TOSA/i32xi32_sub_elem/dut.cc | 20 ++++++ .../i32xi32_sub_elem/i32xi32_sub_elem.mlir | 19 ++++++ .../TOSA/i32xi32_sub_elem/testbench.cc | 55 +++++++++++++++++ .../Dialect/TOSA/i8xi16_add_elem/defines.h | 4 ++ .../Dialect/TOSA/i8xi16_add_elem/dut.cc | 20 ++++++ .../TOSA/i8xi16_add_elem/i8xi16_add_elem.mlir | 23 +++++++ .../Dialect/TOSA/i8xi16_add_elem/testbench.cc | 57 ++++++++++++++++++ .../Dialect/TOSA/i8xi32_add_elem/defines.h | 4 ++ .../Dialect/TOSA/i8xi32_add_elem/dut.cc | 20 ++++++ .../TOSA/i8xi32_add_elem/i8xi32_add_elem.mlir | 22 +++++++ .../Dialect/TOSA/i8xi32_add_elem/testbench.cc | 57 ++++++++++++++++++ .../Dialect/TOSA/i8xi32_sub_elem/defines.h | 4 ++ .../Dialect/TOSA/i8xi32_sub_elem/dut.cc | 20 ++++++ .../TOSA/i8xi32_sub_elem/i8xi32_sub_elem.mlir | 22 +++++++ .../Dialect/TOSA/i8xi32_sub_elem/testbench.cc | 57 ++++++++++++++++++ .../.bf16xbf16_mul_elem.mlir.swp | Bin 12288 -> 0 bytes 43 files changed, 1075 insertions(+) create mode 100644 test/Integration/Dialect/TOSA/i16xi32_add_elem_v16/defines.h create mode 100644 test/Integration/Dialect/TOSA/i16xi32_add_elem_v16/dut.cc create mode 100644 test/Integration/Dialect/TOSA/i16xi32_add_elem_v16/i16xi32_add_elem.mlir create mode 100644 test/Integration/Dialect/TOSA/i16xi32_add_elem_v16/testbench.cc create mode 100644 test/Integration/Dialect/TOSA/i16xi32_add_elem_v32/defines.h create mode 100644 test/Integration/Dialect/TOSA/i16xi32_add_elem_v32/i16xi32_add_elem.mlir create mode 100644 test/Integration/Dialect/TOSA/i16xi32_add_elem_v32/testbench.cc create mode 100644 test/Integration/Dialect/TOSA/i16xi32_sub_elem_v16/defines.h create mode 100644 test/Integration/Dialect/TOSA/i16xi32_sub_elem_v16/dut.cc create mode 100644 test/Integration/Dialect/TOSA/i16xi32_sub_elem_v16/i16xi32_sub_elem.mlir create mode 100644 test/Integration/Dialect/TOSA/i16xi32_sub_elem_v16/testbench.cc create mode 100644 test/Integration/Dialect/TOSA/i16xi32_sub_elem_v32/defines.h create mode 100644 test/Integration/Dialect/TOSA/i16xi32_sub_elem_v32/i16xi32_sub_elem.mlir create mode 100644 test/Integration/Dialect/TOSA/i16xi32_sub_elem_v32/testbench.cc create mode 100644 test/Integration/Dialect/TOSA/i32xi32_add_elem/defines.h create mode 100644 test/Integration/Dialect/TOSA/i32xi32_add_elem/dut.cc create mode 100644 test/Integration/Dialect/TOSA/i32xi32_add_elem/i32xi32_add_elem.mlir create mode 100644 test/Integration/Dialect/TOSA/i32xi32_add_elem/testbench.cc create mode 100644 test/Integration/Dialect/TOSA/i32xi32_mul_elem/defines.h create mode 100644 test/Integration/Dialect/TOSA/i32xi32_mul_elem/dut.cc create mode 100644 test/Integration/Dialect/TOSA/i32xi32_mul_elem/i32xi32_mul_elem.mlir create mode 100644 test/Integration/Dialect/TOSA/i32xi32_mul_elem/testbench.cc create mode 100644 test/Integration/Dialect/TOSA/i32xi32_sel/defines.h create mode 100644 test/Integration/Dialect/TOSA/i32xi32_sel/dut.cc create mode 100644 test/Integration/Dialect/TOSA/i32xi32_sel/i32xi32_sel.mlir create mode 100644 test/Integration/Dialect/TOSA/i32xi32_sel/testbench.cc create mode 100644 test/Integration/Dialect/TOSA/i32xi32_sub_elem/defines.h create mode 100644 test/Integration/Dialect/TOSA/i32xi32_sub_elem/dut.cc create mode 100644 test/Integration/Dialect/TOSA/i32xi32_sub_elem/i32xi32_sub_elem.mlir create mode 100644 test/Integration/Dialect/TOSA/i32xi32_sub_elem/testbench.cc create mode 100644 test/Integration/Dialect/TOSA/i8xi16_add_elem/defines.h create mode 100644 test/Integration/Dialect/TOSA/i8xi16_add_elem/dut.cc create mode 100644 test/Integration/Dialect/TOSA/i8xi16_add_elem/i8xi16_add_elem.mlir create mode 100644 test/Integration/Dialect/TOSA/i8xi16_add_elem/testbench.cc create mode 100644 test/Integration/Dialect/TOSA/i8xi32_add_elem/defines.h create mode 100644 test/Integration/Dialect/TOSA/i8xi32_add_elem/dut.cc create mode 100644 test/Integration/Dialect/TOSA/i8xi32_add_elem/i8xi32_add_elem.mlir create mode 100644 test/Integration/Dialect/TOSA/i8xi32_add_elem/testbench.cc create mode 100644 test/Integration/Dialect/TOSA/i8xi32_sub_elem/defines.h create mode 100644 test/Integration/Dialect/TOSA/i8xi32_sub_elem/dut.cc create mode 100644 test/Integration/Dialect/TOSA/i8xi32_sub_elem/i8xi32_sub_elem.mlir create mode 100644 test/Integration/Dialect/TOSA/i8xi32_sub_elem/testbench.cc delete mode 100644 test/unit_tests/aievec_tests/bf16xbf16_mul_elem_2/.bf16xbf16_mul_elem.mlir.swp diff --git a/test/Integration/Dialect/TOSA/i16xi32_add_elem_v16/defines.h b/test/Integration/Dialect/TOSA/i16xi32_add_elem_v16/defines.h new file mode 100644 index 0000000000..b0366ff425 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i16xi32_add_elem_v16/defines.h @@ -0,0 +1,4 @@ +#pragma once +constexpr unsigned const IN0_SIZE = 1024; +constexpr unsigned const IN1_SIZE = 1024; +constexpr unsigned const OUT0_SIZE = 1024; diff --git a/test/Integration/Dialect/TOSA/i16xi32_add_elem_v16/dut.cc b/test/Integration/Dialect/TOSA/i16xi32_add_elem_v16/dut.cc new file mode 100644 index 0000000000..4396f3fb46 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i16xi32_add_elem_v16/dut.cc @@ -0,0 +1,20 @@ +// clang-format off +void dut(int16_t * restrict v1, int32_t * restrict v2, int32_t * restrict v3) { + size_t v4 = 0; + size_t v5 = 1024; + size_t v6 = 16; + for (size_t v7 = v4; v7 < v5; v7 += v6) + chess_prepare_for_pipelining + chess_loop_range(64, 64) + { + v16int16 v8 = *(v16int16 *)(v1 + v7); + v16int32 v9 = *(v16int32 *)(v2 + v7); + v16acc64 v10 = ups_to_v16acc64(v8, 0); + v16acc64 v11 = ups_to_v16acc64(v9, 0); + v16acc64 v12 = add(v10, v11); + v16int32 v13 = srs_to_v16int32(v12, 0); + *(v16int32 *)(v3 + v7) = v13; + } + return; +} +// clang-format on diff --git a/test/Integration/Dialect/TOSA/i16xi32_add_elem_v16/i16xi32_add_elem.mlir b/test/Integration/Dialect/TOSA/i16xi32_add_elem_v16/i16xi32_add_elem.mlir new file mode 100644 index 0000000000..49a69c2f8f --- /dev/null +++ b/test/Integration/Dialect/TOSA/i16xi32_add_elem_v16/i16xi32_add_elem.mlir @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2023, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg))" -o linalg.mlir +// RUN: mlir-opt linalg.mlir --linalg-fuse-elementwise-ops --eliminate-empty-tensors --empty-tensor-to-alloc-tensor --one-shot-bufferize="allow-return-allocs allow-unknown-ops bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" --drop-equivalent-buffer-results --buffer-results-to-out-params --buffer-deallocation --canonicalize --cse --convert-linalg-to-affine-loops --affine-super-vectorize="virtual-vector-size=16" -o affine.mlir +// RUN: aie-opt affine.mlir --convert-vector-to-aievec="aie-target=aieml" -lower-affine -o aievec.mlir +// RUN: aie-translate aievec.mlir -aieml=true --aievec-to-cpp -o dut.cc +// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc dut.cc +// RUN: mkdir -p data +// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout +// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s +// CHECK: TEST PASSED + +module { + func.func @dut(%arg0: tensor<1024xi16>, %arg1: tensor<1024xi32>) -> (tensor<1024xi32>) { + %0 = "tosa.cast"(%arg0) : (tensor<1024xi16>) -> tensor<1024xi32> + %2 = "tosa.add"(%0,%arg1) : (tensor<1024xi32>, tensor<1024xi32>) -> (tensor<1024xi32>) + return %2 : tensor<1024xi32> + } +} + diff --git a/test/Integration/Dialect/TOSA/i16xi32_add_elem_v16/testbench.cc b/test/Integration/Dialect/TOSA/i16xi32_add_elem_v16/testbench.cc new file mode 100644 index 0000000000..0b0a363378 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i16xi32_add_elem_v16/testbench.cc @@ -0,0 +1,57 @@ +#include "../common/testbench.h" +#include "defines.h" +#include +#include +#include +#include +void dut(int16_t *restrict in0, int32_t *restrict in1, int32_t *restrict out0); +void dut_ref(int16_t *in0, int32_t *in1, int32_t *out0); + +alignas(32) int16_t g_in0[IN0_SIZE]; +alignas(32) int32_t g_in1[IN1_SIZE]; +alignas(32) int32_t g_out0[OUT0_SIZE]; +alignas(32) int32_t g_out0Ref[OUT0_SIZE]; + +int main(int argc, char *argv[]) { + // XXX Figure out how to use argv with xme_ca_udm_dbg -A + std::string dataDir(TO_STR(DATA_DIR)); + srand(10); + std::generate(g_in0, g_in0 + IN0_SIZE, + [&]() { return random_integer(); }); + std::generate(g_in1, g_in1 + IN1_SIZE, + [&]() { return random_integer(); }); + + writeData(g_in0, IN0_SIZE, dataDir + "/in0.txt"); + writeData(g_in1, IN1_SIZE, dataDir + "/in1.txt"); + + chess_memory_fence(); + auto cyclesBegin = chess_cycle_count(); + dut(g_in0, g_in1, g_out0); + auto cyclesEnd = chess_cycle_count(); + chess_memory_fence(); + + auto cycleCount = (int)(cyclesEnd - cyclesBegin); + reportCycleCount(cycleCount, dataDir + "/cycle_count.txt"); + + writeData(g_out0, OUT0_SIZE, dataDir + "/out0.txt"); + + dut_ref(g_in0, g_in1, g_out0Ref); + writeData(g_out0Ref, OUT0_SIZE, dataDir + "/out0_ref.txt"); + + bool ok = true; + ok &= checkData(g_out0, g_out0Ref, OUT0_SIZE); + + if (ok) + printf("TEST PASSED\n"); + else + printf("TEST FAILED\n"); + + return ok ? 0 : 1; +} + +// in0, in1, out0 are in C4 layout. +void dut_ref(int16_t *in0, int32_t *in1, int32_t *out0) { + for (unsigned k = 0; k < OUT0_SIZE; k += 1) { + out0[k] = in0[k] + in1[k]; + } +} diff --git a/test/Integration/Dialect/TOSA/i16xi32_add_elem_v32/defines.h b/test/Integration/Dialect/TOSA/i16xi32_add_elem_v32/defines.h new file mode 100644 index 0000000000..b0366ff425 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i16xi32_add_elem_v32/defines.h @@ -0,0 +1,4 @@ +#pragma once +constexpr unsigned const IN0_SIZE = 1024; +constexpr unsigned const IN1_SIZE = 1024; +constexpr unsigned const OUT0_SIZE = 1024; diff --git a/test/Integration/Dialect/TOSA/i16xi32_add_elem_v32/i16xi32_add_elem.mlir b/test/Integration/Dialect/TOSA/i16xi32_add_elem_v32/i16xi32_add_elem.mlir new file mode 100644 index 0000000000..ab617c85da --- /dev/null +++ b/test/Integration/Dialect/TOSA/i16xi32_add_elem_v32/i16xi32_add_elem.mlir @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2023, Advanced Micro Devices, Inc. + +// XFAIL: * +// REQUIRES: valid_xchess_license +// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg))" -o linalg.mlir +// RUN: mlir-opt linalg.mlir --linalg-fuse-elementwise-ops --eliminate-empty-tensors --empty-tensor-to-alloc-tensor --one-shot-bufferize="allow-return-allocs allow-unknown-ops bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" --drop-equivalent-buffer-results --buffer-results-to-out-params --buffer-deallocation --canonicalize --cse --convert-linalg-to-affine-loops --affine-super-vectorize="virtual-vector-size=32" -o affine.mlir +// RUN: aie-opt affine.mlir --convert-vector-to-aievec="aie-target=aieml" -lower-affine -o aievec.mlir +// RUN: aie-translate aievec.mlir -aieml=true --aievec-to-cpp -o dut.cc +// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc dut.cc +// RUN: mkdir -p data +// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout +// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s +// CHECK: TEST PASSED + +module { + func.func @dut(%arg0: tensor<1024xi16>, %arg1: tensor<1024xi32>) -> (tensor<1024xi32>) { + %0 = "tosa.cast"(%arg0) : (tensor<1024xi16>) -> tensor<1024xi32> + %2 = "tosa.add"(%0,%arg1) : (tensor<1024xi32>, tensor<1024xi32>) -> (tensor<1024xi32>) + return %2 : tensor<1024xi32> + } +} + diff --git a/test/Integration/Dialect/TOSA/i16xi32_add_elem_v32/testbench.cc b/test/Integration/Dialect/TOSA/i16xi32_add_elem_v32/testbench.cc new file mode 100644 index 0000000000..0b0a363378 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i16xi32_add_elem_v32/testbench.cc @@ -0,0 +1,57 @@ +#include "../common/testbench.h" +#include "defines.h" +#include +#include +#include +#include +void dut(int16_t *restrict in0, int32_t *restrict in1, int32_t *restrict out0); +void dut_ref(int16_t *in0, int32_t *in1, int32_t *out0); + +alignas(32) int16_t g_in0[IN0_SIZE]; +alignas(32) int32_t g_in1[IN1_SIZE]; +alignas(32) int32_t g_out0[OUT0_SIZE]; +alignas(32) int32_t g_out0Ref[OUT0_SIZE]; + +int main(int argc, char *argv[]) { + // XXX Figure out how to use argv with xme_ca_udm_dbg -A + std::string dataDir(TO_STR(DATA_DIR)); + srand(10); + std::generate(g_in0, g_in0 + IN0_SIZE, + [&]() { return random_integer(); }); + std::generate(g_in1, g_in1 + IN1_SIZE, + [&]() { return random_integer(); }); + + writeData(g_in0, IN0_SIZE, dataDir + "/in0.txt"); + writeData(g_in1, IN1_SIZE, dataDir + "/in1.txt"); + + chess_memory_fence(); + auto cyclesBegin = chess_cycle_count(); + dut(g_in0, g_in1, g_out0); + auto cyclesEnd = chess_cycle_count(); + chess_memory_fence(); + + auto cycleCount = (int)(cyclesEnd - cyclesBegin); + reportCycleCount(cycleCount, dataDir + "/cycle_count.txt"); + + writeData(g_out0, OUT0_SIZE, dataDir + "/out0.txt"); + + dut_ref(g_in0, g_in1, g_out0Ref); + writeData(g_out0Ref, OUT0_SIZE, dataDir + "/out0_ref.txt"); + + bool ok = true; + ok &= checkData(g_out0, g_out0Ref, OUT0_SIZE); + + if (ok) + printf("TEST PASSED\n"); + else + printf("TEST FAILED\n"); + + return ok ? 0 : 1; +} + +// in0, in1, out0 are in C4 layout. +void dut_ref(int16_t *in0, int32_t *in1, int32_t *out0) { + for (unsigned k = 0; k < OUT0_SIZE; k += 1) { + out0[k] = in0[k] + in1[k]; + } +} diff --git a/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v16/defines.h b/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v16/defines.h new file mode 100644 index 0000000000..b0366ff425 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v16/defines.h @@ -0,0 +1,4 @@ +#pragma once +constexpr unsigned const IN0_SIZE = 1024; +constexpr unsigned const IN1_SIZE = 1024; +constexpr unsigned const OUT0_SIZE = 1024; diff --git a/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v16/dut.cc b/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v16/dut.cc new file mode 100644 index 0000000000..9764d4dcc0 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v16/dut.cc @@ -0,0 +1,20 @@ +// clang-format off +void dut(int16_t * restrict v1, int32_t * restrict v2, int32_t * restrict v3) { + size_t v4 = 0; + size_t v5 = 1024; + size_t v6 = 16; + for (size_t v7 = v4; v7 < v5; v7 += v6) + chess_prepare_for_pipelining + chess_loop_range(64, 64) + { + v16int16 v8 = *(v16int16 *)(v1 + v7); + v16int32 v9 = *(v16int32 *)(v2 + v7); + v16acc64 v10 = ups_to_v16acc64(v8, 0); + v16acc64 v11 = ups_to_v16acc64(v9, 0); + v16acc64 v12 = sub(v10, v11); + v16int32 v13 = srs_to_v16int32(v12, 0); + *(v16int32 *)(v3 + v7) = v13; + } + return; +} +// clang-format on diff --git a/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v16/i16xi32_sub_elem.mlir b/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v16/i16xi32_sub_elem.mlir new file mode 100644 index 0000000000..0af7ad905c --- /dev/null +++ b/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v16/i16xi32_sub_elem.mlir @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2023, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg))" -o linalg.mlir +// RUN: mlir-opt linalg.mlir --linalg-fuse-elementwise-ops --eliminate-empty-tensors --empty-tensor-to-alloc-tensor --one-shot-bufferize="allow-return-allocs allow-unknown-ops bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" --drop-equivalent-buffer-results --buffer-results-to-out-params --buffer-deallocation --canonicalize --cse --convert-linalg-to-affine-loops --affine-super-vectorize="virtual-vector-size=16" -o affine.mlir +// RUN: aie-opt affine.mlir --convert-vector-to-aievec="aie-target=aieml" -lower-affine -o aievec.mlir +// RUN: aie-translate aievec.mlir -aieml=true --aievec-to-cpp -o dut.cc +// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc dut.cc +// RUN: mkdir -p data +// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout +// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s +// CHECK: TEST PASSED + +module { + func.func @dut(%arg0: tensor<1024xi16>, %arg1: tensor<1024xi32>) -> (tensor<1024xi32>) { + %0 = "tosa.cast"(%arg0) : (tensor<1024xi16>) -> tensor<1024xi32> + %2 = "tosa.sub"(%0,%arg1) : (tensor<1024xi32>, tensor<1024xi32>) -> (tensor<1024xi32>) + return %2 : tensor<1024xi32> + } +} + diff --git a/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v16/testbench.cc b/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v16/testbench.cc new file mode 100644 index 0000000000..fd55c97690 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v16/testbench.cc @@ -0,0 +1,57 @@ +#include "../common/testbench.h" +#include "defines.h" +#include +#include +#include +#include +void dut(int16_t *restrict in0, int32_t *restrict in1, int32_t *restrict out0); +void dut_ref(int16_t *in0, int32_t *in1, int32_t *out0); + +alignas(32) int16_t g_in0[IN0_SIZE]; +alignas(32) int32_t g_in1[IN1_SIZE]; +alignas(32) int32_t g_out0[OUT0_SIZE]; +alignas(32) int32_t g_out0Ref[OUT0_SIZE]; + +int main(int argc, char *argv[]) { + // XXX Figure out how to use argv with xme_ca_udm_dbg -A + std::string dataDir(TO_STR(DATA_DIR)); + srand(10); + std::generate(g_in0, g_in0 + IN0_SIZE, + [&]() { return random_integer(); }); + std::generate(g_in1, g_in1 + IN1_SIZE, + [&]() { return random_integer(); }); + + writeData(g_in0, IN0_SIZE, dataDir + "/in0.txt"); + writeData(g_in1, IN1_SIZE, dataDir + "/in1.txt"); + + chess_memory_fence(); + auto cyclesBegin = chess_cycle_count(); + dut(g_in0, g_in1, g_out0); + auto cyclesEnd = chess_cycle_count(); + chess_memory_fence(); + + auto cycleCount = (int)(cyclesEnd - cyclesBegin); + reportCycleCount(cycleCount, dataDir + "/cycle_count.txt"); + + writeData(g_out0, OUT0_SIZE, dataDir + "/out0.txt"); + + dut_ref(g_in0, g_in1, g_out0Ref); + writeData(g_out0Ref, OUT0_SIZE, dataDir + "/out0_ref.txt"); + + bool ok = true; + ok &= checkData(g_out0, g_out0Ref, OUT0_SIZE); + + if (ok) + printf("TEST PASSED\n"); + else + printf("TEST FAILED\n"); + + return ok ? 0 : 1; +} + +// in0, in1, out0 are in C4 layout. +void dut_ref(int16_t *in0, int32_t *in1, int32_t *out0) { + for (unsigned k = 0; k < OUT0_SIZE; k += 1) { + out0[k] = in0[k] - in1[k]; + } +} diff --git a/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v32/defines.h b/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v32/defines.h new file mode 100644 index 0000000000..b0366ff425 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v32/defines.h @@ -0,0 +1,4 @@ +#pragma once +constexpr unsigned const IN0_SIZE = 1024; +constexpr unsigned const IN1_SIZE = 1024; +constexpr unsigned const OUT0_SIZE = 1024; diff --git a/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v32/i16xi32_sub_elem.mlir b/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v32/i16xi32_sub_elem.mlir new file mode 100644 index 0000000000..5025eb58dd --- /dev/null +++ b/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v32/i16xi32_sub_elem.mlir @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2023, Advanced Micro Devices, Inc. + +// XFAIL: * +// REQUIRES: valid_xchess_license +// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg))" -o linalg.mlir +// RUN: mlir-opt linalg.mlir --linalg-fuse-elementwise-ops --eliminate-empty-tensors --empty-tensor-to-alloc-tensor --one-shot-bufferize="allow-return-allocs allow-unknown-ops bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" --drop-equivalent-buffer-results --buffer-results-to-out-params --buffer-deallocation --canonicalize --cse --convert-linalg-to-affine-loops --affine-super-vectorize="virtual-vector-size=32" -o affine.mlir +// RUN: aie-opt affine.mlir --convert-vector-to-aievec="aie-target=aieml" -lower-affine -o aievec.mlir +// RUN: aie-translate aievec.mlir -aieml=true --aievec-to-cpp -o dut.cc +// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc dut.cc +// RUN: mkdir -p data +// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout +// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s +// CHECK: TEST PASSED + +module { + func.func @dut(%arg0: tensor<1024xi16>, %arg1: tensor<1024xi32>) -> (tensor<1024xi32>) { + %0 = "tosa.cast"(%arg0) : (tensor<1024xi16>) -> tensor<1024xi32> + %2 = "tosa.sub"(%0,%arg1) : (tensor<1024xi32>, tensor<1024xi32>) -> (tensor<1024xi32>) + return %2 : tensor<1024xi32> + } +} + diff --git a/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v32/testbench.cc b/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v32/testbench.cc new file mode 100644 index 0000000000..fd55c97690 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i16xi32_sub_elem_v32/testbench.cc @@ -0,0 +1,57 @@ +#include "../common/testbench.h" +#include "defines.h" +#include +#include +#include +#include +void dut(int16_t *restrict in0, int32_t *restrict in1, int32_t *restrict out0); +void dut_ref(int16_t *in0, int32_t *in1, int32_t *out0); + +alignas(32) int16_t g_in0[IN0_SIZE]; +alignas(32) int32_t g_in1[IN1_SIZE]; +alignas(32) int32_t g_out0[OUT0_SIZE]; +alignas(32) int32_t g_out0Ref[OUT0_SIZE]; + +int main(int argc, char *argv[]) { + // XXX Figure out how to use argv with xme_ca_udm_dbg -A + std::string dataDir(TO_STR(DATA_DIR)); + srand(10); + std::generate(g_in0, g_in0 + IN0_SIZE, + [&]() { return random_integer(); }); + std::generate(g_in1, g_in1 + IN1_SIZE, + [&]() { return random_integer(); }); + + writeData(g_in0, IN0_SIZE, dataDir + "/in0.txt"); + writeData(g_in1, IN1_SIZE, dataDir + "/in1.txt"); + + chess_memory_fence(); + auto cyclesBegin = chess_cycle_count(); + dut(g_in0, g_in1, g_out0); + auto cyclesEnd = chess_cycle_count(); + chess_memory_fence(); + + auto cycleCount = (int)(cyclesEnd - cyclesBegin); + reportCycleCount(cycleCount, dataDir + "/cycle_count.txt"); + + writeData(g_out0, OUT0_SIZE, dataDir + "/out0.txt"); + + dut_ref(g_in0, g_in1, g_out0Ref); + writeData(g_out0Ref, OUT0_SIZE, dataDir + "/out0_ref.txt"); + + bool ok = true; + ok &= checkData(g_out0, g_out0Ref, OUT0_SIZE); + + if (ok) + printf("TEST PASSED\n"); + else + printf("TEST FAILED\n"); + + return ok ? 0 : 1; +} + +// in0, in1, out0 are in C4 layout. +void dut_ref(int16_t *in0, int32_t *in1, int32_t *out0) { + for (unsigned k = 0; k < OUT0_SIZE; k += 1) { + out0[k] = in0[k] - in1[k]; + } +} diff --git a/test/Integration/Dialect/TOSA/i32xi32_add_elem/defines.h b/test/Integration/Dialect/TOSA/i32xi32_add_elem/defines.h new file mode 100644 index 0000000000..b0366ff425 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i32xi32_add_elem/defines.h @@ -0,0 +1,4 @@ +#pragma once +constexpr unsigned const IN0_SIZE = 1024; +constexpr unsigned const IN1_SIZE = 1024; +constexpr unsigned const OUT0_SIZE = 1024; diff --git a/test/Integration/Dialect/TOSA/i32xi32_add_elem/dut.cc b/test/Integration/Dialect/TOSA/i32xi32_add_elem/dut.cc new file mode 100644 index 0000000000..320ac22fec --- /dev/null +++ b/test/Integration/Dialect/TOSA/i32xi32_add_elem/dut.cc @@ -0,0 +1,20 @@ +// clang-format off +void dut(int32_t * restrict v1, int32_t * restrict v2, int32_t * restrict v3) { + size_t v4 = 0; + size_t v5 = 1024; + size_t v6 = 32; + for (size_t v7 = v4; v7 < v5; v7 += v6) + chess_prepare_for_pipelining + chess_loop_range(32, 32) + { + v32int32 v8 = *(v32int32 *)(v1 + v7); + v32int32 v9 = *(v32int32 *)(v2 + v7); + v32acc32 v10 = v32acc32(v8); + v32acc32 v11 = v32acc32(v9); + v32acc32 v12 = add(v10, v11); + v32int32 v13 = v32int32(v12); + *(v32int32 *)(v3 + v7) = v13; + } + return; +} +// clang-format on diff --git a/test/Integration/Dialect/TOSA/i32xi32_add_elem/i32xi32_add_elem.mlir b/test/Integration/Dialect/TOSA/i32xi32_add_elem/i32xi32_add_elem.mlir new file mode 100644 index 0000000000..df09b385db --- /dev/null +++ b/test/Integration/Dialect/TOSA/i32xi32_add_elem/i32xi32_add_elem.mlir @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2023, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg))" -o linalg.mlir +// RUN: mlir-opt linalg.mlir --linalg-fuse-elementwise-ops --eliminate-empty-tensors --empty-tensor-to-alloc-tensor --one-shot-bufferize="allow-return-allocs allow-unknown-ops bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" --drop-equivalent-buffer-results --buffer-results-to-out-params --buffer-deallocation --canonicalize --cse --convert-linalg-to-affine-loops --affine-super-vectorize="virtual-vector-size=32" -o affine.mlir +// RUN: aie-opt affine.mlir --convert-vector-to-aievec="aie-target=aieml" -lower-affine | aie-translate -aieml=true --aievec-to-cpp -o dut.cc +// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc dut.cc +// RUN: mkdir -p data +// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout +// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s +// CHECK: TEST PASSED + +module { + func.func @dut(%arg0: tensor<1024xi32>, %arg1: tensor<1024xi32>) -> (tensor<1024xi32>) { + %1 = "tosa.add"(%arg0,%arg1) : (tensor<1024xi32>, tensor<1024xi32>) -> (tensor<1024xi32>) + return %1 : tensor<1024xi32> + } +} diff --git a/test/Integration/Dialect/TOSA/i32xi32_add_elem/testbench.cc b/test/Integration/Dialect/TOSA/i32xi32_add_elem/testbench.cc new file mode 100644 index 0000000000..f64c807f70 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i32xi32_add_elem/testbench.cc @@ -0,0 +1,55 @@ +#include "../common/testbench.h" +#include "defines.h" +#include +#include +#include +#include +void dut(int32_t *restrict in0, int32_t *restrict in1, int32_t *restrict out0); +void dut_ref(int32_t *in0, int32_t *in1, int32_t *out0); + +alignas(32) int32_t g_in0[IN0_SIZE]; +alignas(32) int32_t g_in1[IN1_SIZE]; +alignas(32) int32_t g_out0[OUT0_SIZE]; +alignas(32) int32_t g_out0Ref[OUT0_SIZE]; + +int main(int argc, char *argv[]) { + std::string dataDir(TO_STR(DATA_DIR)); + srand(10); + std::generate(g_in0, g_in0 + IN0_SIZE, + [&]() { return random_integer(); }); + std::generate(g_in1, g_in1 + IN1_SIZE, + [&]() { return random_integer(); }); + + writeData(g_in0, IN0_SIZE, dataDir + "/in0.txt"); + writeData(g_in1, IN1_SIZE, dataDir + "/in1.txt"); + + chess_memory_fence(); + auto cyclesBegin = chess_cycle_count(); + dut(g_in0, g_in1, g_out0); + auto cyclesEnd = chess_cycle_count(); + chess_memory_fence(); + + auto cycleCount = (int)(cyclesEnd - cyclesBegin); + reportCycleCount(cycleCount, dataDir + "/cycle_count.txt"); + + writeData(g_out0, OUT0_SIZE, dataDir + "/out0.txt"); + + dut_ref(g_in0, g_in1, g_out0Ref); + writeData(g_out0Ref, OUT0_SIZE, dataDir + "/out0_ref.txt"); + + bool ok = true; + ok &= checkData(g_out0, g_out0Ref, OUT0_SIZE); + + if (ok) + printf("TEST PASSED\n"); + else + printf("TEST FAILED\n"); + + return ok ? 0 : 1; +} + +void dut_ref(int32_t *in0, int32_t *in1, int32_t *out0) { + for (unsigned k = 0; k < OUT0_SIZE; k += 1) { + out0[k] = (int32_t)(in0[k] + in1[k]); + } +} diff --git a/test/Integration/Dialect/TOSA/i32xi32_mul_elem/defines.h b/test/Integration/Dialect/TOSA/i32xi32_mul_elem/defines.h new file mode 100644 index 0000000000..b0366ff425 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i32xi32_mul_elem/defines.h @@ -0,0 +1,4 @@ +#pragma once +constexpr unsigned const IN0_SIZE = 1024; +constexpr unsigned const IN1_SIZE = 1024; +constexpr unsigned const OUT0_SIZE = 1024; diff --git a/test/Integration/Dialect/TOSA/i32xi32_mul_elem/dut.cc b/test/Integration/Dialect/TOSA/i32xi32_mul_elem/dut.cc new file mode 100644 index 0000000000..31ecd284a7 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i32xi32_mul_elem/dut.cc @@ -0,0 +1,18 @@ +// clang-format off +void dut(int32_t * restrict v1, int32_t * restrict v2, int32_t * restrict v3) { + size_t v4 = 0; + size_t v5 = 1024; + size_t v6 = 16; + for (size_t v7 = v4; v7 < v5; v7 += v6) + chess_prepare_for_pipelining + chess_loop_range(64, 64) + { + v16int32 v8 = *(v16int32 *)(v1 + v7); + v16int32 v9 = *(v16int32 *)(v2 + v7); + v16acc64 v10 = mul_elem_16_2(v9, broadcast_zero_s32(), v8, undef_v16int32()); + v16int32 v11 = srs_to_v16int32(v10, 0); + *(v16int32 *)(v3 + v7) = v11; + } + return; +} +// clang-format on diff --git a/test/Integration/Dialect/TOSA/i32xi32_mul_elem/i32xi32_mul_elem.mlir b/test/Integration/Dialect/TOSA/i32xi32_mul_elem/i32xi32_mul_elem.mlir new file mode 100644 index 0000000000..e2b31d2d92 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i32xi32_mul_elem/i32xi32_mul_elem.mlir @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2023, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg))" -o linalg.mlir +// RUN: mlir-opt linalg.mlir --linalg-fuse-elementwise-ops --eliminate-empty-tensors --empty-tensor-to-alloc-tensor --one-shot-bufferize="allow-return-allocs allow-unknown-ops bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" --drop-equivalent-buffer-results --buffer-results-to-out-params --buffer-deallocation --canonicalize --cse --convert-linalg-to-affine-loops --affine-super-vectorize="virtual-vector-size=16" -o affine.mlir +// RUN: aie-opt affine.mlir --convert-vector-to-aievec="aie-target=aieml" -lower-affine | aie-translate -aieml=true --aievec-to-cpp -o dut.cc +// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc dut.cc +// RUN: mkdir -p data +// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout +// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s +// CHECK: TEST PASSED + +module { + func.func @dut(%arg0: tensor<1024xi32>, %arg1: tensor<1024xi32>) -> (tensor<1024xi32>) { + %1 = "tosa.mul"(%arg0,%arg1) {shift = 0 : i32} : (tensor<1024xi32>, tensor<1024xi32>) -> (tensor<1024xi32>) + return %1 : tensor<1024xi32> + } +} diff --git a/test/Integration/Dialect/TOSA/i32xi32_mul_elem/testbench.cc b/test/Integration/Dialect/TOSA/i32xi32_mul_elem/testbench.cc new file mode 100644 index 0000000000..2f4711ba5d --- /dev/null +++ b/test/Integration/Dialect/TOSA/i32xi32_mul_elem/testbench.cc @@ -0,0 +1,55 @@ +#include "../common/testbench.h" +#include "defines.h" +#include +#include +#include +#include +void dut(int32_t *restrict in0, int32_t *restrict in1, int32_t *restrict out0); +void dut_ref(int32_t *in0, int32_t *in1, int32_t *out0); + +alignas(32) int32_t g_in0[IN0_SIZE]; +alignas(32) int32_t g_in1[IN1_SIZE]; +alignas(32) int32_t g_out0[OUT0_SIZE]; +alignas(32) int32_t g_out0Ref[OUT0_SIZE]; + +int main(int argc, char *argv[]) { + std::string dataDir(TO_STR(DATA_DIR)); + srand(10); + std::generate(g_in0, g_in0 + IN0_SIZE, + [&]() { return random_integer(); }); + std::generate(g_in1, g_in1 + IN1_SIZE, + [&]() { return random_integer(); }); + + writeData(g_in0, IN0_SIZE, dataDir + "/in0.txt"); + writeData(g_in1, IN1_SIZE, dataDir + "/in1.txt"); + + chess_memory_fence(); + auto cyclesBegin = chess_cycle_count(); + dut(g_in0, g_in1, g_out0); + auto cyclesEnd = chess_cycle_count(); + chess_memory_fence(); + + auto cycleCount = (int)(cyclesEnd - cyclesBegin); + reportCycleCount(cycleCount, dataDir + "/cycle_count.txt"); + + writeData(g_out0, OUT0_SIZE, dataDir + "/out0.txt"); + + dut_ref(g_in0, g_in1, g_out0Ref); + writeData(g_out0Ref, OUT0_SIZE, dataDir + "/out0_ref.txt"); + + bool ok = true; + ok &= checkData(g_out0, g_out0Ref, OUT0_SIZE); + + if (ok) + printf("TEST PASSED\n"); + else + printf("TEST FAILED\n"); + + return ok ? 0 : 1; +} + +void dut_ref(int32_t *in0, int32_t *in1, int32_t *out0) { + for (unsigned k = 0; k < OUT0_SIZE; k += 1) { + out0[k] = (int32_t)(in0[k] * in1[k]); + } +} diff --git a/test/Integration/Dialect/TOSA/i32xi32_sel/defines.h b/test/Integration/Dialect/TOSA/i32xi32_sel/defines.h new file mode 100644 index 0000000000..b0366ff425 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i32xi32_sel/defines.h @@ -0,0 +1,4 @@ +#pragma once +constexpr unsigned const IN0_SIZE = 1024; +constexpr unsigned const IN1_SIZE = 1024; +constexpr unsigned const OUT0_SIZE = 1024; diff --git a/test/Integration/Dialect/TOSA/i32xi32_sel/dut.cc b/test/Integration/Dialect/TOSA/i32xi32_sel/dut.cc new file mode 100644 index 0000000000..9f7a85d54a --- /dev/null +++ b/test/Integration/Dialect/TOSA/i32xi32_sel/dut.cc @@ -0,0 +1,18 @@ +// clang-format off +void dut(int32_t * restrict v1, int32_t * restrict v2, int32_t * restrict v3) { + size_t v4 = 0; + size_t v5 = 1024; + size_t v6 = 16; + for (size_t v7 = v4; v7 < v5; v7 += v6) + chess_prepare_for_pipelining + chess_loop_range(64, 64) + { + v16int32 v8 = *(v16int32 *)(v1 + v7); + v16int32 v9 = *(v16int32 *)(v2 + v7); + uint32_t v10 = gt(v8, v9); + v16int32 v11 = sel(v9, v8, v10); + *(v16int32 *)(v3 + v7) = v11; + } + return; +} +// clang-format on diff --git a/test/Integration/Dialect/TOSA/i32xi32_sel/i32xi32_sel.mlir b/test/Integration/Dialect/TOSA/i32xi32_sel/i32xi32_sel.mlir new file mode 100644 index 0000000000..daecc9cf26 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i32xi32_sel/i32xi32_sel.mlir @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2023, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg))" -o linalg.mlir +// RUN: mlir-opt linalg.mlir --linalg-fuse-elementwise-ops --eliminate-empty-tensors --empty-tensor-to-alloc-tensor --one-shot-bufferize="allow-return-allocs allow-unknown-ops bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" --drop-equivalent-buffer-results --buffer-results-to-out-params --buffer-deallocation --canonicalize --cse --convert-linalg-to-affine-loops --affine-super-vectorize="virtual-vector-size=16" -o affine.mlir +// RUN: aie-opt affine.mlir --convert-vector-to-aievec="aie-target=aieml" -lower-affine | aie-translate -aieml=true --aievec-to-cpp -o dut.cc +// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc dut.cc +// RUN: mkdir -p data +// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout +// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s +// CHECK: TEST PASSED + +module { + func.func @dut(%arg0: tensor<1024xi32>, %arg1: tensor<1024xi32>) -> (tensor<1024xi32>) { + %0 = "tosa.greater"(%arg0, %arg1) : (tensor<1024xi32>, tensor<1024xi32>) -> (tensor<1024xi1>) + %1 = "tosa.select"(%0, %arg0, %arg1) : (tensor<1024xi1>, tensor<1024xi32>, tensor<1024xi32>) -> (tensor<1024xi32>) + return %1 : tensor<1024xi32> + } +} diff --git a/test/Integration/Dialect/TOSA/i32xi32_sel/testbench.cc b/test/Integration/Dialect/TOSA/i32xi32_sel/testbench.cc new file mode 100644 index 0000000000..33e1ed88ee --- /dev/null +++ b/test/Integration/Dialect/TOSA/i32xi32_sel/testbench.cc @@ -0,0 +1,57 @@ +#include "../common/testbench.h" +#include "defines.h" +#include +#include +#include +#include +void dut(int32_t *restrict in0, int32_t *restrict in1, int32_t *restrict out0); +void dut_ref(int32_t *in0, int32_t *in1, int32_t *out0); + +alignas(32) int32_t g_in0[IN0_SIZE]; +alignas(32) int32_t g_in1[IN1_SIZE]; +alignas(32) int32_t g_out0[OUT0_SIZE]; +alignas(32) int32_t g_out0Ref[OUT0_SIZE]; + +int main(int argc, char *argv[]) { + // XXX Figure out how to use argv with xme_ca_udm_dbg -A + std::string dataDir(TO_STR(DATA_DIR)); + srand(10); + std::generate(g_in0, g_in0 + IN0_SIZE, + [&]() { return random_integer(); }); + std::generate(g_in1, g_in1 + IN1_SIZE, + [&]() { return random_integer(); }); + + writeData(g_in0, IN0_SIZE, dataDir + "/in0.txt"); + writeData(g_in1, IN1_SIZE, dataDir + "/in1.txt"); + + chess_memory_fence(); + auto cyclesBegin = chess_cycle_count(); + dut(g_in0, g_in1, g_out0); + auto cyclesEnd = chess_cycle_count(); + chess_memory_fence(); + + auto cycleCount = (int)(cyclesEnd - cyclesBegin); + reportCycleCount(cycleCount, dataDir + "/cycle_count.txt"); + + writeData(g_out0, OUT0_SIZE, dataDir + "/out0.txt"); + + dut_ref(g_in0, g_in1, g_out0Ref); + writeData(g_out0Ref, OUT0_SIZE, dataDir + "/out0_ref.txt"); + + bool ok = true; + ok &= checkData(g_out0, g_out0Ref, OUT0_SIZE); + + if (ok) + printf("TEST PASSED\n"); + else + printf("TEST FAILED\n"); + + return ok ? 0 : 1; +} + +// in0, in1, out0 are in C4 layout. +void dut_ref(int32_t *in0, int32_t *in1, int32_t *out0) { + for (unsigned k = 0; k < OUT0_SIZE; k += 1) { + out0[k] = (in0[k] > in1[k]) ? in0[k] : in1[k]; + } +} diff --git a/test/Integration/Dialect/TOSA/i32xi32_sub_elem/defines.h b/test/Integration/Dialect/TOSA/i32xi32_sub_elem/defines.h new file mode 100644 index 0000000000..b0366ff425 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i32xi32_sub_elem/defines.h @@ -0,0 +1,4 @@ +#pragma once +constexpr unsigned const IN0_SIZE = 1024; +constexpr unsigned const IN1_SIZE = 1024; +constexpr unsigned const OUT0_SIZE = 1024; diff --git a/test/Integration/Dialect/TOSA/i32xi32_sub_elem/dut.cc b/test/Integration/Dialect/TOSA/i32xi32_sub_elem/dut.cc new file mode 100644 index 0000000000..952c43ead1 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i32xi32_sub_elem/dut.cc @@ -0,0 +1,20 @@ +// clang-format off +void dut(int32_t * restrict v1, int32_t * restrict v2, int32_t * restrict v3) { + size_t v4 = 0; + size_t v5 = 1024; + size_t v6 = 32; + for (size_t v7 = v4; v7 < v5; v7 += v6) + chess_prepare_for_pipelining + chess_loop_range(32, 32) + { + v32int32 v8 = *(v32int32 *)(v1 + v7); + v32int32 v9 = *(v32int32 *)(v2 + v7); + v32acc32 v10 = v32acc32(v8); + v32acc32 v11 = v32acc32(v9); + v32acc32 v12 = sub(v10, v11); + v32int32 v13 = v32int32(v12); + *(v32int32 *)(v3 + v7) = v13; + } + return; +} +// clang-format on diff --git a/test/Integration/Dialect/TOSA/i32xi32_sub_elem/i32xi32_sub_elem.mlir b/test/Integration/Dialect/TOSA/i32xi32_sub_elem/i32xi32_sub_elem.mlir new file mode 100644 index 0000000000..7b90f7b3c3 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i32xi32_sub_elem/i32xi32_sub_elem.mlir @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2023, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg))" -o linalg.mlir +// RUN: mlir-opt linalg.mlir --linalg-fuse-elementwise-ops --eliminate-empty-tensors --empty-tensor-to-alloc-tensor --one-shot-bufferize="allow-return-allocs allow-unknown-ops bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" --drop-equivalent-buffer-results --buffer-results-to-out-params --buffer-deallocation --canonicalize --cse --convert-linalg-to-affine-loops --affine-super-vectorize="virtual-vector-size=32" -o affine.mlir +// RUN: aie-opt affine.mlir --convert-vector-to-aievec="aie-target=aieml" -lower-affine | aie-translate -aieml=true --aievec-to-cpp -o dut.cc +// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc dut.cc +// RUN: mkdir -p data +// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout +// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s +// CHECK: TEST PASSED + +module { + func.func @dut(%arg0: tensor<1024xi32>, %arg1: tensor<1024xi32>) -> (tensor<1024xi32>) { + %1 = "tosa.sub"(%arg0,%arg1) : (tensor<1024xi32>, tensor<1024xi32>) -> (tensor<1024xi32>) + return %1 : tensor<1024xi32> + } +} diff --git a/test/Integration/Dialect/TOSA/i32xi32_sub_elem/testbench.cc b/test/Integration/Dialect/TOSA/i32xi32_sub_elem/testbench.cc new file mode 100644 index 0000000000..c0d3c49565 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i32xi32_sub_elem/testbench.cc @@ -0,0 +1,55 @@ +#include "../common/testbench.h" +#include "defines.h" +#include +#include +#include +#include +void dut(int32_t *restrict in0, int32_t *restrict in1, int32_t *restrict out0); +void dut_ref(int32_t *in0, int32_t *in1, int32_t *out0); + +alignas(32) int32_t g_in0[IN0_SIZE]; +alignas(32) int32_t g_in1[IN1_SIZE]; +alignas(32) int32_t g_out0[OUT0_SIZE]; +alignas(32) int32_t g_out0Ref[OUT0_SIZE]; + +int main(int argc, char *argv[]) { + std::string dataDir(TO_STR(DATA_DIR)); + srand(10); + std::generate(g_in0, g_in0 + IN0_SIZE, + [&]() { return random_integer(); }); + std::generate(g_in1, g_in1 + IN1_SIZE, + [&]() { return random_integer(); }); + + writeData(g_in0, IN0_SIZE, dataDir + "/in0.txt"); + writeData(g_in1, IN1_SIZE, dataDir + "/in1.txt"); + + chess_memory_fence(); + auto cyclesBegin = chess_cycle_count(); + dut(g_in0, g_in1, g_out0); + auto cyclesEnd = chess_cycle_count(); + chess_memory_fence(); + + auto cycleCount = (int)(cyclesEnd - cyclesBegin); + reportCycleCount(cycleCount, dataDir + "/cycle_count.txt"); + + writeData(g_out0, OUT0_SIZE, dataDir + "/out0.txt"); + + dut_ref(g_in0, g_in1, g_out0Ref); + writeData(g_out0Ref, OUT0_SIZE, dataDir + "/out0_ref.txt"); + + bool ok = true; + ok &= checkData(g_out0, g_out0Ref, OUT0_SIZE); + + if (ok) + printf("TEST PASSED\n"); + else + printf("TEST FAILED\n"); + + return ok ? 0 : 1; +} + +void dut_ref(int32_t *in0, int32_t *in1, int32_t *out0) { + for (unsigned k = 0; k < OUT0_SIZE; k += 1) { + out0[k] = (int32_t)(in0[k] - in1[k]); + } +} diff --git a/test/Integration/Dialect/TOSA/i8xi16_add_elem/defines.h b/test/Integration/Dialect/TOSA/i8xi16_add_elem/defines.h new file mode 100644 index 0000000000..b0366ff425 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i8xi16_add_elem/defines.h @@ -0,0 +1,4 @@ +#pragma once +constexpr unsigned const IN0_SIZE = 1024; +constexpr unsigned const IN1_SIZE = 1024; +constexpr unsigned const OUT0_SIZE = 1024; diff --git a/test/Integration/Dialect/TOSA/i8xi16_add_elem/dut.cc b/test/Integration/Dialect/TOSA/i8xi16_add_elem/dut.cc new file mode 100644 index 0000000000..a50be97583 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i8xi16_add_elem/dut.cc @@ -0,0 +1,20 @@ +// clang-format off +void dut(int8_t * restrict v1, int16_t * restrict v2, int32_t * restrict v3) { + size_t v4 = 0; + size_t v5 = 1024; + size_t v6 = 32; + for (size_t v7 = v4; v7 < v5; v7 += v6) + chess_prepare_for_pipelining + chess_loop_range(32, 32) + { + v32int8 v8 = *(v32int8 *)(v1 + v7); + v32int16 v9 = *(v32int16 *)(v2 + v7); + v32acc32 v10 = ups_to_v32acc32(v8, 0); + v32acc32 v11 = ups_to_v32acc32(v9, 0); + v32acc32 v12 = add(v10, v11); + v32int32 v13 = v32int32(v12); + *(v32int32 *)(v3 + v7) = v13; + } + return; +} +// clang-format on diff --git a/test/Integration/Dialect/TOSA/i8xi16_add_elem/i8xi16_add_elem.mlir b/test/Integration/Dialect/TOSA/i8xi16_add_elem/i8xi16_add_elem.mlir new file mode 100644 index 0000000000..51cc3a1d79 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i8xi16_add_elem/i8xi16_add_elem.mlir @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2023, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg))" -o linalg.mlir +// RUN: mlir-opt linalg.mlir --linalg-fuse-elementwise-ops --eliminate-empty-tensors --empty-tensor-to-alloc-tensor --one-shot-bufferize="allow-return-allocs allow-unknown-ops bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" --drop-equivalent-buffer-results --buffer-results-to-out-params --buffer-deallocation --canonicalize --cse --convert-linalg-to-affine-loops --affine-super-vectorize="virtual-vector-size=32" -o affine.mlir +// RUN: aie-opt affine.mlir --convert-vector-to-aievec="aie-target=aieml" -lower-affine -o aievec.mlir +// RUN: aie-translate aievec.mlir -aieml=true --aievec-to-cpp -o dut.cc +// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc dut.cc +// RUN: mkdir -p data +// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout +// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s +// CHECK: TEST PASSED + +module { + func.func @dut(%arg0: tensor<1024xi8>, %arg1: tensor<1024xi16>) -> (tensor<1024xi32>) { + %0 = "tosa.cast"(%arg0) : (tensor<1024xi8>) -> tensor<1024xi32> + %1 = "tosa.cast"(%arg1) : (tensor<1024xi16>) -> tensor<1024xi32> + %2 = "tosa.add"(%0,%1) : (tensor<1024xi32>, tensor<1024xi32>) -> (tensor<1024xi32>) + return %2 : tensor<1024xi32> + } +} + diff --git a/test/Integration/Dialect/TOSA/i8xi16_add_elem/testbench.cc b/test/Integration/Dialect/TOSA/i8xi16_add_elem/testbench.cc new file mode 100644 index 0000000000..5aaaaa517d --- /dev/null +++ b/test/Integration/Dialect/TOSA/i8xi16_add_elem/testbench.cc @@ -0,0 +1,57 @@ +#include "../common/testbench.h" +#include "defines.h" +#include +#include +#include +#include +void dut(int8_t *restrict in0, int16_t *restrict in1, int32_t *restrict out0); +void dut_ref(int8_t *in0, int16_t *in1, int32_t *out0); + +alignas(32) int8_t g_in0[IN0_SIZE]; +alignas(32) int16_t g_in1[IN1_SIZE]; +alignas(32) int32_t g_out0[OUT0_SIZE]; +alignas(32) int32_t g_out0Ref[OUT0_SIZE]; + +int main(int argc, char *argv[]) { + // XXX Figure out how to use argv with xme_ca_udm_dbg -A + std::string dataDir(TO_STR(DATA_DIR)); + srand(10); + std::generate(g_in0, g_in0 + IN0_SIZE, + [&]() { return random_integer(); }); + std::generate(g_in1, g_in1 + IN1_SIZE, + [&]() { return random_integer(); }); + + writeData(g_in0, IN0_SIZE, dataDir + "/in0.txt"); + writeData(g_in1, IN1_SIZE, dataDir + "/in1.txt"); + + chess_memory_fence(); + auto cyclesBegin = chess_cycle_count(); + dut(g_in0, g_in1, g_out0); + auto cyclesEnd = chess_cycle_count(); + chess_memory_fence(); + + auto cycleCount = (int)(cyclesEnd - cyclesBegin); + reportCycleCount(cycleCount, dataDir + "/cycle_count.txt"); + + writeData(g_out0, OUT0_SIZE, dataDir + "/out0.txt"); + + dut_ref(g_in0, g_in1, g_out0Ref); + writeData(g_out0Ref, OUT0_SIZE, dataDir + "/out0_ref.txt"); + + bool ok = true; + ok &= checkData(g_out0, g_out0Ref, OUT0_SIZE); + + if (ok) + printf("TEST PASSED\n"); + else + printf("TEST FAILED\n"); + + return ok ? 0 : 1; +} + +// in0, in1, out0 are in C4 layout. +void dut_ref(int8_t *in0, int16_t *in1, int32_t *out0) { + for (unsigned k = 0; k < OUT0_SIZE; k += 1) { + out0[k] = in0[k] + in1[k]; + } +} diff --git a/test/Integration/Dialect/TOSA/i8xi32_add_elem/defines.h b/test/Integration/Dialect/TOSA/i8xi32_add_elem/defines.h new file mode 100644 index 0000000000..b0366ff425 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i8xi32_add_elem/defines.h @@ -0,0 +1,4 @@ +#pragma once +constexpr unsigned const IN0_SIZE = 1024; +constexpr unsigned const IN1_SIZE = 1024; +constexpr unsigned const OUT0_SIZE = 1024; diff --git a/test/Integration/Dialect/TOSA/i8xi32_add_elem/dut.cc b/test/Integration/Dialect/TOSA/i8xi32_add_elem/dut.cc new file mode 100644 index 0000000000..bfa7be41f8 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i8xi32_add_elem/dut.cc @@ -0,0 +1,20 @@ +// clang-format off +void dut(int8_t * restrict v1, int32_t * restrict v2, int32_t * restrict v3) { + size_t v4 = 0; + size_t v5 = 1024; + size_t v6 = 32; + for (size_t v7 = v4; v7 < v5; v7 += v6) + chess_prepare_for_pipelining + chess_loop_range(32, 32) + { + v32int8 v8 = *(v32int8 *)(v1 + v7); + v32int32 v9 = *(v32int32 *)(v2 + v7); + v32acc32 v10 = ups_to_v32acc32(v8, 0); + v32acc32 v11 = v32acc32(v9); + v32acc32 v12 = add(v10, v11); + v32int32 v13 = v32int32(v12); + *(v32int32 *)(v3 + v7) = v13; + } + return; +} +// clang-format on diff --git a/test/Integration/Dialect/TOSA/i8xi32_add_elem/i8xi32_add_elem.mlir b/test/Integration/Dialect/TOSA/i8xi32_add_elem/i8xi32_add_elem.mlir new file mode 100644 index 0000000000..05cde3d3db --- /dev/null +++ b/test/Integration/Dialect/TOSA/i8xi32_add_elem/i8xi32_add_elem.mlir @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2023, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg))" -o linalg.mlir +// RUN: mlir-opt linalg.mlir --linalg-fuse-elementwise-ops --eliminate-empty-tensors --empty-tensor-to-alloc-tensor --one-shot-bufferize="allow-return-allocs allow-unknown-ops bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" --drop-equivalent-buffer-results --buffer-results-to-out-params --buffer-deallocation --canonicalize --cse --convert-linalg-to-affine-loops --affine-super-vectorize="virtual-vector-size=32" -o affine.mlir +// RUN: aie-opt affine.mlir --convert-vector-to-aievec="aie-target=aieml" -lower-affine -o aievec.mlir +// RUN: aie-translate aievec.mlir -aieml=true --aievec-to-cpp -o dut.cc +// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc dut.cc +// RUN: mkdir -p data +// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout +// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s +// CHECK: TEST PASSED + +module { + func.func @dut(%arg0: tensor<1024xi8>, %arg1: tensor<1024xi32>) -> (tensor<1024xi32>) { + %0 = "tosa.cast"(%arg0) : (tensor<1024xi8>) -> tensor<1024xi32> + %2 = "tosa.add"(%0,%arg1) : (tensor<1024xi32>, tensor<1024xi32>) -> (tensor<1024xi32>) + return %2 : tensor<1024xi32> + } +} + diff --git a/test/Integration/Dialect/TOSA/i8xi32_add_elem/testbench.cc b/test/Integration/Dialect/TOSA/i8xi32_add_elem/testbench.cc new file mode 100644 index 0000000000..580f71870c --- /dev/null +++ b/test/Integration/Dialect/TOSA/i8xi32_add_elem/testbench.cc @@ -0,0 +1,57 @@ +#include "../common/testbench.h" +#include "defines.h" +#include +#include +#include +#include +void dut(int8_t *restrict in0, int32_t *restrict in1, int32_t *restrict out0); +void dut_ref(int8_t *in0, int32_t *in1, int32_t *out0); + +alignas(32) int8_t g_in0[IN0_SIZE]; +alignas(32) int32_t g_in1[IN1_SIZE]; +alignas(32) int32_t g_out0[OUT0_SIZE]; +alignas(32) int32_t g_out0Ref[OUT0_SIZE]; + +int main(int argc, char *argv[]) { + // XXX Figure out how to use argv with xme_ca_udm_dbg -A + std::string dataDir(TO_STR(DATA_DIR)); + srand(10); + std::generate(g_in0, g_in0 + IN0_SIZE, + [&]() { return random_integer(); }); + std::generate(g_in1, g_in1 + IN1_SIZE, + [&]() { return random_integer(); }); + + writeData(g_in0, IN0_SIZE, dataDir + "/in0.txt"); + writeData(g_in1, IN1_SIZE, dataDir + "/in1.txt"); + + chess_memory_fence(); + auto cyclesBegin = chess_cycle_count(); + dut(g_in0, g_in1, g_out0); + auto cyclesEnd = chess_cycle_count(); + chess_memory_fence(); + + auto cycleCount = (int)(cyclesEnd - cyclesBegin); + reportCycleCount(cycleCount, dataDir + "/cycle_count.txt"); + + writeData(g_out0, OUT0_SIZE, dataDir + "/out0.txt"); + + dut_ref(g_in0, g_in1, g_out0Ref); + writeData(g_out0Ref, OUT0_SIZE, dataDir + "/out0_ref.txt"); + + bool ok = true; + ok &= checkData(g_out0, g_out0Ref, OUT0_SIZE); + + if (ok) + printf("TEST PASSED\n"); + else + printf("TEST FAILED\n"); + + return ok ? 0 : 1; +} + +// in0, in1, out0 are in C4 layout. +void dut_ref(int8_t *in0, int32_t *in1, int32_t *out0) { + for (unsigned k = 0; k < OUT0_SIZE; k += 1) { + out0[k] = in0[k] + in1[k]; + } +} diff --git a/test/Integration/Dialect/TOSA/i8xi32_sub_elem/defines.h b/test/Integration/Dialect/TOSA/i8xi32_sub_elem/defines.h new file mode 100644 index 0000000000..b0366ff425 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i8xi32_sub_elem/defines.h @@ -0,0 +1,4 @@ +#pragma once +constexpr unsigned const IN0_SIZE = 1024; +constexpr unsigned const IN1_SIZE = 1024; +constexpr unsigned const OUT0_SIZE = 1024; diff --git a/test/Integration/Dialect/TOSA/i8xi32_sub_elem/dut.cc b/test/Integration/Dialect/TOSA/i8xi32_sub_elem/dut.cc new file mode 100644 index 0000000000..0b2ad6ffd7 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i8xi32_sub_elem/dut.cc @@ -0,0 +1,20 @@ +// clang-format off +void dut(int8_t * restrict v1, int32_t * restrict v2, int32_t * restrict v3) { + size_t v4 = 0; + size_t v5 = 1024; + size_t v6 = 32; + for (size_t v7 = v4; v7 < v5; v7 += v6) + chess_prepare_for_pipelining + chess_loop_range(32, 32) + { + v32int8 v8 = *(v32int8 *)(v1 + v7); + v32int32 v9 = *(v32int32 *)(v2 + v7); + v32acc32 v10 = ups_to_v32acc32(v8, 0); + v32acc32 v11 = v32acc32(v9); + v32acc32 v12 = sub(v10, v11); + v32int32 v13 = v32int32(v12); + *(v32int32 *)(v3 + v7) = v13; + } + return; +} +// clang-format on diff --git a/test/Integration/Dialect/TOSA/i8xi32_sub_elem/i8xi32_sub_elem.mlir b/test/Integration/Dialect/TOSA/i8xi32_sub_elem/i8xi32_sub_elem.mlir new file mode 100644 index 0000000000..b4deda9fb1 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i8xi32_sub_elem/i8xi32_sub_elem.mlir @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2023, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg))" -o linalg.mlir +// RUN: mlir-opt linalg.mlir --linalg-fuse-elementwise-ops --eliminate-empty-tensors --empty-tensor-to-alloc-tensor --one-shot-bufferize="allow-return-allocs allow-unknown-ops bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" --drop-equivalent-buffer-results --buffer-results-to-out-params --buffer-deallocation --canonicalize --cse --convert-linalg-to-affine-loops --affine-super-vectorize="virtual-vector-size=32" -o affine.mlir +// RUN: aie-opt affine.mlir --convert-vector-to-aievec="aie-target=aieml" -lower-affine -o aievec.mlir +// RUN: aie-translate aievec.mlir -aieml=true --aievec-to-cpp -o dut.cc +// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc dut.cc +// RUN: mkdir -p data +// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout +// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s +// CHECK: TEST PASSED + +module { + func.func @dut(%arg0: tensor<1024xi8>, %arg1: tensor<1024xi32>) -> (tensor<1024xi32>) { + %0 = "tosa.cast"(%arg0) : (tensor<1024xi8>) -> tensor<1024xi32> + %2 = "tosa.sub"(%0,%arg1) : (tensor<1024xi32>, tensor<1024xi32>) -> (tensor<1024xi32>) + return %2 : tensor<1024xi32> + } +} + diff --git a/test/Integration/Dialect/TOSA/i8xi32_sub_elem/testbench.cc b/test/Integration/Dialect/TOSA/i8xi32_sub_elem/testbench.cc new file mode 100644 index 0000000000..004679fca0 --- /dev/null +++ b/test/Integration/Dialect/TOSA/i8xi32_sub_elem/testbench.cc @@ -0,0 +1,57 @@ +#include "../common/testbench.h" +#include "defines.h" +#include +#include +#include +#include +void dut(int8_t *restrict in0, int32_t *restrict in1, int32_t *restrict out0); +void dut_ref(int8_t *in0, int32_t *in1, int32_t *out0); + +alignas(32) int8_t g_in0[IN0_SIZE]; +alignas(32) int32_t g_in1[IN1_SIZE]; +alignas(32) int32_t g_out0[OUT0_SIZE]; +alignas(32) int32_t g_out0Ref[OUT0_SIZE]; + +int main(int argc, char *argv[]) { + // XXX Figure out how to use argv with xme_ca_udm_dbg -A + std::string dataDir(TO_STR(DATA_DIR)); + srand(10); + std::generate(g_in0, g_in0 + IN0_SIZE, + [&]() { return random_integer(); }); + std::generate(g_in1, g_in1 + IN1_SIZE, + [&]() { return random_integer(); }); + + writeData(g_in0, IN0_SIZE, dataDir + "/in0.txt"); + writeData(g_in1, IN1_SIZE, dataDir + "/in1.txt"); + + chess_memory_fence(); + auto cyclesBegin = chess_cycle_count(); + dut(g_in0, g_in1, g_out0); + auto cyclesEnd = chess_cycle_count(); + chess_memory_fence(); + + auto cycleCount = (int)(cyclesEnd - cyclesBegin); + reportCycleCount(cycleCount, dataDir + "/cycle_count.txt"); + + writeData(g_out0, OUT0_SIZE, dataDir + "/out0.txt"); + + dut_ref(g_in0, g_in1, g_out0Ref); + writeData(g_out0Ref, OUT0_SIZE, dataDir + "/out0_ref.txt"); + + bool ok = true; + ok &= checkData(g_out0, g_out0Ref, OUT0_SIZE); + + if (ok) + printf("TEST PASSED\n"); + else + printf("TEST FAILED\n"); + + return ok ? 0 : 1; +} + +// in0, in1, out0 are in C4 layout. +void dut_ref(int8_t *in0, int32_t *in1, int32_t *out0) { + for (unsigned k = 0; k < OUT0_SIZE; k += 1) { + out0[k] = in0[k] - in1[k]; + } +} diff --git a/test/unit_tests/aievec_tests/bf16xbf16_mul_elem_2/.bf16xbf16_mul_elem.mlir.swp b/test/unit_tests/aievec_tests/bf16xbf16_mul_elem_2/.bf16xbf16_mul_elem.mlir.swp deleted file mode 100644 index a3e84af8a975dd463a194ac11d469395ddf7eb87..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeI2&2G~`5XZM%p@2Z#hd@wKySC$0MO35$N)Z(T6;cogA~HNul<7Hlmv6v1YgXWOREXEba`@){ zzHl1|_ds^Cs&L7!m-49FSN70;JFSGqv*=Z7MNgbm~V^>*h zJnEG+xuJsLLoaI04d~{jRpO~crJig5IB1c#JyYRHCa@0)Op3PKa%khH8YkiSu{-;a z0dJWJFaajO1egF5U;<2l3G812rkD`Var*~`2u=*!iCyjRksBt!1egF5U;<2l2`~XB zzyz286JP>N;6EhbheF&sD8xGy{r^Au{l9fYh|{RUsF#O@xQ|*yT}GWpeV!EJ73u|Q z3iag>+M({Ercj^I-$&F3)O!^9e~aRa2`~XBzyz286JP>NfC(^xe<1Ko0FVH*GL_Ei z?)CrxR?eDSTSog?87b{;>ewh~&dxy7*K6(ry0;&~{Lt_MR@X*tcVPxnm1-4TXgTg| zJyLrGTnQcU(MjLJ0^A6iE%em>Vg1OuJ+@W@gVyd2ORC#l0F&k{XXH^@yvH_>w%Hq)~=+SKS|2jD=FtMlfqKyI8#=X>w0fQ%^@yCA4;PN zXwAV^oqxUC(GdiBhIQ1L0mpLcOHDhp8`>^fM^zSBbc2gwWv1w?ou7iDnbt37+?_M@ zew@W@(mbpZg&kqruy}20@%lWhE_GL7JSpwaH$GK}duf8TyAE0Tf_7ItcozEQ0_) zBflt=28`^2#I)96x`gRXpdc`vZ?|%}*#)_50VYJX>^+qQeJcn6PXn8=Tl6CHg0_x` z@Ju(9HgfyTmnNs8czozI$jsnLDbvAjnlwNr`KH3E`TF&Nr<9JYKlQ}NubdX9Nyq34 P`$I3}FDMF-`6!8RxbBgF