Skip to content

Commit

Permalink
Add more combined precision tosa tests (#637)
Browse files Browse the repository at this point in the history
Add the following passing element-wise tosa tests:
- i32xi32_add_elem (lane=32)
- i32xi32_mul_elem (lane=16)
- i32xi32_sel (lane=16)
- i32xi32_sub_elem (lane=32)

Add the following passing combined precision element-wise tosa tests:
- i8xi16_add_elem (lane=32)
- i8xi16_sub_elem (lane=32)
- i8xi32_add_elem (lane=32)
- i8xi32_sub_elem (lane=32)
- i16xi32_add_elem_v16 (lane=16)
- i16xi32_sub_elem_v16 (lane=16)

Add the following XFAIL combined precision element-wise tosa tests:
- i16xi32_add_elem_v32 (lane=32)
- i16xi32_sub_elem_v32 (lane=32)
  • Loading branch information
jamestcl-amd authored Sep 14, 2023
1 parent 9834100 commit f21d029
Show file tree
Hide file tree
Showing 43 changed files with 1,075 additions and 0 deletions.
4 changes: 4 additions & 0 deletions test/Integration/Dialect/TOSA/i16xi32_add_elem_v16/defines.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#pragma once
// Buffer sizes for this test; the testbench uses them as the element counts
// of its input, output and reference arrays.
constexpr unsigned IN0_SIZE = 1024U;  // elements in input 0
constexpr unsigned IN1_SIZE = 1024U;  // elements in input 1
constexpr unsigned OUT0_SIZE = 1024U; // elements in the output
20 changes: 20 additions & 0 deletions test/Integration/Dialect/TOSA/i16xi32_add_elem_v16/dut.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// clang-format off
// Vectorized element-wise add kernel: v3[i] = v1[i] + v2[i] for i in [0, 1024),
// processed 16 elements per loop iteration.
// NOTE(review): this looks like the output of the aievec-to-cpp translation
// step used by the accompanying lit test — confirm before hand-editing.
//   v1: int16_t input, v2: int32_t input, v3: int32_t output.
// NOTE(review): the vector pointer casts below presumably require the buffers
// to be aligned for vector access — confirm against the target's rules.
void dut(int16_t * restrict v1, int32_t * restrict v2, int32_t * restrict v3) {
// Loop bounds: start, end (total element count) and step (elements per pass).
size_t v4 = 0;
size_t v5 = 1024;
size_t v6 = 16;
// 1024 / 16 = 64 iterations, which matches the (64, 64) range given below.
for (size_t v7 = v4; v7 < v5; v7 += v6)
chess_prepare_for_pipelining
chess_loop_range(64, 64)
{
// Read one vector from each input at element offset v7.
v16int16 v8 = *(v16int16 *)(v1 + v7);
v16int32 v9 = *(v16int32 *)(v2 + v7);
// Bring both operands to the same v16acc64 type so they can be added.
// NOTE(review): the second argument (0) is presumably a shift amount — confirm.
v16acc64 v10 = ups_to_v16acc64(v8, 0);
v16acc64 v11 = ups_to_v16acc64(v9, 0);
v16acc64 v12 = add(v10, v11);
// Convert the accumulator result back to a v16int32 and store it.
v16int32 v13 = srs_to_v16int32(v12, 0);
*(v16int32 *)(v3 + v7) = v13;
}
return;
}
// clang-format on
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Copyright (C) 2023, Advanced Micro Devices, Inc.

// REQUIRES: valid_xchess_license
// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg))" -o linalg.mlir
// RUN: mlir-opt linalg.mlir --linalg-fuse-elementwise-ops --eliminate-empty-tensors --empty-tensor-to-alloc-tensor --one-shot-bufferize="allow-return-allocs allow-unknown-ops bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" --drop-equivalent-buffer-results --buffer-results-to-out-params --buffer-deallocation --canonicalize --cse --convert-linalg-to-affine-loops --affine-super-vectorize="virtual-vector-size=16" -o affine.mlir
// RUN: aie-opt affine.mlir --convert-vector-to-aievec="aie-target=aieml" -lower-affine -o aievec.mlir
// RUN: aie-translate aievec.mlir -aieml=true --aievec-to-cpp -o dut.cc
// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc dut.cc
// RUN: mkdir -p data
// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout
// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s
// CHECK: TEST PASSED

module {
  // Element-wise add of an i16 tensor and an i32 tensor (1024 elements each).
  // The i16 operand is first cast to i32 so both tosa.add inputs are i32.
  func.func @dut(%in0: tensor<1024xi16>, %in1: tensor<1024xi32>) -> tensor<1024xi32> {
    %widened = "tosa.cast"(%in0) : (tensor<1024xi16>) -> tensor<1024xi32>
    %sum = "tosa.add"(%widened, %in1) : (tensor<1024xi32>, tensor<1024xi32>) -> tensor<1024xi32>
    return %sum : tensor<1024xi32>
  }
}

57 changes: 57 additions & 0 deletions test/Integration/Dialect/TOSA/i16xi32_add_elem_v16/testbench.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#include "../common/testbench.h"
#include "defines.h"
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
// Kernel under test (defined in a separate translation unit) and the scalar
// reference implementation defined at the end of this file.
void dut(int16_t *restrict in0, int32_t *restrict in1, int32_t *restrict out0);
void dut_ref(int16_t *in0, int32_t *in1, int32_t *out0);

// Global test buffers: the two inputs, the kernel output and the reference
// output. Sizes come from defines.h.
// NOTE(review): the 32-byte alignment is presumably what the kernel's vector
// accesses need — confirm against the target's alignment rules.
alignas(32) int16_t g_in0[IN0_SIZE];
alignas(32) int32_t g_in1[IN1_SIZE];
alignas(32) int32_t g_out0[OUT0_SIZE];
alignas(32) int32_t g_out0Ref[OUT0_SIZE];

// Test driver. Fills the inputs, runs dut() between two cycle-counter reads,
// runs dut_ref() on the same inputs, hands every buffer to writeData() with a
// path under DATA_DIR, and prints the verdict line.
// Returns 0 when checkData() accepts the kernel output, 1 otherwise.
int main(int argc, char *argv[]) {
  // XXX Figure out how to use argv with xme_ca_udm_dbg -A
  std::string outDir(TO_STR(DATA_DIR));

  // Seed with a fixed value before generating the stimulus.
  srand(10);
  for (int16_t &sample : g_in0)
    sample = random_integer<int16_t>();
  for (int32_t &sample : g_in1)
    sample = random_integer<int32_t>();

  writeData(g_in0, IN0_SIZE, outDir + "/in0.txt");
  writeData(g_in1, IN1_SIZE, outDir + "/in1.txt");

  // Only the dut() call sits between the two cycle-counter reads; the memory
  // fences bracket the measured region.
  chess_memory_fence();
  auto startCycles = chess_cycle_count();
  dut(g_in0, g_in1, g_out0);
  auto stopCycles = chess_cycle_count();
  chess_memory_fence();

  int elapsedCycles = static_cast<int>(stopCycles - startCycles);
  reportCycleCount(elapsedCycles, outDir + "/cycle_count.txt");

  writeData(g_out0, OUT0_SIZE, outDir + "/out0.txt");

  // Produce the expected output and compare it with the kernel's.
  dut_ref(g_in0, g_in1, g_out0Ref);
  writeData(g_out0Ref, OUT0_SIZE, outDir + "/out0_ref.txt");

  bool passed = true;
  passed &= checkData(g_out0, g_out0Ref, OUT0_SIZE);

  if (!passed) {
    printf("TEST FAILED\n");
    return 1;
  }
  printf("TEST PASSED\n");
  return 0;
}

// Scalar reference for dut(): out0[k] = in0[k] + in1[k] for every k in
// [0, OUT0_SIZE).
//
// The sum is formed in uint32_t and converted back to int32_t, so a result
// outside the int32_t range wraps modulo 2^32. A plain `in0[k] + in1[k]` is an
// int + int addition whose overflow is undefined behaviour.
// NOTE(review): the previous comment said in0, in1 and out0 are in "C4
// layout"; here they are simply indexed as flat 1-D arrays.
void dut_ref(int16_t *in0, int32_t *in1, int32_t *out0) {
  for (unsigned k = 0; k < OUT0_SIZE; ++k) {
    // Converting a negative int16_t to uint32_t yields its value modulo 2^32,
    // i.e. the same bit pattern as the sign-extended 32-bit value.
    const uint32_t lhs = static_cast<uint32_t>(in0[k]);
    const uint32_t rhs = static_cast<uint32_t>(in1[k]);
    out0[k] = static_cast<int32_t>(lhs + rhs);
  }
}
4 changes: 4 additions & 0 deletions test/Integration/Dialect/TOSA/i16xi32_add_elem_v32/defines.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#pragma once
// Buffer sizes for this test; the testbench uses them as the element counts
// of its input, output and reference arrays.
constexpr unsigned IN0_SIZE = 1024U;  // elements in input 0
constexpr unsigned IN1_SIZE = 1024U;  // elements in input 1
constexpr unsigned OUT0_SIZE = 1024U; // elements in the output
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Copyright (C) 2023, Advanced Micro Devices, Inc.

// XFAIL: *
// REQUIRES: valid_xchess_license
// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg))" -o linalg.mlir
// RUN: mlir-opt linalg.mlir --linalg-fuse-elementwise-ops --eliminate-empty-tensors --empty-tensor-to-alloc-tensor --one-shot-bufferize="allow-return-allocs allow-unknown-ops bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" --drop-equivalent-buffer-results --buffer-results-to-out-params --buffer-deallocation --canonicalize --cse --convert-linalg-to-affine-loops --affine-super-vectorize="virtual-vector-size=32" -o affine.mlir
// RUN: aie-opt affine.mlir --convert-vector-to-aievec="aie-target=aieml" -lower-affine -o aievec.mlir
// RUN: aie-translate aievec.mlir -aieml=true --aievec-to-cpp -o dut.cc
// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc dut.cc
// RUN: mkdir -p data
// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout
// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s
// CHECK: TEST PASSED

module {
  // Element-wise add of an i16 tensor and an i32 tensor (1024 elements each).
  // The i16 operand is first cast to i32 so both tosa.add inputs are i32.
  func.func @dut(%in0: tensor<1024xi16>, %in1: tensor<1024xi32>) -> tensor<1024xi32> {
    %widened = "tosa.cast"(%in0) : (tensor<1024xi16>) -> tensor<1024xi32>
    %sum = "tosa.add"(%widened, %in1) : (tensor<1024xi32>, tensor<1024xi32>) -> tensor<1024xi32>
    return %sum : tensor<1024xi32>
  }
}

57 changes: 57 additions & 0 deletions test/Integration/Dialect/TOSA/i16xi32_add_elem_v32/testbench.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#include "../common/testbench.h"
#include "defines.h"
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
// Kernel under test (defined in a separate translation unit) and the scalar
// reference implementation defined at the end of this file.
void dut(int16_t *restrict in0, int32_t *restrict in1, int32_t *restrict out0);
void dut_ref(int16_t *in0, int32_t *in1, int32_t *out0);

// Global test buffers: the two inputs, the kernel output and the reference
// output. Sizes come from defines.h.
// NOTE(review): the 32-byte alignment is presumably what the kernel's vector
// accesses need — confirm against the target's alignment rules.
alignas(32) int16_t g_in0[IN0_SIZE];
alignas(32) int32_t g_in1[IN1_SIZE];
alignas(32) int32_t g_out0[OUT0_SIZE];
alignas(32) int32_t g_out0Ref[OUT0_SIZE];

// Test driver. Fills the inputs, runs dut() between two cycle-counter reads,
// runs dut_ref() on the same inputs, hands every buffer to writeData() with a
// path under DATA_DIR, and prints the verdict line.
// Returns 0 when checkData() accepts the kernel output, 1 otherwise.
int main(int argc, char *argv[]) {
  // XXX Figure out how to use argv with xme_ca_udm_dbg -A
  std::string outDir(TO_STR(DATA_DIR));

  // Seed with a fixed value before generating the stimulus.
  srand(10);
  for (int16_t &sample : g_in0)
    sample = random_integer<int16_t>();
  for (int32_t &sample : g_in1)
    sample = random_integer<int32_t>();

  writeData(g_in0, IN0_SIZE, outDir + "/in0.txt");
  writeData(g_in1, IN1_SIZE, outDir + "/in1.txt");

  // Only the dut() call sits between the two cycle-counter reads; the memory
  // fences bracket the measured region.
  chess_memory_fence();
  auto startCycles = chess_cycle_count();
  dut(g_in0, g_in1, g_out0);
  auto stopCycles = chess_cycle_count();
  chess_memory_fence();

  int elapsedCycles = static_cast<int>(stopCycles - startCycles);
  reportCycleCount(elapsedCycles, outDir + "/cycle_count.txt");

  writeData(g_out0, OUT0_SIZE, outDir + "/out0.txt");

  // Produce the expected output and compare it with the kernel's.
  dut_ref(g_in0, g_in1, g_out0Ref);
  writeData(g_out0Ref, OUT0_SIZE, outDir + "/out0_ref.txt");

  bool passed = true;
  passed &= checkData(g_out0, g_out0Ref, OUT0_SIZE);

  if (!passed) {
    printf("TEST FAILED\n");
    return 1;
  }
  printf("TEST PASSED\n");
  return 0;
}

// Scalar reference for dut(): out0[k] = in0[k] + in1[k] for every k in
// [0, OUT0_SIZE).
//
// The sum is formed in uint32_t and converted back to int32_t, so a result
// outside the int32_t range wraps modulo 2^32. A plain `in0[k] + in1[k]` is an
// int + int addition whose overflow is undefined behaviour.
// NOTE(review): the previous comment said in0, in1 and out0 are in "C4
// layout"; here they are simply indexed as flat 1-D arrays.
void dut_ref(int16_t *in0, int32_t *in1, int32_t *out0) {
  for (unsigned k = 0; k < OUT0_SIZE; ++k) {
    // Converting a negative int16_t to uint32_t yields its value modulo 2^32,
    // i.e. the same bit pattern as the sign-extended 32-bit value.
    const uint32_t lhs = static_cast<uint32_t>(in0[k]);
    const uint32_t rhs = static_cast<uint32_t>(in1[k]);
    out0[k] = static_cast<int32_t>(lhs + rhs);
  }
}
4 changes: 4 additions & 0 deletions test/Integration/Dialect/TOSA/i16xi32_sub_elem_v16/defines.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#pragma once
// Buffer sizes for this test; the testbench uses them as the element counts
// of its input, output and reference arrays.
constexpr unsigned IN0_SIZE = 1024U;  // elements in input 0
constexpr unsigned IN1_SIZE = 1024U;  // elements in input 1
constexpr unsigned OUT0_SIZE = 1024U; // elements in the output
20 changes: 20 additions & 0 deletions test/Integration/Dialect/TOSA/i16xi32_sub_elem_v16/dut.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// clang-format off
// Vectorized element-wise subtract kernel: v3[i] = v1[i] - v2[i] for i in
// [0, 1024), processed 16 elements per loop iteration.
// NOTE(review): this looks like the output of the aievec-to-cpp translation
// step used by the accompanying lit test — confirm before hand-editing.
//   v1: int16_t input, v2: int32_t input, v3: int32_t output.
// NOTE(review): the vector pointer casts below presumably require the buffers
// to be aligned for vector access — confirm against the target's rules.
void dut(int16_t * restrict v1, int32_t * restrict v2, int32_t * restrict v3) {
// Loop bounds: start, end (total element count) and step (elements per pass).
size_t v4 = 0;
size_t v5 = 1024;
size_t v6 = 16;
// 1024 / 16 = 64 iterations, which matches the (64, 64) range given below.
for (size_t v7 = v4; v7 < v5; v7 += v6)
chess_prepare_for_pipelining
chess_loop_range(64, 64)
{
// Read one vector from each input at element offset v7.
v16int16 v8 = *(v16int16 *)(v1 + v7);
v16int32 v9 = *(v16int32 *)(v2 + v7);
// Bring both operands to the same v16acc64 type so they can be subtracted.
// NOTE(review): the second argument (0) is presumably a shift amount — confirm.
v16acc64 v10 = ups_to_v16acc64(v8, 0);
v16acc64 v11 = ups_to_v16acc64(v9, 0);
// Operand order matters: (from v1) minus (from v2).
v16acc64 v12 = sub(v10, v11);
// Convert the accumulator result back to a v16int32 and store it.
v16int32 v13 = srs_to_v16int32(v12, 0);
*(v16int32 *)(v3 + v7) = v13;
}
return;
}
// clang-format on
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Copyright (C) 2023, Advanced Micro Devices, Inc.

// REQUIRES: valid_xchess_license
// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg))" -o linalg.mlir
// RUN: mlir-opt linalg.mlir --linalg-fuse-elementwise-ops --eliminate-empty-tensors --empty-tensor-to-alloc-tensor --one-shot-bufferize="allow-return-allocs allow-unknown-ops bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" --drop-equivalent-buffer-results --buffer-results-to-out-params --buffer-deallocation --canonicalize --cse --convert-linalg-to-affine-loops --affine-super-vectorize="virtual-vector-size=16" -o affine.mlir
// RUN: aie-opt affine.mlir --convert-vector-to-aievec="aie-target=aieml" -lower-affine -o aievec.mlir
// RUN: aie-translate aievec.mlir -aieml=true --aievec-to-cpp -o dut.cc
// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc dut.cc
// RUN: mkdir -p data
// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout
// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s
// CHECK: TEST PASSED

module {
  // Element-wise subtract (i16 operand minus i32 operand, 1024 elements each).
  // The i16 operand is first cast to i32 so both tosa.sub inputs are i32.
  func.func @dut(%in0: tensor<1024xi16>, %in1: tensor<1024xi32>) -> tensor<1024xi32> {
    %widened = "tosa.cast"(%in0) : (tensor<1024xi16>) -> tensor<1024xi32>
    %diff = "tosa.sub"(%widened, %in1) : (tensor<1024xi32>, tensor<1024xi32>) -> tensor<1024xi32>
    return %diff : tensor<1024xi32>
  }
}

57 changes: 57 additions & 0 deletions test/Integration/Dialect/TOSA/i16xi32_sub_elem_v16/testbench.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#include "../common/testbench.h"
#include "defines.h"
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
// Kernel under test (defined in a separate translation unit) and the scalar
// reference implementation defined at the end of this file.
void dut(int16_t *restrict in0, int32_t *restrict in1, int32_t *restrict out0);
void dut_ref(int16_t *in0, int32_t *in1, int32_t *out0);

// Global test buffers: the two inputs, the kernel output and the reference
// output. Sizes come from defines.h.
// NOTE(review): the 32-byte alignment is presumably what the kernel's vector
// accesses need — confirm against the target's alignment rules.
alignas(32) int16_t g_in0[IN0_SIZE];
alignas(32) int32_t g_in1[IN1_SIZE];
alignas(32) int32_t g_out0[OUT0_SIZE];
alignas(32) int32_t g_out0Ref[OUT0_SIZE];

// Test driver. Fills the inputs, runs dut() between two cycle-counter reads,
// runs dut_ref() on the same inputs, hands every buffer to writeData() with a
// path under DATA_DIR, and prints the verdict line.
// Returns 0 when checkData() accepts the kernel output, 1 otherwise.
int main(int argc, char *argv[]) {
  // XXX Figure out how to use argv with xme_ca_udm_dbg -A
  std::string outDir(TO_STR(DATA_DIR));

  // Seed with a fixed value before generating the stimulus.
  srand(10);
  for (int16_t &sample : g_in0)
    sample = random_integer<int16_t>();
  for (int32_t &sample : g_in1)
    sample = random_integer<int32_t>();

  writeData(g_in0, IN0_SIZE, outDir + "/in0.txt");
  writeData(g_in1, IN1_SIZE, outDir + "/in1.txt");

  // Only the dut() call sits between the two cycle-counter reads; the memory
  // fences bracket the measured region.
  chess_memory_fence();
  auto startCycles = chess_cycle_count();
  dut(g_in0, g_in1, g_out0);
  auto stopCycles = chess_cycle_count();
  chess_memory_fence();

  int elapsedCycles = static_cast<int>(stopCycles - startCycles);
  reportCycleCount(elapsedCycles, outDir + "/cycle_count.txt");

  writeData(g_out0, OUT0_SIZE, outDir + "/out0.txt");

  // Produce the expected output and compare it with the kernel's.
  dut_ref(g_in0, g_in1, g_out0Ref);
  writeData(g_out0Ref, OUT0_SIZE, outDir + "/out0_ref.txt");

  bool passed = true;
  passed &= checkData(g_out0, g_out0Ref, OUT0_SIZE);

  if (!passed) {
    printf("TEST FAILED\n");
    return 1;
  }
  printf("TEST PASSED\n");
  return 0;
}

// Scalar reference for dut(): out0[k] = in0[k] - in1[k] for every k in
// [0, OUT0_SIZE).
//
// The difference is formed in uint32_t and converted back to int32_t, so a
// result outside the int32_t range wraps modulo 2^32. A plain
// `in0[k] - in1[k]` is an int - int subtraction whose overflow is undefined
// behaviour.
// NOTE(review): the previous comment said in0, in1 and out0 are in "C4
// layout"; here they are simply indexed as flat 1-D arrays.
void dut_ref(int16_t *in0, int32_t *in1, int32_t *out0) {
  for (unsigned k = 0; k < OUT0_SIZE; ++k) {
    // Converting a negative int16_t to uint32_t yields its value modulo 2^32,
    // i.e. the same bit pattern as the sign-extended 32-bit value.
    const uint32_t lhs = static_cast<uint32_t>(in0[k]);
    const uint32_t rhs = static_cast<uint32_t>(in1[k]);
    out0[k] = static_cast<int32_t>(lhs - rhs);
  }
}
4 changes: 4 additions & 0 deletions test/Integration/Dialect/TOSA/i16xi32_sub_elem_v32/defines.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#pragma once
// Buffer sizes for this test; the testbench uses them as the element counts
// of its input, output and reference arrays.
constexpr unsigned IN0_SIZE = 1024U;  // elements in input 0
constexpr unsigned IN1_SIZE = 1024U;  // elements in input 1
constexpr unsigned OUT0_SIZE = 1024U; // elements in the output
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Copyright (C) 2023, Advanced Micro Devices, Inc.

// XFAIL: *
// REQUIRES: valid_xchess_license
// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg))" -o linalg.mlir
// RUN: mlir-opt linalg.mlir --linalg-fuse-elementwise-ops --eliminate-empty-tensors --empty-tensor-to-alloc-tensor --one-shot-bufferize="allow-return-allocs allow-unknown-ops bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" --drop-equivalent-buffer-results --buffer-results-to-out-params --buffer-deallocation --canonicalize --cse --convert-linalg-to-affine-loops --affine-super-vectorize="virtual-vector-size=32" -o affine.mlir
// RUN: aie-opt affine.mlir --convert-vector-to-aievec="aie-target=aieml" -lower-affine -o aievec.mlir
// RUN: aie-translate aievec.mlir -aieml=true --aievec-to-cpp -o dut.cc
// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc dut.cc
// RUN: mkdir -p data
// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout
// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s
// CHECK: TEST PASSED

module {
  // Element-wise subtract (i16 operand minus i32 operand, 1024 elements each).
  // The i16 operand is first cast to i32 so both tosa.sub inputs are i32.
  func.func @dut(%in0: tensor<1024xi16>, %in1: tensor<1024xi32>) -> tensor<1024xi32> {
    %widened = "tosa.cast"(%in0) : (tensor<1024xi16>) -> tensor<1024xi32>
    %diff = "tosa.sub"(%widened, %in1) : (tensor<1024xi32>, tensor<1024xi32>) -> tensor<1024xi32>
    return %diff : tensor<1024xi32>
  }
}

57 changes: 57 additions & 0 deletions test/Integration/Dialect/TOSA/i16xi32_sub_elem_v32/testbench.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#include "../common/testbench.h"
#include "defines.h"
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
// Kernel under test (defined in a separate translation unit) and the scalar
// reference implementation defined at the end of this file.
void dut(int16_t *restrict in0, int32_t *restrict in1, int32_t *restrict out0);
void dut_ref(int16_t *in0, int32_t *in1, int32_t *out0);

// Global test buffers: the two inputs, the kernel output and the reference
// output. Sizes come from defines.h.
// NOTE(review): the 32-byte alignment is presumably what the kernel's vector
// accesses need — confirm against the target's alignment rules.
alignas(32) int16_t g_in0[IN0_SIZE];
alignas(32) int32_t g_in1[IN1_SIZE];
alignas(32) int32_t g_out0[OUT0_SIZE];
alignas(32) int32_t g_out0Ref[OUT0_SIZE];

// Test driver. Fills the inputs, runs dut() between two cycle-counter reads,
// runs dut_ref() on the same inputs, hands every buffer to writeData() with a
// path under DATA_DIR, and prints the verdict line.
// Returns 0 when checkData() accepts the kernel output, 1 otherwise.
int main(int argc, char *argv[]) {
  // XXX Figure out how to use argv with xme_ca_udm_dbg -A
  std::string outDir(TO_STR(DATA_DIR));

  // Seed with a fixed value before generating the stimulus.
  srand(10);
  for (int16_t &sample : g_in0)
    sample = random_integer<int16_t>();
  for (int32_t &sample : g_in1)
    sample = random_integer<int32_t>();

  writeData(g_in0, IN0_SIZE, outDir + "/in0.txt");
  writeData(g_in1, IN1_SIZE, outDir + "/in1.txt");

  // Only the dut() call sits between the two cycle-counter reads; the memory
  // fences bracket the measured region.
  chess_memory_fence();
  auto startCycles = chess_cycle_count();
  dut(g_in0, g_in1, g_out0);
  auto stopCycles = chess_cycle_count();
  chess_memory_fence();

  int elapsedCycles = static_cast<int>(stopCycles - startCycles);
  reportCycleCount(elapsedCycles, outDir + "/cycle_count.txt");

  writeData(g_out0, OUT0_SIZE, outDir + "/out0.txt");

  // Produce the expected output and compare it with the kernel's.
  dut_ref(g_in0, g_in1, g_out0Ref);
  writeData(g_out0Ref, OUT0_SIZE, outDir + "/out0_ref.txt");

  bool passed = true;
  passed &= checkData(g_out0, g_out0Ref, OUT0_SIZE);

  if (!passed) {
    printf("TEST FAILED\n");
    return 1;
  }
  printf("TEST PASSED\n");
  return 0;
}

// Scalar reference for dut(): out0[k] = in0[k] - in1[k] for every k in
// [0, OUT0_SIZE).
//
// The difference is formed in uint32_t and converted back to int32_t, so a
// result outside the int32_t range wraps modulo 2^32. A plain
// `in0[k] - in1[k]` is an int - int subtraction whose overflow is undefined
// behaviour.
// NOTE(review): the previous comment said in0, in1 and out0 are in "C4
// layout"; here they are simply indexed as flat 1-D arrays.
void dut_ref(int16_t *in0, int32_t *in1, int32_t *out0) {
  for (unsigned k = 0; k < OUT0_SIZE; ++k) {
    // Converting a negative int16_t to uint32_t yields its value modulo 2^32,
    // i.e. the same bit pattern as the sign-extended 32-bit value.
    const uint32_t lhs = static_cast<uint32_t>(in0[k]);
    const uint32_t rhs = static_cast<uint32_t>(in1[k]);
    out0[k] = static_cast<int32_t>(lhs - rhs);
  }
}
4 changes: 4 additions & 0 deletions test/Integration/Dialect/TOSA/i32xi32_add_elem/defines.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#pragma once
// Buffer sizes for this test.
// NOTE(review): presumably the element counts of the testbench's input and
// output arrays, as in the sibling tests — confirm against this test's
// testbench.
constexpr unsigned IN0_SIZE = 1024U;
constexpr unsigned IN1_SIZE = 1024U;
constexpr unsigned OUT0_SIZE = 1024U;
Loading

0 comments on commit f21d029

Please sign in to comment.