From c452afec474ca3cc0145e0d247b1eb36002f608c Mon Sep 17 00:00:00 2001
From: effrey-liu <2318266514@qq.com>
Date: Wed, 30 Oct 2024 17:39:14 +0800
Subject: [PATCH] add example to BuddyNext

---
 examples/BuddyNext/makefile                   | 36 ++++++++
 .../BuddyNext/next-const-add-simplify.mlir    | 92 +++++++++++++++++++
 2 files changed, 128 insertions(+)
 create mode 100644 examples/BuddyNext/next-const-add-simplify.mlir

diff --git a/examples/BuddyNext/makefile b/examples/BuddyNext/makefile
index ca326df793..d0dda9a9f5 100644
--- a/examples/BuddyNext/makefile
+++ b/examples/BuddyNext/makefile
@@ -297,3 +297,39 @@ next-eliminate-identity-run:
 		-reconcile-unrealized-casts | \
 	${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \
 		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}
+
+next-const-add-simplify-run:
+	@${MLIR_OPT} ./next-const-add-simplify.mlir \
+		-pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | \
+	${MLIR_OPT} \
+		-arith-expand \
+		-eliminate-empty-tensors \
+		-empty-tensor-to-alloc-tensor \
+		-one-shot-bufferize \
+		-convert-linalg-to-affine-loops \
+		-affine-loop-fusion \
+		-lower-affine \
+		-func-bufferize \
+		-arith-bufferize \
+		-tensor-bufferize \
+		-buffer-deallocation \
+		-finalizing-bufferize \
+		-convert-vector-to-scf \
+		-expand-strided-metadata \
+		-convert-vector-to-llvm \
+		-memref-expand \
+		-arith-expand \
+		-convert-arith-to-llvm \
+		-finalize-memref-to-llvm \
+		-convert-scf-to-cf \
+		-convert-openmp-to-llvm \
+		-convert-arith-to-llvm \
+		-convert-math-to-llvm \
+		-convert-math-to-libm \
+		-convert-func-to-llvm \
+		-reconcile-unrealized-casts | \
+	${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \
+		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}
+
+
+
diff --git a/examples/BuddyNext/next-const-add-simplify.mlir b/examples/BuddyNext/next-const-add-simplify.mlir
new file mode 100644
index 0000000000..0e6d187f1f
--- /dev/null
+++ b/examples/BuddyNext/next-const-add-simplify.mlir
@@ -0,0 +1,92 @@
+// RUN: buddy-opt %s \
+// RUN:     -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" \
+// RUN: | buddy-opt \
+// RUN:     -arith-expand \
+// RUN:     -eliminate-empty-tensors \
+// RUN:     -empty-tensor-to-alloc-tensor \
+// RUN:     -one-shot-bufferize \
+// RUN:     -convert-linalg-to-affine-loops \
+// RUN:     -affine-loop-fusion \
+// RUN:     -lower-affine \
+// RUN:     -func-bufferize \
+// RUN:     -arith-bufferize \
+// RUN:     -tensor-bufferize \
+// RUN:     -buffer-deallocation \
+// RUN:     -finalizing-bufferize \
+// RUN:     -convert-vector-to-scf \
+// RUN:     -expand-strided-metadata \
+// RUN:     -convert-vector-to-llvm \
+// RUN:     -memref-expand \
+// RUN:     -arith-expand \
+// RUN:     -convert-arith-to-llvm \
+// RUN:     -finalize-memref-to-llvm \
+// RUN:     -convert-scf-to-cf \
+// RUN:     -convert-openmp-to-llvm \
+// RUN:     -convert-arith-to-llvm \
+// RUN:     -convert-math-to-llvm \
+// RUN:     -convert-math-to-libm \
+// RUN:     -convert-func-to-llvm \
+// RUN:     -reconcile-unrealized-casts \
+// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
+// RUN:     -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \
+// RUN:     -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+module {
+  func.func private @printMemrefF32(tensor<*xf32>)
+  func.func private @rtclock() -> f64
+
+  func.func @const_add_original() {
+    %t0_original = call @rtclock() : () -> f64
+
+    %0 = "tosa.const"() <{value = dense<3.5> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %1 = "tosa.const"() <{value = dense<3.5> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %2 = tosa.add %0, %1 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %3 = tosa.reshape %2 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+
+    %t1_original = call @rtclock() : () -> f64
+    %tensor_unranked = tensor.cast %3 : tensor<32x40x128xf32> to tensor<*xf32>
+
+    // All the elements of the MemRef are the same,
+    // only check the first line to verify the correctness.
+    // CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [32, 40, 128] strides = [5120, 128, 1] data =
+    // CHECK-NEXT: [
+    // CHECK-SAME: [
+    // CHECK-SAME: [7{{(, 7)*}}],
+
+    // Print results.
+    call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
+    // Print timings.
+
+    %t_original = arith.subf %t1_original, %t0_original : f64
+    vector.print str "original operation time: "
+    vector.print %t_original : f64
+    return
+  }
+
+  func.func @const_add_optimized() {
+    %t0_optimized = call @rtclock() : () -> f64
+
+    %0 = "tosa.const"() <{value = dense<7.000000e+00> : tensor<32x40x128xf32>}> : () -> tensor<32x40x128xf32>
+    %t1_optimized = call @rtclock() : () -> f64
+
+    %tensor_unranked = tensor.cast %0 : tensor<32x40x128xf32> to tensor<*xf32>
+
+    // Print results.
+    call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
+    // Print timings.
+
+    %t_optimized = arith.subf %t1_optimized, %t0_optimized : f64
+    vector.print str "optimized operation time: "
+    vector.print %t_optimized : f64
+    return
+  }
+
+
+  func.func @main() {
+    call @const_add_original() : () -> ()
+    call @const_add_optimized() : () -> ()
+
+    return
+  }
+}