From c452afec474ca3cc0145e0d247b1eb36002f608c Mon Sep 17 00:00:00 2001
From: effrey-liu <2318266514@qq.com>
Date: Wed, 30 Oct 2024 17:39:14 +0800
Subject: [PATCH] add example to BuddyNext

---
 examples/BuddyNext/makefile                   | 36 ++++++++
 .../BuddyNext/next-const-add-simplify.mlir    | 92 +++++++++++++++++++
 2 files changed, 128 insertions(+)
 create mode 100644 examples/BuddyNext/next-const-add-simplify.mlir

diff --git a/examples/BuddyNext/makefile b/examples/BuddyNext/makefile
index ca326df793..d0dda9a9f5 100644
--- a/examples/BuddyNext/makefile
+++ b/examples/BuddyNext/makefile
@@ -297,3 +297,39 @@ next-eliminate-identity-run:
 		-reconcile-unrealized-casts | \
 	${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \
 		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}
+
+next-const-add-simplify-run:
+	@${MLIR_OPT} ./next-const-add-simplify.mlir \
+		-pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | \
+	${MLIR_OPT} \
+		-arith-expand \
+		-eliminate-empty-tensors \
+		-empty-tensor-to-alloc-tensor \
+		-one-shot-bufferize \
+		-convert-linalg-to-affine-loops \
+		-affine-loop-fusion \
+		-lower-affine \
+		-func-bufferize \
+		-arith-bufferize \
+		-tensor-bufferize \
+		-buffer-deallocation \
+		-finalizing-bufferize \
+		-convert-vector-to-scf \
+		-expand-strided-metadata \
+		-convert-vector-to-llvm \
+		-memref-expand \
+		-arith-expand \
+		-convert-arith-to-llvm \
+		-finalize-memref-to-llvm \
+		-convert-scf-to-cf \
+		-convert-openmp-to-llvm \
+		-convert-arith-to-llvm \
+		-convert-math-to-llvm \
+		-convert-math-to-libm \
+		-convert-func-to-llvm \
+		-reconcile-unrealized-casts | \
+	${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \
+		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}
+
+
+
diff --git a/examples/BuddyNext/next-const-add-simplify.mlir b/examples/BuddyNext/next-const-add-simplify.mlir
new file mode 100644
index 0000000000..0e6d187f1f
--- /dev/null
+++ b/examples/BuddyNext/next-const-add-simplify.mlir
@@ -0,0 +1,92 @@
+// RUN: buddy-opt %s \
+// RUN:     -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" \
+// RUN: | buddy-opt \
+// RUN:     -arith-expand \
+// RUN:     -eliminate-empty-tensors \
+// RUN:     -empty-tensor-to-alloc-tensor \
+// RUN:     -one-shot-bufferize \
+// RUN:     -convert-linalg-to-affine-loops \
+// RUN:     -affine-loop-fusion \
+// RUN:     -lower-affine \
+// RUN:     -func-bufferize \
+// RUN:     -arith-bufferize \
+// RUN:     -tensor-bufferize \
+// RUN:     -buffer-deallocation \
+// RUN:     -finalizing-bufferize \
+// RUN:     -convert-vector-to-scf \
+// RUN:     -expand-strided-metadata \
+// RUN:     -convert-vector-to-llvm \
+// RUN:     -memref-expand \
+// RUN:     -arith-expand \
+// RUN:     -convert-arith-to-llvm \
+// RUN:     -finalize-memref-to-llvm \
+// RUN:     -convert-scf-to-cf \
+// RUN:     -convert-openmp-to-llvm \
+// RUN:     -convert-arith-to-llvm \
+// RUN:     -convert-math-to-llvm \
+// RUN:     -convert-math-to-libm \
+// RUN:     -convert-func-to-llvm \
+// RUN:     -reconcile-unrealized-casts \
+// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
+// RUN:     -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \
+// RUN:     -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+module {
+  func.func private @printMemrefF32(tensor<*xf32>)
+  func.func private @rtclock() -> f64
+
+  func.func @const_add_original() {
+    %t0_original = call @rtclock() : () -> f64
+
+    %0 = "tosa.const"() <{value = dense<3.5> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %1 = "tosa.const"() <{value = dense<3.5> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
+    %2 = tosa.add %0, %1 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
+    %3 = tosa.reshape %2 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>
+
+    %t1_original = call @rtclock() : () -> f64
+    %tensor_unranked = tensor.cast %3 : tensor<32x40x128xf32> to tensor<*xf32>
+
+    // All the elements of the MemRef are the same,
+    // only check the first line to verify the correctness.
+    // CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [32, 40, 128] strides = [5120, 128, 1] data =
+    // CHECK-NEXT: [
+    // CHECK-SAME: [
+    // CHECK-SAME: [7{{(, 7)*}}],
+
+    // Print results.
+    call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
+    // Print timings.
+
+    %t_original = arith.subf %t1_original, %t0_original : f64
+    vector.print str "original operation time: "
+    vector.print %t_original : f64
+    return
+  }
+
+  func.func @const_add_optimized() {
+    %t0_optimized = call @rtclock() : () -> f64
+
+    %0 = "tosa.const"() <{value = dense<7.000000e+00> : tensor<32x40x128xf32>}> : () -> tensor<32x40x128xf32>
+    %t1_optimized = call @rtclock() : () -> f64
+
+    %tensor_unranked = tensor.cast %0 : tensor<32x40x128xf32> to tensor<*xf32>
+
+    // Print results.
+    call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
+    // Print timings.
+
+    %t_optimized = arith.subf %t1_optimized, %t0_optimized : f64
+    vector.print str "optimized operation time: "
+    vector.print %t_optimized : f64
+    return
+  }
+
+
+  func.func @main() {
+    call @const_add_original() : () -> ()
+    call @const_add_optimized() : () -> ()
+
+    return
+  }
+}