Skip to content

Commit

Permalink
Merge branch 'main' into optim-pass
Browse files Browse the repository at this point in the history
  • Loading branch information
effrey-liu committed Oct 30, 2024
2 parents bda1df8 + 86cd5af commit 7fce630
Show file tree
Hide file tree
Showing 5 changed files with 262 additions and 2 deletions.
3 changes: 2 additions & 1 deletion examples/BuddyLeNet/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ $ cmake -G Ninja .. \
-DCMAKE_BUILD_TYPE=RELEASE \
-DBUDDY_MLIR_ENABLE_PYTHON_PACKAGES=ON \
-DPython3_EXECUTABLE=$(which python3) \
-DBUDDY_MLIR_ENABLE_DIP_LIB=ON
-DBUDDY_MLIR_ENABLE_DIP_LIB=ON \
-DBUDDY_ENABLE_PNG=ON
$ ninja
$ ninja check-buddy
```
Expand Down
67 changes: 67 additions & 0 deletions examples/BuddyNext/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,39 @@ next-sigmoid-run:
${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \
-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}

# Lower ./next-eliminate-add-zero.mlir and execute it.
#   1. First ${MLIR_OPT} invocation: runs the TOSA conversion pass pipeline
#      (tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith)
#      on every func.func.
#   2. Second ${MLIR_OPT} invocation: bufferization, loop lowering and the
#      conversions to the LLVM dialect, in the order listed below.
#   3. ${MLIR_CPU_RUNNER}: runs the `main` entry point with the runner
#      utility shared libraries loaded.
# NOTE(review): -arith-expand and -convert-arith-to-llvm each appear twice in
# the second stage; confirm whether the repetition is intentional.
next-eliminate-add-zero-run:
@${MLIR_OPT} ./next-eliminate-add-zero.mlir \
-pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | \
${MLIR_OPT} \
-arith-expand \
-eliminate-empty-tensors \
-empty-tensor-to-alloc-tensor \
-one-shot-bufferize \
-convert-linalg-to-affine-loops \
-affine-loop-fusion \
-lower-affine \
-func-bufferize \
-arith-bufferize \
-tensor-bufferize \
-buffer-deallocation \
-finalizing-bufferize \
-convert-vector-to-scf \
-expand-strided-metadata \
-convert-vector-to-llvm \
-memref-expand \
-arith-expand \
-convert-arith-to-llvm \
-finalize-memref-to-llvm \
-convert-scf-to-cf \
-convert-openmp-to-llvm \
-convert-arith-to-llvm \
-convert-math-to-llvm \
-convert-math-to-libm \
-convert-func-to-llvm \
-reconcile-unrealized-casts | \
${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \
-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}

next-rope-run:
@${MLIR_OPT} ./next-rope.mlir \
-pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | \
Expand Down Expand Up @@ -230,3 +263,37 @@ next-rope-run:
-reconcile-unrealized-casts | \
${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \
-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}


# Lower ./next-eliminate-identity.mlir and execute it.
#   1. First ${MLIR_OPT} invocation: runs the TOSA conversion pass pipeline
#      (tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith)
#      on every func.func.
#   2. Second ${MLIR_OPT} invocation: bufferization, loop lowering and the
#      conversions to the LLVM dialect, in the order listed below.
#   3. ${MLIR_CPU_RUNNER}: runs the `main` entry point with the runner
#      utility shared libraries loaded.
# NOTE(review): -arith-expand and -convert-arith-to-llvm each appear twice in
# the second stage; confirm whether the repetition is intentional.
next-eliminate-identity-run:
@${MLIR_OPT} ./next-eliminate-identity.mlir \
-pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | \
${MLIR_OPT} \
-arith-expand \
-eliminate-empty-tensors \
-empty-tensor-to-alloc-tensor \
-one-shot-bufferize \
-convert-linalg-to-affine-loops \
-affine-loop-fusion \
-lower-affine \
-func-bufferize \
-arith-bufferize \
-tensor-bufferize \
-buffer-deallocation \
-finalizing-bufferize \
-convert-vector-to-scf \
-expand-strided-metadata \
-convert-vector-to-llvm \
-memref-expand \
-arith-expand \
-convert-arith-to-llvm \
-finalize-memref-to-llvm \
-convert-scf-to-cf \
-convert-openmp-to-llvm \
-convert-arith-to-llvm \
-convert-math-to-llvm \
-convert-math-to-libm \
-convert-func-to-llvm \
-reconcile-unrealized-casts | \
${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \
-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}
96 changes: 96 additions & 0 deletions examples/BuddyNext/next-eliminate-add-zero.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// RUN: buddy-opt %s \
// RUN: -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" \
// RUN: | buddy-opt \
// RUN: -arith-expand \
// RUN: -eliminate-empty-tensors \
// RUN: -empty-tensor-to-alloc-tensor \
// RUN: -one-shot-bufferize \
// RUN: -convert-linalg-to-affine-loops \
// RUN: -affine-loop-fusion \
// RUN: -lower-affine \
// RUN: -func-bufferize \
// RUN: -arith-bufferize \
// RUN: -tensor-bufferize \
// RUN: -buffer-deallocation \
// RUN: -finalizing-bufferize \
// RUN: -convert-vector-to-scf \
// RUN: -expand-strided-metadata \
// RUN: -convert-vector-to-llvm \
// RUN: -memref-expand \
// RUN: -arith-expand \
// RUN: -convert-arith-to-llvm \
// RUN: -finalize-memref-to-llvm \
// RUN: -convert-scf-to-cf \
// RUN: -convert-openmp-to-llvm \
// RUN: -convert-arith-to-llvm \
// RUN: -convert-math-to-llvm \
// RUN: -convert-math-to-libm \
// RUN: -convert-func-to-llvm \
// RUN: -reconcile-unrealized-casts \
// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
// RUN: | FileCheck %s
module {
  // Declarations only: no bodies are given here. The implementations are
  // expected to come from the runner utility libraries loaded via
  // -shared-libs in the RUN line.
  func.func private @printMemrefF32(tensor<*xf32>)
  func.func private @rtclock() -> f64

  // Baseline variant: adds an all-zero constant tensor to the input before
  // reshaping it. Prints the reshaped tensor and the elapsed time between
  // the two @rtclock calls.
  func.func @uvue_original() {
    %t0_original = call @rtclock() : () -> f64

    %84 = arith.constant dense<2.0> : tensor<1x32x40x128xf32>
    %92 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x32x40x128xf32>}> : () -> tensor<1x32x40x128xf32>
    %93 = tosa.add %84, %92 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32>
    %94 = tosa.reshape %93 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>

    %t1_original = call @rtclock() : () -> f64
    %tensor_unranked = tensor.cast %94 : tensor<32x40x128xf32> to tensor<*xf32>

    // All the elements of the MemRef are the same,
    // only check the first line to verify the correctness.
    // CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [32, 40, 128] strides = [5120, 128, 1] data =
    // CHECK-NEXT: [
    // CHECK-SAME: [
    // CHECK-SAME: [2{{(, 2)*}}],

    // Print results.
    call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()

    // Print timings.
    %t_original = arith.subf %t1_original, %t0_original : f64
    vector.print str "original operation time: "
    vector.print %t_original : f64
    return
  }

  // Optimized variant: the add-with-zero is dropped and the input is
  // reshaped directly. Prints the reshaped tensor and the elapsed time
  // between the two @rtclock calls.
  func.func @uve_optimized() {
    %t0_optimized = call @rtclock() : () -> f64

    %84 = arith.constant dense<2.0> : tensor<1x32x40x128xf32>
    %94 = tosa.reshape %84 {new_shape = array<i64: 32, 40, 128>} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32>

    %t1_optimized = call @rtclock() : () -> f64
    %tensor_unranked = tensor.cast %94 : tensor<32x40x128xf32> to tensor<*xf32>

    // The optimized path must print the same tensor as the baseline, so it
    // is verified with the same pattern. All elements are equal, so only
    // the first line is checked.
    // CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [32, 40, 128] strides = [5120, 128, 1] data =
    // CHECK-NEXT: [
    // CHECK-SAME: [
    // CHECK-SAME: [2{{(, 2)*}}],

    // Print results.
    call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()

    // Print timings.
    %t_optimized = arith.subf %t1_optimized, %t0_optimized : f64
    vector.print str "optimized operation time: "
    vector.print %t_optimized : f64
    return
  }

  // Entry point used by the runner (-e main): runs the baseline first, then
  // the optimized variant, so the output order matches the CHECK order above.
  func.func @main() {
    call @uvue_original() : () -> ()
    call @uve_optimized() : () -> ()

    return
  }
}
96 changes: 96 additions & 0 deletions examples/BuddyNext/next-eliminate-identity.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// RUN: buddy-opt %s \
// RUN: -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" \
// RUN: | buddy-opt \
// RUN: -arith-expand \
// RUN: -eliminate-empty-tensors \
// RUN: -empty-tensor-to-alloc-tensor \
// RUN: -one-shot-bufferize \
// RUN: -convert-linalg-to-affine-loops \
// RUN: -affine-loop-fusion \
// RUN: -lower-affine \
// RUN: -func-bufferize \
// RUN: -arith-bufferize \
// RUN: -tensor-bufferize \
// RUN: -buffer-deallocation \
// RUN: -finalizing-bufferize \
// RUN: -convert-vector-to-scf \
// RUN: -expand-strided-metadata \
// RUN: -convert-vector-to-llvm \
// RUN: -memref-expand \
// RUN: -arith-expand \
// RUN: -convert-arith-to-llvm \
// RUN: -finalize-memref-to-llvm \
// RUN: -convert-scf-to-cf \
// RUN: -convert-openmp-to-llvm \
// RUN: -convert-arith-to-llvm \
// RUN: -convert-math-to-llvm \
// RUN: -convert-math-to-libm \
// RUN: -convert-func-to-llvm \
// RUN: -reconcile-unrealized-casts \
// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
// RUN: | FileCheck %s
module {
  // Body-less declarations of the print and clock helpers called below.
  func.func private @printMemrefF32(tensor<*xf32>)
  func.func private @rtclock() -> f64

  // Baseline variant: routes the constant through tosa.identity before the
  // reshape, then prints the tensor and the time between the clock reads.
  func.func @ie_original() {
    %start = call @rtclock() : () -> f64

    %src = arith.constant dense<1.0> : tensor<1x40x32x128xf32>
    %same = tosa.identity %src : (tensor<1x40x32x128xf32>) -> tensor<1x40x32x128xf32>
    %flat = tosa.reshape %same {new_shape = array<i64: 1, 40, 4096>} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32>

    %stop = call @rtclock() : () -> f64

    %printable = tensor.cast %flat : tensor<1x40x4096xf32> to tensor<*xf32>

    // Every element holds the same value, so matching the first printed
    // row is enough to validate the result.
    // CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 4096] strides = [163840, 4096, 1] data =
    // CHECK-NEXT: [
    // CHECK-SAME: [
    // CHECK-SAME: [1{{(, 1)*}}],

    // Emit the tensor contents.
    call @printMemrefF32(%printable) : (tensor<*xf32>) -> ()

    // Emit the elapsed time.
    %elapsed = arith.subf %stop, %start : f64
    vector.print str "original operation time: "
    vector.print %elapsed : f64
    return
  }

  // Optimized variant: the tosa.identity is gone and the constant feeds the
  // reshape directly; output and timing are reported the same way.
  func.func @ie_optimized() {
    %start = call @rtclock() : () -> f64

    %src = arith.constant dense<1.0> : tensor<1x40x32x128xf32>
    %flat = tosa.reshape %src {new_shape = array<i64: 1, 40, 4096>} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32>

    %stop = call @rtclock() : () -> f64

    %printable = tensor.cast %flat : tensor<1x40x4096xf32> to tensor<*xf32>

    // Every element holds the same value, so matching the first printed
    // row is enough to validate the result.
    // CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 4096] strides = [163840, 4096, 1] data =
    // CHECK-NEXT: [
    // CHECK-SAME: [
    // CHECK-SAME: [1{{(, 1)*}}],

    // Emit the tensor contents.
    call @printMemrefF32(%printable) : (tensor<*xf32>) -> ()

    // Emit the elapsed time.
    %elapsed = arith.subf %stop, %start : f64
    vector.print str "optimized operation time: "
    vector.print %elapsed : f64
    return
  }

  // Entry point: baseline first, optimized second, matching the order of
  // the CHECK directives.
  func.func @main() {
    call @ie_original() : () -> ()
    call @ie_optimized() : () -> ()
    return
  }
}
2 changes: 1 addition & 1 deletion tests/Interface/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ if(BUDDY_ENABLE_OPENCV)
include_directories(${OpenCV_INCLUDE_DIRS})
endif()

if(BUDDY_MLIR_ENABLE_DIP_LIB OR BUDDY_ENABLE_OPENCV)
if(BUDDY_MLIR_ENABLE_DIP_LIB AND BUDDY_ENABLE_OPENCV)
set(DIP_LIBS ${JPEG_LIBRARY} ${PNG_LIBRARY})
_add_test_executable(buddy-image-container-test
ImageContainerTest.cpp
Expand Down

0 comments on commit 7fce630

Please sign in to comment.