diff --git a/examples/BuddyMatmul/linalg-batchmatmul-f32.mlir b/examples/BuddyMatmul/linalg-batchmatmul-f32.mlir index 58c9142398..ca132cb863 100644 --- a/examples/BuddyMatmul/linalg-batchmatmul-f32.mlir +++ b/examples/BuddyMatmul/linalg-batchmatmul-f32.mlir @@ -18,11 +18,24 @@ // RUN: | FileCheck %s func.func private @printMemrefF32(memref<*xf32>) +func.func private @rtclock() -> f64 func.func @batch_matmul(%arg0: memref, %arg1: memref, %arg2: memref) { + %t_start = call @rtclock() : () -> f64 + linalg.batch_matmul ins(%arg0, %arg1 : memref, memref) outs(%arg2 : memref) + + %t_end = call @rtclock() : () -> f64 + %time = arith.subf %t_end, %t_start : f64 + + %printed_output = memref.cast %arg2 : memref to memref<*xf32> + call @printMemrefF32(%printed_output) : (memref<*xf32>) -> () + + // Print timings. + vector.print %time : f64 + return } @@ -54,29 +67,21 @@ func.func @main(){ %m1 = call @alloc_f32(%c1, %c576, %c1024, %f3) : (index, index, index, f32) -> memref %m2 = call @alloc_f32(%c1, %c1, %c1024, %f0) : (index, index, index, f32) -> memref - call @batch_matmul(%m0, %m1, %m2) : (memref, memref, memref) -> () - - %printed_m2 = memref.cast %m2 : memref to memref<*xf32> - // CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 1, 1024] strides = [1024, 1024, 1] data = // CHECK-NEXT: [ // CHECK: [ // CHECK: [3456{{(, 3456)*}}] - call @printMemrefF32(%printed_m2) : (memref<*xf32>) -> () + call @batch_matmul(%m0, %m1, %m2) : (memref, memref, memref) -> () %m3 = call @alloc_f32(%c1, %c1, %c1024, %f2) : (index, index, index, f32) -> memref %m4 = call @alloc_f32(%c1, %c1024, %c1000, %f3) : (index, index, index, f32) -> memref %m5 = call @alloc_f32(%c1, %c1, %c1000, %f0) : (index, index, index, f32) -> memref - call @batch_matmul(%m3, %m4, %m5) : (memref, memref, memref) -> () - - %printed_m5 = memref.cast %m5 : memref to memref<*xf32> - // CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 1, 1000] strides = [1000, 1000, 1] data = // CHECK-NEXT: [ // CHECK: [ // CHECK: [6144{{(, 6144)*}}] - call @printMemrefF32(%printed_m5) : (memref<*xf32>) -> () + call @batch_matmul(%m3, %m4, %m5) : (memref, memref, memref) -> () return } diff --git a/examples/BuddyMatmul/makefile b/examples/BuddyMatmul/makefile index 0940d608da..1242f22467 100644 --- a/examples/BuddyMatmul/makefile +++ b/examples/BuddyMatmul/makefile @@ -11,6 +11,7 @@ OPT_FLAG := -O0 ifeq ($(shell uname),Linux) MLIR_RUNNER_UTILS := ${LLVM_BUILD_DIR}/lib/libmlir_runner_utils.so MLIR_C_RUNNER_UTILS := ${LLVM_BUILD_DIR}/lib/libmlir_c_runner_utils.so +LIB_OMP := ${LLVM_BUILD_DIR}/lib/libomp.so MTRIPLE := x86_64-unknown-linux-gnu else ifeq ($(shell uname),Darwin) MLIR_RUNNER_UTILS := ${LLVM_BUILD_DIR}/lib/libmlir_runner_utils.dylib @@ -36,6 +37,52 @@ linalg-batchmatmul-f32-run: ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \ -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS} +linalg-batchmatmul-f32-omp-lower: + @${BUDDY_OPT} ./linalg-batchmatmul-f32.mlir \ + -batchmatmul-optimize \ + -convert-linalg-to-affine-loops \ + -affine-parallelize \ + -lower-affine \ + -convert-scf-to-openmp \ + -convert-vector-to-scf \ + -expand-strided-metadata \ + -convert-vector-to-llvm \ + -memref-expand \ + -arith-expand \ + -convert-arith-to-llvm \ + -finalize-memref-to-llvm \ + -convert-scf-to-cf \ + -convert-openmp-to-llvm \ + -convert-math-to-llvm \ + -convert-math-to-libm \ + -convert-func-to-llvm \ + -reconcile-unrealized-casts \ + -o log.mlir + +linalg-batchmatmul-f32-omp-run: + @${BUDDY_OPT} ./linalg-batchmatmul-f32.mlir \ + -batchmatmul-optimize \ + -convert-linalg-to-affine-loops \ + -affine-parallelize \ + -lower-affine \ + -convert-scf-to-openmp \ + -convert-vector-to-scf \ + -expand-strided-metadata \ + -convert-vector-to-llvm \ + -memref-expand \ + -arith-expand \ + -convert-arith-to-llvm \ + -finalize-memref-to-llvm \ + -convert-scf-to-cf \ + -convert-openmp-to-llvm \ + -convert-math-to-llvm \ + -convert-math-to-libm \ + -convert-func-to-llvm \ + -reconcile-unrealized-casts | \ + ${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \ + -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS} \ + -shared-libs=${LIB_OMP} + linalg-matmul-transpose-b-f32-run: @${BUDDY_OPT} ./linalg-transposematmulb-f32.mlir\ -matmul-transpose-b-vectorization \