Skip to content

Commit

Permalink
The GPU OP-enabled version
Browse files Browse the repository at this point in the history
  • Loading branch information
WuXintong123 committed Oct 15, 2024
1 parent 59e4cd8 commit 82b92f8
Show file tree
Hide file tree
Showing 7 changed files with 408 additions and 28 deletions.
78 changes: 64 additions & 14 deletions examples/BuddyLeNet/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Declares the MLIR files and arg0.data as outputs of running buddy-lenet-import.py.
# NOTE(review): two OUTPUT lines are visible, identical except that the second also lists
# subgraph0.mlir; presumably the before/after sides of a diff rendering - confirm which is live.
# NOTE(review): the COMMENT text names subgraph1.mlir but not subgraph0.mlir.
add_custom_command(
OUTPUT ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/forward.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/arg0.data
OUTPUT ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/forward.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/arg0.data
COMMAND python3 ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/buddy-lenet-import.py
COMMENT "Generating forward.mlir, subgraph1.mlir and parameter files"
)
Expand All @@ -17,43 +17,93 @@ add_custom_command(
COMMENT "Building forward.o"
VERBATIM)

# add_custom_command(
# OUTPUT subgraph0.ll
# COMMAND ${BUDDY_BINARY_DIR}/buddy-opt ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir
# --transform-preload-library -transform-library-paths="transform.mlir"
# --transform-interpreter -entry-point="codegen" |
# ${BUDDY_BINARY_DIR}/buddy-opt --pass-pipeline "builtin.module(func.func(nvgpu-optimize-shared-memory))" |
# ${BUDDY_BINARY_DIR}/buddy-opt
# -arith-expand
# -eliminate-empty-tensors
# -empty-tensor-to-alloc-tensor
# -linalg-bufferize
# -convert-linalg-to-affine-loops
# -affine-loop-fusion
# -affine-parallelize
# -lower-affine
# -canonicalize
# -func-bufferize
# -arith-bufferize
# -tensor-bufferize
# -buffer-deallocation
# -finalizing-bufferize
# -canonicalize
# -gpu-launch-sink-index-computations
# -canonicalize
# -legalize-shmem-outlining
# -canonicalize
# -convert-memcpy-to-gpu
# -gpu-async-region
# -canonicalize
# -convert-scf-to-cf
# -memref-expand
# -finalize-memref-to-llvm
# -convert-arith-to-llvm
# -convert-vector-to-llvm
# -convert-gpu-to-nvvm
# -has-redux=1
# -llvm-request-c-wrappers
# -canonicalize
# -cse
# -sccp |
# ${LLVM_TOOLS_BINARY_DIR}/mlir-opt
# --test-lower-to-nvvm -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.ll
# DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir
# COMMENT "Building subgraph0.ll"
# VERBATIM
# )

# Lowers subgraph0.mlir to LLVM IR in three piped stages:
#   1. buddy-opt: GPU kernel outlining, C-wrapper request, vector lowering and bufferization passes.
#   2. mlir-opt: attaches an NVVM target (chip=sm_75, O=3), converts the GPU module to NVVM/LLVM,
#      and serializes it with gpu-module-to-binary.
#   3. mlir-translate: emits LLVM IR to subgraph0.ll.
# NOTE(review): the mlir-opt and mlir-translate lines each appear twice, once under
# LLVM_MLIR_BINARY_DIR and once under LLVM_TOOLS_BINARY_DIR; presumably the before/after sides
# of a diff rendering - confirm which is live.
# NOTE(review): OUTPUT is the relative name subgraph0.ll while -o writes to
# ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/; presumably the same location - verify.
add_custom_command(
OUTPUT subgraph0.ll
COMMAND ${BUDDY_BINARY_DIR}/buddy-opt ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir -gpu-kernel-outlining -llvm-request-c-wrappers -convert-vector-to-scf -convert-vector-to-llvm -func-bufferize-dynamic-offset -tensor-bufferize -buffer-deallocation -finalizing-bufferize -expand-strided-metadata -one-shot-bufferize |
${LLVM_MLIR_BINARY_DIR}/mlir-opt
${LLVM_TOOLS_BINARY_DIR}/mlir-opt
-pass-pipeline "builtin.module(nvvm-attach-target{chip=sm_75 O=3}, gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, reconcile-unrealized-casts, gpu-module-to-binary)" |
${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.ll
${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.ll
DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir
COMMENT "Building subgraph0.ll"
VERBATIM)

# Compiles subgraph0.ll into the object file subgraph0.o with clang++ at -O3 (-c: compile only).
# NOTE(review): the two COMMAND lines differ only in the tool directory variable
# (LLVM_MLIR_BINARY_DIR vs LLVM_TOOLS_BINARY_DIR); presumably the before/after sides of a
# diff rendering - confirm which is live.
# NOTE(review): -L/usr/local/cuda/lib64 -lcudart are linker options given together with -c;
# presumably they have no effect at this step, and the CUDA path is hard-coded - verify.
add_custom_command(
OUTPUT subgraph0.o
COMMAND ${LLVM_MLIR_BINARY_DIR}/clang++ ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.ll -L/usr/local/cuda/lib64 -lcudart -O3 -c -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.o
COMMAND ${LLVM_TOOLS_BINARY_DIR}/clang++ ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.ll -L/usr/local/cuda/lib64 -lcudart -O3 -c -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.o
DEPENDS ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.ll
COMMENT "Building subgraph0.o"
VERBATIM)




# add_custom_command(
# OUTPUT subgraph1.ll
# COMMAND ${BUDDY_BINARY_DIR}/buddy-opt ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir -gpu-kernel-outlining -llvm-request-c-wrappers -convert-vector-to-scf -convert-vector-to-llvm -func-bufferize-dynamic-offset -buffer-deallocation -finalizing-bufferize -expand-strided-metadata -one-shot-bufferize |
# ${LLVM_MLIR_BINARY_DIR}/mlir-opt
# ${LLVM_TOOLS_BINARY_DIR}/mlir-opt
# -pass-pipeline "builtin.module(nvvm-attach-target{chip=sm_75 O=3}, gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, reconcile-unrealized-casts, gpu-module-to-binary)" |
# ${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph1.ll
# ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph1.ll
# DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir
# COMMENT "Building subgraph1.ll"
# VERBATIM)

# add_custom_command(
# OUTPUT subgraph1.o
# COMMAND ${LLVM_MLIR_BINARY_DIR}/clang++ ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph1.ll -L/usr/local/cuda/lib64 -lcudart -O3 -c -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph1.o
# COMMAND ${LLVM_TOOLS_BINARY_DIR}/clang++ ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph1.ll -L/usr/local/cuda/lib64 -lcudart -O3 -c -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph1.o
# DEPENDS ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph1.ll
# COMMENT "Building subgraph1.o"
# VERBATIM)

add_custom_command(
OUTPUT subgraph1.o
COMMAND ${LLVM_MLIR_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir
COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir
-pass-pipeline "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith))" |
${BUDDY_BINARY_DIR}/buddy-opt
-eliminate-empty-tensors
Expand All @@ -77,11 +127,11 @@ add_custom_command(
-convert-arith-to-llvm
-convert-func-to-llvm
-reconcile-unrealized-casts |
${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
${LLVM_MLIR_BINARY_DIR}/llvm-as |
${LLVM_MLIR_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O0 -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.o
DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir
COMMENT "Building subgraph0.o"
${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
${LLVM_TOOLS_BINARY_DIR}/llvm-as |
${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O0 -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph1.o
DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir
COMMENT "Building subgraph1.o"
VERBATIM)

# Archive the object files produced by the custom commands into the static library LENET.
add_library(LENET
  STATIC
    subgraph0.o
    subgraph1.o
    forward.o
)
Expand All @@ -93,5 +143,5 @@ target_link_directories(buddy-lenet-run PRIVATE ${LLVM_LIBRARY_DIR})

# Libraries linked into buddy-lenet-run, collected in BUDDY_LENET_LIBS.
# Each set() below replaces the variable's previous value, so only the last assignment
# reaches target_link_libraries().
# NOTE(review): the OpenCV_LIBS line and the mlir_cuda_runtime/BuddyLibDIP line look like the
# before/after sides of a diff rendering - confirm which lines are live.
# NOTE(review): target_link_libraries() uses the keyword-less signature; consider an explicit
# PRIVATE/PUBLIC keyword if no other call on this target relies on the plain form.
set(BUDDY_LENET_LIBS LENET mlir_c_runner_utils ${PNG_LIBRARIES})

set(BUDDY_LENET_LIBS LENET mlir_c_runner_utils ${OpenCV_LIBS})
set(BUDDY_LENET_LIBS LENET mlir_c_runner_utils mlir_cuda_runtime BuddyLibDIP ${PNG_LIBRARIES})
target_link_libraries(buddy-lenet-run ${BUDDY_LENET_LIBS})
14 changes: 6 additions & 8 deletions examples/BuddyLeNet/buddy-lenet-import.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,21 +75,19 @@

with open(os.path.join(path_prefix, "subgraph0.mlir"), "w") as module_file:
print(driver.subgraphs[0]._imported_module, file=module_file)
with open(os.path.join(path_prefix, "subgraph1.mlir"), "w") as module_file:
print(driver.subgraphs[0]._imported_module, file=module_file)
with open(os.path.join(path_prefix, "subgraph1.mlir"), "w") as module_file:
print(driver.subgraphs[1]._imported_module, file=module_file)
with open(os.path.join(path_prefix, "forward.mlir"), "w") as module_file:
print(driver.construct_main_graph(True), file=module_file)

# params = dynamo_compiler.imported_params[graph]
# current_path = os.path.dirname(os.path.abspath(__file__))
params = dynamo_compiler.imported_params[graph]
current_path = os.path.dirname(os.path.abspath(__file__))

# float32_param = np.concatenate(
# [param.detach().numpy().reshape([-1]) for param in params]
# )
float32_param = np.concatenate(
[param.detach().numpy().reshape([-1]) for param in params]
)

# float32_param.tofile(Path(current_path) / "arg0.data")
float32_param.tofile(Path(current_path) / "arg0.data")

# # Convert the lenet graph to JSON string
# json_str = graph.to_json()
Expand Down
17 changes: 17 additions & 0 deletions examples/BuddyLeNet/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,22 @@ MLIR_ASYNC_RUNTIME := ${LLVM_BUILD_DIR}/lib/libmlir_async_runtime.dylib
MTRIPLE := x86_64-apple-darwin
endif

# Runs only the transform-dialect stage on subgraph0.mlir: preloads transform.mlir as the
# transform library, runs the transform interpreter from the "codegen" entry point, and
# writes the result to log.mlir.
buddy-gpu-matmul-lower:
@${BUDDY_OPT} subgraph0.mlir \
-transform-preload-library="transform-library-paths=transform.mlir" \
-transform-interpreter="entry-point=codegen" \
-o log.mlir

# Lowers subgraph0.mlir to matmul-cubin.mlir through a chain of piped tool invocations:
#   1. transform-dialect codegen (library transform.mlir, entry point "codegen")
#   2. nvgpu-optimize-shared-memory on each func.func
#   3. bufferization plus linalg -> affine loops -> lowered affine
#   4. GPU launch index-computation sinking and legalize-shmem-outlining
#   5. convert-memcpy-to-gpu and gpu-async-region
#   6. conversion to the LLVM/NVVM dialects (convert-gpu-to-nvvm with has-redux=1)
#   7. C-wrapper request followed by canonicalize/cse/sccp cleanup
#   8. ${MLIR_OPT} --test-lower-to-nvvm with cubin-chip=sm_80, +ptx71, fatbin format
buddy-gpu-matmul:
@${BUDDY_OPT} subgraph0.mlir -transform-preload-library="transform-library-paths=transform.mlir" -transform-interpreter="entry-point=codegen" | \
${BUDDY_OPT} --pass-pipeline='builtin.module(func.func(nvgpu-optimize-shared-memory))' | \
${BUDDY_OPT} -arith-expand -eliminate-empty-tensors -empty-tensor-to-alloc-tensor -linalg-bufferize -convert-linalg-to-affine-loops -affine-loop-fusion -affine-parallelize -lower-affine -canonicalize -func-bufferize -arith-bufferize -tensor-bufferize -buffer-deallocation -finalizing-bufferize -canonicalize | \
${BUDDY_OPT} -gpu-launch-sink-index-computations -canonicalize -legalize-shmem-outlining -canonicalize | \
${BUDDY_OPT} -convert-memcpy-to-gpu -gpu-async-region -canonicalize | \
${BUDDY_OPT} -convert-scf-to-cf -memref-expand -finalize-memref-to-llvm -convert-arith-to-llvm --convert-vector-to-llvm -convert-gpu-to-nvvm='has-redux=1' | \
${BUDDY_OPT} -llvm-request-c-wrappers -canonicalize -cse -sccp | \
${MLIR_OPT} --test-lower-to-nvvm="cubin-chip=sm_80 cubin-features=+ptx71 cubin-format=fatbin" -o matmul-cubin.mlir

buddy-lenet-lower:
@${BUDDY_OPT} ./fake-lenet.mlir \
-pass-pipeline "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith))" | \
Expand Down Expand Up @@ -124,3 +140,4 @@ buddy-lenet-opt-run:
-reconcile-unrealized-casts | \
${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \
-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}

3 changes: 3 additions & 0 deletions examples/BuddyLeNet/matmul-cubin.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Empty top-level module: it contains no operations.
module {
}

Loading

0 comments on commit 82b92f8

Please sign in to comment.