[Graph] Implementation and Testing of Graph-Level Compilation Infrastructure for Heterogeneous Hardware Partitioning #392

Closed · wants to merge 30 commits
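In brief, this PR extends the frontend so that a fused graph can be serialized to JSON, decoded back, and split by `GraphDriver` into per-device subgraphs that are lowered separately. The sketch below condenses that flow from the updated `examples/BuddyLeNet/buddy-lenet-import.py` further down; the `DynamoCompiler` setup, the `tosa.ops_registry` choice, and the 1x1x28x28 input are assumptions carried over from the existing LeNet example (weight loading omitted), not part of this diff.

```python
# Condensed sketch of the new heterogeneous import flow; see the
# buddy-lenet-import.py diff below for the authoritative version.
import os

import torch
from torch._inductor.decomposition import decompositions as inductor_decomp

from buddy.compiler.frontend import DynamoCompiler
from buddy.compiler.graph import GraphDriver
from buddy.compiler.graph.transform import simply_fuse
from buddy.compiler.graph.json_decoder import json_to_graph
from buddy.compiler.ops import tosa
from model import LeNet  # the example's local LeNet definition

# Assumed setup, mirroring the existing LeNet example (no checkpoint loaded here).
model = LeNet().eval()
dynamo_compiler = DynamoCompiler(
    primary_registry=tosa.ops_registry,
    aot_autograd_decomposition=inductor_decomp,
)
with torch.no_grad():
    graphs = dynamo_compiler.importer(model, torch.randn([1, 1, 28, 28]))

graph = graphs[0]
graph.fuse_ops([simply_fuse])

# New in this PR: round-trip the graph through the JSON interface, then let
# GraphDriver partition it into per-device subgraphs.
json_str = graph.to_json()
graph0 = json_to_graph(json_str)
driver = GraphDriver(graph0)
driver.subgraphs[0].lower_to_top_level_ir()  # e.g. GPU partition -> subgraph0.mlir
driver.subgraphs[1].lower_to_top_level_ir()  # e.g. CPU partition -> subgraph1.mlir

path_prefix = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(path_prefix, "forward.mlir"), "w") as f:
    print(driver.construct_main_graph(True), file=f)
```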
Changes from all commits (30 commits)
cbbba15
temp
wdjyd Jun 19, 2024
0858f29
fix/maxpool2d_simplify
wdjyd Jun 19, 2024
f2fd572
fix/maxpool2d_simplify
wdjyd Jun 19, 2024
b2c4c29
add json_encoder and json_decoder
wdjyd Jun 20, 2024
d09dd73
add json_encoder and json_decoder
wdjyd Jun 20, 2024
4e77924
add gpu.container_module
wdjyd Jul 31, 2024
43b3624
[frontend] Add GPU MLIR lowering path with ReLU operation support
wdjyd Aug 12, 2024
2d4eef1
delete env.sh
wdjyd Aug 12, 2024
78f6bca
delete env.sh
wdjyd Aug 12, 2024
abce382
[BuddyTest] Add Test Model E2E example.
wdjyd Aug 16, 2024
3d00fe6
[BuddyTest] Add README.
wdjyd Aug 16, 2024
ae794aa
[BuddyTest] Add README.
wdjyd Aug 16, 2024
b57103c
[frontend] Add GPU MLIR lowering path with Conv2d operation support
wdjyd Aug 30, 2024
0adf1df
[frontend] Add GPU MLIR lowering path with MaxPool2d operation support
wdjyd Sep 2, 2024
f636341
[frontend] Fix Permute Op
wdjyd Sep 3, 2024
72cdc82
[frontend] Fix implementation error in permute and conv_2d operation
wdjyd Sep 10, 2024
cf703c7
[frontend] Add LeNet example for E2E execution in GPU device
wdjyd Sep 18, 2024
9a88cb2
[frontend] Add the custom subgraph partitioning interface
wdjyd Sep 21, 2024
2f91175
[frontend] Fix error in graph partitioning interface
wdjyd Sep 22, 2024
20be444
Merge remote-tracking branch 'origin/fix/maxpool2d_simplify' into wafer
wdjyd Sep 26, 2024
3e88a45
[frontend] Add JSON format interface for subgraph partitioning implem…
wdjyd Sep 26, 2024
29745ef
[frontend] Add JSON format interface for subgraph partitioning implem…
wdjyd Sep 26, 2024
fd814f8
device_type
WuXintong123 Sep 29, 2024
488b3b0
device_type
WuXintong123 Sep 29, 2024
c92c261
Merge remote-tracking branch 'wdjyd/wafer' into heterogeneous
WuXintong123 Oct 13, 2024
81a35fb
Add mimalloc library
WuXintong123 Oct 13, 2024
26cdc21
correct
WuXintong123 Oct 13, 2024
97ff064
Merge remote-tracking branch 'upstream/main' into heterogeneous
WuXintong123 Oct 13, 2024
ec4fa8f
Merge branch 'buddy-compiler:main' into heterogeneous
WuXintong123 Oct 14, 2024
29fd975
Merge remote-tracking branch 'upstream/main' into heterogeneous
WuXintong123 Oct 30, 2024
4 changes: 2 additions & 2 deletions .gitignore
@@ -13,5 +13,5 @@
# Clangd cache
.cache

# Clangd configurations
.clangd
# environment bash
env.sh
3 changes: 3 additions & 0 deletions examples/BuddyLeNet/.gitignore
@@ -3,8 +3,11 @@ log.ll
log.s
data
*.data
*.json
*.dot
__pycache__
*.pth
lenet.mlir
forward.mlir
subgraph0.mlir
subgraph1.mlir
55 changes: 44 additions & 11 deletions examples/BuddyLeNet/CMakeLists.txt
@@ -1,7 +1,7 @@
add_custom_command(
OUTPUT ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/forward.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/arg0.data
OUTPUT ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/forward.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/arg0.data
COMMAND python3 ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/buddy-lenet-import.py
COMMENT "Generating forward.mlir, subgraph0.mlir and parameter files"
COMMENT "Generating forward.mlir, subgraph1.mlir and parameter files"
)

add_custom_command(
@@ -17,15 +17,48 @@ add_custom_command(
COMMENT "Building forward.o"
VERBATIM)

add_custom_command(
OUTPUT subgraph0.ll
COMMAND ${BUDDY_BINARY_DIR}/buddy-opt ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir -gpu-kernel-outlining -llvm-request-c-wrappers -convert-vector-to-scf -convert-vector-to-llvm -func-bufferize-dynamic-offset -tensor-bufferize -buffer-deallocation -finalizing-bufferize -expand-strided-metadata -one-shot-bufferize |
${LLVM_TOOLS_BINARY_DIR}/mlir-opt
-pass-pipeline "builtin.module(nvvm-attach-target{chip=sm_75 O=3}, gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, reconcile-unrealized-casts, gpu-module-to-binary)" |
${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.ll
DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir
COMMENT "Building subgraph0.ll"
VERBATIM)

add_custom_command(
OUTPUT subgraph0.o
COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir
COMMAND ${LLVM_TOOLS_BINARY_DIR}/clang++ ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.ll -L/usr/local/cuda/lib64 -lcudart -O3 -c -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.o
DEPENDS ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.ll
COMMENT "Building subgraph0.o"
VERBATIM)

# add_custom_command(
# OUTPUT subgraph1.ll
# COMMAND ${BUDDY_BINARY_DIR}/buddy-opt ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir -gpu-kernel-outlining -llvm-request-c-wrappers -convert-vector-to-scf -convert-vector-to-llvm -func-bufferize-dynamic-offset -buffer-deallocation -finalizing-bufferize -expand-strided-metadata -one-shot-bufferize |
# ${LLVM_TOOLS_BINARY_DIR}/mlir-opt
# -pass-pipeline "builtin.module(nvvm-attach-target{chip=sm_75 O=3}, gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, reconcile-unrealized-casts, gpu-module-to-binary)" |
# ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph1.ll
# DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir
# COMMENT "Building subgraph1.ll"
# VERBATIM)

# add_custom_command(
# OUTPUT subgraph1.o
# COMMAND ${LLVM_TOOLS_BINARY_DIR}/clang++ ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph1.ll -L/usr/local/cuda/lib64 -lcudart -O3 -c -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph1.o
# DEPENDS ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph1.ll
# COMMENT "Building subgraph1.o"
# VERBATIM)

add_custom_command(
OUTPUT subgraph1.o
COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir
-pass-pipeline "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith))" |
${BUDDY_BINARY_DIR}/buddy-opt
-eliminate-empty-tensors
-convert-tensor-to-linalg
-convert-tensor-to-linalg
-linalg-bufferize
-batchmatmul-optimize
-convert-linalg-to-affine-loops
-lower-affine
-func-bufferize-dynamic-offset
@@ -45,18 +78,18 @@ add_custom_command(
-reconcile-unrealized-casts |
${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
${LLVM_TOOLS_BINARY_DIR}/llvm-as |
${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O0 -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.o
DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir
COMMENT "Building subgraph0.o"
${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O0 -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph1.o
DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir
COMMENT "Building subgraph1.o"
VERBATIM)

add_library(LENET STATIC subgraph0.o forward.o)
add_library(LENET STATIC subgraph0.o subgraph1.o forward.o)

SET_TARGET_PROPERTIES(LENET PROPERTIES LINKER_LANGUAGE C)

add_executable(buddy-lenet-run buddy-lenet-main.cpp)
target_link_directories(buddy-lenet-run PRIVATE ${LLVM_LIBRARY_DIR})
target_link_directories(buddy-lenet-run PRIVATE ${LLVM_MLIR_LIBRARY_DIR})

set(BUDDY_LENET_LIBS LENET mlir_c_runner_utils ${PNG_LIBRARIES})

target_link_libraries(buddy-lenet-run ${BUDDY_LENET_LIBS})
target_link_libraries(buddy-lenet-run ${BUDDY_LENET_LIBS})
50 changes: 40 additions & 10 deletions examples/BuddyLeNet/buddy-lenet-import.py
@@ -28,7 +28,9 @@
from buddy.compiler.frontend import DynamoCompiler
from buddy.compiler.graph import GraphDriver
from buddy.compiler.graph.transform import simply_fuse
from buddy.compiler.ops import tosa
from buddy.compiler.graph.type import DeviceType
from buddy.compiler.ops import tosa, gpu
from buddy.compiler.graph.json_decoder import json_to_graph
from model import LeNet

# Retrieve the LeNet model path from environment variables.
@@ -57,20 +59,48 @@
graph = graphs[0]
params = dynamo_compiler.imported_params[graph]
pattern_list = [simply_fuse]
graphs[0].fuse_ops(pattern_list)
driver = GraphDriver(graphs[0])
driver.subgraphs[0].lower_to_top_level_ir()
graph.fuse_ops(pattern_list)
path_prefix = os.path.dirname(os.path.abspath(__file__))

# Convert the lenet graph to JSON string
json_str = graph.to_json()
with open(os.path.join(path_prefix, "lenet.json"), "w") as module_file:
module_file.write(json_str)

# Convert the lenet graph Json string to a lenet graph
graph0 = json_to_graph(json_str)
driver = GraphDriver(graph0)
driver.subgraphs[0].lower_to_top_level_ir()
driver.subgraphs[1].lower_to_top_level_ir()

with open(os.path.join(path_prefix, "subgraph0.mlir"), "w") as module_file:
print(driver.subgraphs[0]._imported_module, file=module_file)
with open(os.path.join(path_prefix, "subgraph1.mlir"), "w") as module_file:
print(driver.subgraphs[1]._imported_module, file=module_file)
with open(os.path.join(path_prefix, "forward.mlir"), "w") as module_file:
print(driver.construct_main_graph(True), file=module_file)

params = dynamo_compiler.imported_params[graph]
current_path = os.path.dirname(os.path.abspath(__file__))
# params = dynamo_compiler.imported_params[graph]
# current_path = os.path.dirname(os.path.abspath(__file__))

float32_param = np.concatenate(
[param.detach().numpy().reshape([-1]) for param in params]
)
# float32_param = np.concatenate(
# [param.detach().numpy().reshape([-1]) for param in params]
# )

# float32_param.tofile(Path(current_path) / "arg0.data")

# # Convert the lenet graph to JSON string
# json_str = graph.to_json()
# with open(os.path.join(path_prefix, "lenet.json"), "w") as module_file:
# module_file.write(json_str)

# # Convert the lenet graph Json string to a lenet graph
# graph0 = json_to_graph(json_str)
# graph0.lower_to_top_level_ir()
# with open(os.path.join(path_prefix, "lenet.mlir"), "w") as module_file:
# print(graph0._imported_module, file=module_file)

float32_param.tofile(Path(current_path) / "arg0.data")
# # Convert the lenet graph to DOT string
# dot_str = graph.to_dot()
# with open(os.path.join(path_prefix, "graph.dot"), "w") as module_file:
# module_file.write(dot_str)
3 changes: 3 additions & 0 deletions examples/BuddyTest/.gitignore
@@ -0,0 +1,3 @@
__pycache__
*.mlir
log.ll
29 changes: 29 additions & 0 deletions examples/BuddyTest/CMakeLists.txt
@@ -0,0 +1,29 @@
add_custom_command(
OUTPUT ${BUDDY_EXAMPLES_DIR}/BuddyTest/forward.mlir
COMMAND python3 ${BUDDY_EXAMPLES_DIR}/BuddyTest/import-test.py
COMMENT "Generating forward.mlir"
)


add_custom_command(
OUTPUT forward.o
COMMAND ${LLVM_MLIR_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyTest/forward.mlir -gpu-kernel-outlining -llvm-request-c-wrappers -convert-vector-to-scf -convert-vector-to-llvm |
${LLVM_MLIR_BINARY_DIR}/mlir-opt
-pass-pipeline "builtin.module(nvvm-attach-target{chip=sm_75 O=3}, gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, reconcile-unrealized-casts, gpu-module-to-binary)" |
${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
${LLVM_MLIR_BINARY_DIR}/llvm-as |
${LLVM_MLIR_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 -o ${BUDDY_BINARY_DIR}/../examples/BuddyTest/forward.o
DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyTest/forward.mlir
COMMENT "Building forward.o"
VERBATIM)


add_library(TEST STATIC forward.o)

SET_TARGET_PROPERTIES(TEST PROPERTIES LINKER_LANGUAGE C)

add_executable(buddy-test-run test-main.cpp)
target_link_directories(buddy-test-run PRIVATE ${LLVM_MLIR_LIBRARY_DIR})

set(BUDDY_TEST_LIBS TEST mlir_runner_utils mlir_cuda_runtime)
target_link_libraries(buddy-test-run ${BUDDY_TEST_LIBS})
65 changes: 65 additions & 0 deletions examples/BuddyTest/README.md
@@ -0,0 +1,65 @@
# Buddy Compiler Test Example

0. Activate your Python environment.

1. Build LLVM/MLIR

```bash
$ cd buddy-mlir
$ mkdir llvm/build
$ cd llvm/build
$ cmake -G Ninja ../llvm \
-DLLVM_ENABLE_PROJECTS="mlir;clang;openmp" \
-DLLVM_TARGETS_TO_BUILD="host;NVPTX" \
-DMLIR_ENABLE_CUDA_RUNNER=ON \
-DLLVM_ENABLE_ASSERTIONS=ON \
-DOPENMP_ENABLE_LIBOMPTARGET=OFF \
-DCMAKE_BUILD_TYPE=RELEASE \
-DMLIR_ENABLE_BINDINGS_PYTHON=ON \
-DPython3_EXECUTABLE=$(which python3)
$ ninja check-clang check-mlir omp
```

2. Build buddy-mlir

```bash
$ mkdir build && cd build
$ cmake -G Ninja .. \
-DMLIR_DIR=$PWD/../llvm/build/lib/cmake/mlir \
-DLLVM_DIR=$PWD/../llvm/build/lib/cmake/llvm \
-DLLVM_ENABLE_ASSERTIONS=ON \
-DCMAKE_BUILD_TYPE=RELEASE \
-DBUDDY_MLIR_ENABLE_PYTHON_PACKAGES=ON \
-DPython3_EXECUTABLE=$(which python3)
$ ninja
$ ninja check-buddy
```

3. Set the `PYTHONPATH` environment variable.

Make sure you are in the build directory.

```bash
$ export BUDDY_MLIR_BUILD_DIR=$PWD
$ export LLVM_MLIR_BUILD_DIR=$PWD/../llvm/build
$ export PYTHONPATH=${LLVM_MLIR_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_MLIR_BUILD_DIR}/python_packages:${PYTHONPATH}
```

4. Build and run the Test example

```bash
$ cmake -G Ninja .. -DBUDDY_TEST_EXAMPLES=ON
$ ninja buddy-test-run
$ cd bin
$ ./buddy-test-run
```

## Debug the Lowering Pass Pipeline with Fake Parameters.

```bash
$ cd buddy-mlir
$ cd examples/BuddyTest
$ make gpu-test-lower
$ make gpu-test-translate
$ make gpu-test-run
```
55 changes: 55 additions & 0 deletions examples/BuddyTest/import-test.py
@@ -0,0 +1,55 @@
# ===- import-test.py -----------------------------------------------------------
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ===---------------------------------------------------------------------------
#
# This is the Test model AOT importer.
#
# ===---------------------------------------------------------------------------

import os
from pathlib import Path

import numpy as np
import torch
from torch._inductor.decomposition import decompositions as inductor_decomp

from buddy.compiler.frontend import DynamoCompiler
from buddy.compiler.graph import GraphDriver
from buddy.compiler.graph.transform import simply_fuse
from buddy.compiler.ops.gpu import ops_registry as gpu_ops_registry
from model import TestModule

model = TestModule()
model = model.eval()

# Initialize Dynamo Compiler with specific configurations as an importer.
dynamo_compiler = DynamoCompiler(
primary_registry=gpu_ops_registry,
aot_autograd_decomposition=inductor_decomp,
)

data = torch.randn([1, 1, 12, 10])
# Import the model into MLIR module and parameters.
with torch.no_grad():
graphs = dynamo_compiler.importer(model, data)

assert len(graphs) == 1
graph = graphs[0]
print(graph.body)
graph.lower_to_top_level_ir()
path_prefix = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(path_prefix, "forward.mlir"), "w") as module_file:
print(graph._imported_module, file=module_file)
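
import-test.py above pulls `TestModule` from a local `model.py` that is not included in this diff. A hypothetical minimal definition, consistent with the `[1, 1, 12, 10]` input used by the importer and with the ReLU GPU lowering this PR adds first, could look like the following; the actual model in the PR may differ.

```python
# Hypothetical model.py for the BuddyTest example (the real file is not shown
# in this diff). It only assumes what import-test.py exercises: a TestModule
# whose forward pass runs on a [1, 1, 12, 10] tensor, using ReLU since that is
# the first operation supported by the GPU lowering path in this PR.
import torch


class TestModule(torch.nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        # A single element-wise op keeps the generated forward.mlir small.
        return torch.relu(x)


if __name__ == "__main__":
    # Quick sanity check of the interface assumed by import-test.py.
    out = TestModule().eval()(torch.randn([1, 1, 12, 10]))
    print(out.shape)  # torch.Size([1, 1, 12, 10])
```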

56 changes: 56 additions & 0 deletions examples/BuddyTest/makefile
@@ -0,0 +1,56 @@
#!/bin/bash
BUDDY_OPT := ../../build/bin/buddy-opt
MLIR_OPT := ../../llvm/build/bin/mlir-opt
MLIR_TRANSLATE := ../../llvm/build/bin/mlir-translate
MLIR_CPU_RUNNER := ../../llvm/build/bin/mlir-cpu-runner
LLC := ../../llvm/build/bin/llc
OPT_FLAG := -O0

ifeq ($(shell uname),Linux)
MLIR_RUNNER_UTILS := ../../llvm/build/lib/libmlir_runner_utils.so
MLIR_C_RUNNER_UTILS := ../../llvm/build/lib/libmlir_c_runner_utils.so
MLIR_ASYNC_RUNTIME := ../../llvm/build/lib/libmlir_async_runtime.so
MLIR_CUDA_RUNTIME := ../../llvm/build/lib/libmlir_cuda_runtime.so
MTRIPLE := x86_64-unknown-linux-gnu
else ifeq ($(shell uname),Darwin)
MLIR_RUNNER_UTILS := ../../llvm/build/lib/libmlir_runner_utils.dylib
MLIR_C_RUNNER_UTILS := ../../llvm/build/lib/libmlir_c_runner_utils.dylib
MLIR_ASYNC_RUNTIME := ../../llvm/build/lib/libmlir_async_runtime.dylib
MTRIPLE := x86_64-apple-darwin
endif

gpu-test-lower:
@${MLIR_OPT} forward.mlir -gpu-kernel-outlining -llvm-request-c-wrappers -convert-vector-to-scf -convert-vector-to-llvm | \
${MLIR_OPT} -pass-pipeline="builtin.module(nvvm-attach-target{chip=sm_70 O=3},\
gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, reconcile-unrealized-casts, gpu-module-to-binary)" | \
${MLIR_OPT} -o log.mlir

gpu-test-translate:
@${MLIR_OPT} forward.mlir -gpu-kernel-outlining -llvm-request-c-wrappers -convert-vector-to-scf -convert-vector-to-llvm | \
${MLIR_OPT} -pass-pipeline="builtin.module(nvvm-attach-target{chip=sm_70 O=3},\
gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, reconcile-unrealized-casts, gpu-module-to-binary)" | \
${MLIR_TRANSLATE} -mlir-to-llvmir -o log.ll

gpu-test-run:
@${MLIR_OPT} forward.mlir -gpu-kernel-outlining -llvm-request-c-wrappers -convert-vector-to-scf -convert-vector-to-llvm | \
${MLIR_OPT} -pass-pipeline="builtin.module(nvvm-attach-target{chip=sm_70 O=3},\
gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, reconcile-unrealized-casts, gpu-module-to-binary)" | \
${MLIR_CPU_RUNNER} -entry-point-result=void -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_CUDA_RUNTIME}

gpu-conv2d-lower:
@${MLIR_OPT} conv2d.mlir -gpu-kernel-outlining -llvm-request-c-wrappers -convert-vector-to-scf -convert-vector-to-llvm | \
${MLIR_OPT} -pass-pipeline="builtin.module(nvvm-attach-target{chip=sm_70 O=3},\
gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, reconcile-unrealized-casts, gpu-module-to-binary)" | \
${MLIR_OPT} -o log.mlir

gpu-conv2d-translate:
@${MLIR_OPT} conv2d.mlir -gpu-kernel-outlining -llvm-request-c-wrappers | \
${MLIR_OPT} -pass-pipeline="builtin.module(nvvm-attach-target{chip=sm_70 O=3},\
gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, gpu-module-to-binary)" | \
${MLIR_TRANSLATE} -mlir-to-llvmir -o log.ll

gpu-conv2d-run:
@${MLIR_OPT} conv2d.mlir -gpu-kernel-outlining -llvm-request-c-wrappers -convert-vector-to-scf -convert-vector-to-llvm | \
${MLIR_OPT} -pass-pipeline="builtin.module(nvvm-attach-target{chip=sm_70 O=3},\
gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, reconcile-unrealized-casts, gpu-module-to-binary)" | \
${MLIR_CPU_RUNNER} -entry-point-result=void -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_CUDA_RUNTIME}