[Graph] Implementation and Testing of Graph-Level Compilation Infrastructure for Heterogeneous Hardware Partitioning #392

Closed · wants to merge 30 commits
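In brief, this PR extends the frontend so that a fused graph can be serialized to JSON, decoded back, and split by `GraphDriver` into per-device subgraphs that are lowered separately. The sketch below condenses that flow from the updated `examples/BuddyLeNet/buddy-lenet-import.py` further down; the `DynamoCompiler` setup, the `tosa.ops_registry` choice, and the 1x1x28x28 input are assumptions carried over from the existing LeNet example (weight loading omitted), not part of this diff.

```python
# Condensed sketch of the new heterogeneous import flow; see the
# buddy-lenet-import.py diff below for the authoritative version.
import os

import torch
from torch._inductor.decomposition import decompositions as inductor_decomp

from buddy.compiler.frontend import DynamoCompiler
from buddy.compiler.graph import GraphDriver
from buddy.compiler.graph.transform import simply_fuse
from buddy.compiler.graph.json_decoder import json_to_graph
from buddy.compiler.ops import tosa
from model import LeNet  # the example's local LeNet definition

# Assumed setup, mirroring the existing LeNet example (no checkpoint loaded here).
model = LeNet().eval()
dynamo_compiler = DynamoCompiler(
    primary_registry=tosa.ops_registry,
    aot_autograd_decomposition=inductor_decomp,
)
with torch.no_grad():
    graphs = dynamo_compiler.importer(model, torch.randn([1, 1, 28, 28]))

graph = graphs[0]
graph.fuse_ops([simply_fuse])

# New in this PR: round-trip the graph through the JSON interface, then let
# GraphDriver partition it into per-device subgraphs.
json_str = graph.to_json()
graph0 = json_to_graph(json_str)
driver = GraphDriver(graph0)
driver.subgraphs[0].lower_to_top_level_ir()  # e.g. GPU partition -> subgraph0.mlir
driver.subgraphs[1].lower_to_top_level_ir()  # e.g. CPU partition -> subgraph1.mlir

path_prefix = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(path_prefix, "forward.mlir"), "w") as f:
    print(driver.construct_main_graph(True), file=f)
```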
Changes from all commits (30 commits)
cbbba15
temp
wdjyd Jun 19, 2024
0858f29
fix/maxpool2d_simplify
wdjyd Jun 19, 2024
f2fd572
fix/maxpool2d_simplify
wdjyd Jun 19, 2024
b2c4c29
add json_encoder and json_decoder
wdjyd Jun 20, 2024
d09dd73
add json_encoder and json_decoder
wdjyd Jun 20, 2024
4e77924
add gpu.container_module
wdjyd Jul 31, 2024
43b3624
[frontend] Add GPU MLIR lowering path with ReLU operation support
wdjyd Aug 12, 2024
2d4eef1
delete env.sh
wdjyd Aug 12, 2024
78f6bca
delete env.sh
wdjyd Aug 12, 2024
abce382
[BuddyTest] Add Test Model E2E example.
wdjyd Aug 16, 2024
3d00fe6
[BuddyTest] Add README.
wdjyd Aug 16, 2024
ae794aa
[BuddyTest] Add README.
wdjyd Aug 16, 2024
b57103c
[frontend] Add GPU MLIR lowering path with Conv2d operation support
wdjyd Aug 30, 2024
0adf1df
[frontend] Add GPU MLIR lowering path with MaxPool2d operation support
wdjyd Sep 2, 2024
f636341
[frontend] Fix Permute Op
wdjyd Sep 3, 2024
72cdc82
[frontend] Fix implementation error in permute and conv_2d operation
wdjyd Sep 10, 2024
cf703c7
[frontend] Add LeNet example for E2E execution in GPU device
wdjyd Sep 18, 2024
9a88cb2
[frontend] Add the custom subgraph partitioning interface
wdjyd Sep 21, 2024
2f91175
[frontend] Fix error in graph partitioning interface
wdjyd Sep 22, 2024
20be444
Merge remote-tracking branch 'origin/fix/maxpool2d_simplify' into wafer
wdjyd Sep 26, 2024
3e88a45
[frontend] Add JSON format interface for subgraph partitioning implem…
wdjyd Sep 26, 2024
29745ef
[frontend] Add JSON format interface for subgraph partitioning implem…
wdjyd Sep 26, 2024
fd814f8
device_type
WuXintong123 Sep 29, 2024
488b3b0
device_type
WuXintong123 Sep 29, 2024
c92c261
Merge remote-tracking branch 'wdjyd/wafer' into heterogeneous
WuXintong123 Oct 13, 2024
81a35fb
Add mimalloc library
WuXintong123 Oct 13, 2024
26cdc21
correct
WuXintong123 Oct 13, 2024
97ff064
Merge remote-tracking branch 'upstream/main' into heterogeneous
WuXintong123 Oct 13, 2024
ec4fa8f
Merge branch 'buddy-compiler:main' into heterogeneous
WuXintong123 Oct 14, 2024
29fd975
Merge remote-tracking branch 'upstream/main' into heterogeneous
WuXintong123 Oct 30, 2024
4 changes: 2 additions & 2 deletions .gitignore
@@ -13,5 +13,5 @@
# Clangd cache
.cache

# Clangd configurations
.clangd
# environment bash
env.sh
3 changes: 3 additions & 0 deletions examples/BuddyLeNet/.gitignore
@@ -3,8 +3,11 @@ log.ll
log.s
data
*.data
*.json
*.dot
__pycache__
*.pth
lenet.mlir
forward.mlir
subgraph0.mlir
subgraph1.mlir
55 changes: 44 additions & 11 deletions examples/BuddyLeNet/CMakeLists.txt
@@ -1,7 +1,7 @@
add_custom_command(
OUTPUT ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/forward.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/arg0.data
OUTPUT ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/forward.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/arg0.data
COMMAND python3 ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/buddy-lenet-import.py
COMMENT "Generating forward.mlir, subgraph0.mlir and parameter files"
COMMENT "Generating forward.mlir, subgraph1.mlir and parameter files"
)

add_custom_command(
@@ -17,15 +17,48 @@ add_custom_command(
COMMENT "Building forward.o"
VERBATIM)

add_custom_command(
OUTPUT subgraph0.ll
COMMAND ${BUDDY_BINARY_DIR}/buddy-opt ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir -gpu-kernel-outlining -llvm-request-c-wrappers -convert-vector-to-scf -convert-vector-to-llvm -func-bufferize-dynamic-offset -tensor-bufferize -buffer-deallocation -finalizing-bufferize -expand-strided-metadata -one-shot-bufferize |
${LLVM_TOOLS_BINARY_DIR}/mlir-opt
-pass-pipeline "builtin.module(nvvm-attach-target{chip=sm_75 O=3}, gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, reconcile-unrealized-casts, gpu-module-to-binary)" |
${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.ll
DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir
COMMENT "Building subgraph0.ll"
VERBATIM)

add_custom_command(
OUTPUT subgraph0.o
COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir
COMMAND ${LLVM_TOOLS_BINARY_DIR}/clang++ ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.ll -L/usr/local/cuda/lib64 -lcudart -O3 -c -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.o
DEPENDS ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.ll
COMMENT "Building subgraph0.o"
VERBATIM)

# add_custom_command(
# OUTPUT subgraph1.ll
# COMMAND ${BUDDY_BINARY_DIR}/buddy-opt ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir -gpu-kernel-outlining -llvm-request-c-wrappers -convert-vector-to-scf -convert-vector-to-llvm -func-bufferize-dynamic-offset -buffer-deallocation -finalizing-bufferize -expand-strided-metadata -one-shot-bufferize |
# ${LLVM_TOOLS_BINARY_DIR}/mlir-opt
# -pass-pipeline "builtin.module(nvvm-attach-target{chip=sm_75 O=3}, gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, reconcile-unrealized-casts, gpu-module-to-binary)" |
# ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph1.ll
# DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir
# COMMENT "Building subgraph1.ll"
# VERBATIM)

# add_custom_command(
# OUTPUT subgraph1.o
# COMMAND ${LLVM_TOOLS_BINARY_DIR}/clang++ ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph1.ll -L/usr/local/cuda/lib64 -lcudart -O3 -c -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph1.o
# DEPENDS ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph1.ll
# COMMENT "Building subgraph1.o"
# VERBATIM)

add_custom_command(
OUTPUT subgraph1.o
COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir
-pass-pipeline "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith))" |
${BUDDY_BINARY_DIR}/buddy-opt
-eliminate-empty-tensors
-convert-tensor-to-linalg
-convert-tensor-to-linalg
-linalg-bufferize
-batchmatmul-optimize
-convert-linalg-to-affine-loops
-lower-affine
-func-bufferize-dynamic-offset
@@ -45,18 +78,18 @@ add_custom_command(
-reconcile-unrealized-casts |
${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
${LLVM_TOOLS_BINARY_DIR}/llvm-as |
${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O0 -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph0.o
DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph0.mlir
COMMENT "Building subgraph0.o"
${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O0 -o ${BUDDY_BINARY_DIR}/../examples/BuddyLeNet/subgraph1.o
DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyLeNet/subgraph1.mlir
COMMENT "Building subgraph1.o"
VERBATIM)

add_library(LENET STATIC subgraph0.o forward.o)
add_library(LENET STATIC subgraph0.o subgraph1.o forward.o)

SET_TARGET_PROPERTIES(LENET PROPERTIES LINKER_LANGUAGE C)

add_executable(buddy-lenet-run buddy-lenet-main.cpp)
target_link_directories(buddy-lenet-run PRIVATE ${LLVM_LIBRARY_DIR})
target_link_directories(buddy-lenet-run PRIVATE ${LLVM_MLIR_LIBRARY_DIR})

set(BUDDY_LENET_LIBS LENET mlir_c_runner_utils ${PNG_LIBRARIES})

target_link_libraries(buddy-lenet-run ${BUDDY_LENET_LIBS})
target_link_libraries(buddy-lenet-run ${BUDDY_LENET_LIBS})
50 changes: 40 additions & 10 deletions examples/BuddyLeNet/buddy-lenet-import.py
@@ -28,7 +28,9 @@
from buddy.compiler.frontend import DynamoCompiler
from buddy.compiler.graph import GraphDriver
from buddy.compiler.graph.transform import simply_fuse
from buddy.compiler.ops import tosa
from buddy.compiler.graph.type import DeviceType
from buddy.compiler.ops import tosa, gpu
from buddy.compiler.graph.json_decoder import json_to_graph
from model import LeNet

# Retrieve the LeNet model path from environment variables.
@@ -57,20 +59,48 @@
graph = graphs[0]
params = dynamo_compiler.imported_params[graph]
pattern_list = [simply_fuse]
graphs[0].fuse_ops(pattern_list)
driver = GraphDriver(graphs[0])
driver.subgraphs[0].lower_to_top_level_ir()
graph.fuse_ops(pattern_list)
path_prefix = os.path.dirname(os.path.abspath(__file__))

# Convert the lenet graph to JSON string
json_str = graph.to_json()
with open(os.path.join(path_prefix, "lenet.json"), "w") as module_file:
module_file.write(json_str)

# Convert the lenet graph Json string to a lenet graph
graph0 = json_to_graph(json_str)
driver = GraphDriver(graph0)
driver.subgraphs[0].lower_to_top_level_ir()
driver.subgraphs[1].lower_to_top_level_ir()

with open(os.path.join(path_prefix, "subgraph0.mlir"), "w") as module_file:
print(driver.subgraphs[0]._imported_module, file=module_file)
with open(os.path.join(path_prefix, "subgraph1.mlir"), "w") as module_file:
print(driver.subgraphs[1]._imported_module, file=module_file)
with open(os.path.join(path_prefix, "forward.mlir"), "w") as module_file:
print(driver.construct_main_graph(True), file=module_file)

params = dynamo_compiler.imported_params[graph]
current_path = os.path.dirname(os.path.abspath(__file__))
# params = dynamo_compiler.imported_params[graph]
# current_path = os.path.dirname(os.path.abspath(__file__))

float32_param = np.concatenate(
[param.detach().numpy().reshape([-1]) for param in params]
)
# float32_param = np.concatenate(
# [param.detach().numpy().reshape([-1]) for param in params]
# )

# float32_param.tofile(Path(current_path) / "arg0.data")

# # Convert the lenet graph to JSON string
# json_str = graph.to_json()
# with open(os.path.join(path_prefix, "lenet.json"), "w") as module_file:
# module_file.write(json_str)

# # Convert the lenet graph Json string to a lenet graph
# graph0 = json_to_graph(json_str)
# graph0.lower_to_top_level_ir()
# with open(os.path.join(path_prefix, "lenet.mlir"), "w") as module_file:
# print(graph0._imported_module, file=module_file)

float32_param.tofile(Path(current_path) / "arg0.data")
# # Convert the lenet graph to DOT string
# dot_str = graph.to_dot()
# with open(os.path.join(path_prefix, "graph.dot"), "w") as module_file:
# module_file.write(dot_str)
3 changes: 3 additions & 0 deletions examples/BuddyTest/.gitignore
@@ -0,0 +1,3 @@
__pycache__
*.mlir
log.ll
29 changes: 29 additions & 0 deletions examples/BuddyTest/CMakeLists.txt
@@ -0,0 +1,29 @@
add_custom_command(
OUTPUT ${BUDDY_EXAMPLES_DIR}/BuddyTest/forward.mlir
COMMAND python3 ${BUDDY_EXAMPLES_DIR}/BuddyTest/import-test.py
COMMENT "Generating forward.mlir"
)


add_custom_command(
OUTPUT forward.o
COMMAND ${LLVM_MLIR_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/BuddyTest/forward.mlir -gpu-kernel-outlining -llvm-request-c-wrappers -convert-vector-to-scf -convert-vector-to-llvm |
${LLVM_MLIR_BINARY_DIR}/mlir-opt
-pass-pipeline "builtin.module(nvvm-attach-target{chip=sm_75 O=3}, gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, reconcile-unrealized-casts, gpu-module-to-binary)" |
${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
${LLVM_MLIR_BINARY_DIR}/llvm-as |
${LLVM_MLIR_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 -o ${BUDDY_BINARY_DIR}/../examples/BuddyTest/forward.o
DEPENDS ${BUDDY_EXAMPLES_DIR}/BuddyTest/forward.mlir
COMMENT "Building forward.o"
VERBATIM)


add_library(TEST STATIC forward.o)

SET_TARGET_PROPERTIES(TEST PROPERTIES LINKER_LANGUAGE C)

add_executable(buddy-test-run test-main.cpp)
target_link_directories(buddy-test-run PRIVATE ${LLVM_MLIR_LIBRARY_DIR})

set(BUDDY_TEST_LIBS TEST mlir_runner_utils mlir_cuda_runtime)
target_link_libraries(buddy-test-run ${BUDDY_TEST_LIBS})
65 changes: 65 additions & 0 deletions examples/BuddyTest/README.md
@@ -0,0 +1,65 @@
# Buddy Compiler Test Example

0. Activate your Python environment.

1. Build LLVM/MLIR

```bash
$ cd buddy-mlir
$ mkdir llvm/build
$ cd llvm/build
$ cmake -G Ninja ../llvm \
-DLLVM_ENABLE_PROJECTS="mlir;clang;openmp" \
-DLLVM_TARGETS_TO_BUILD="host;NVPTX" \
-DMLIR_ENABLE_CUDA_RUNNER=ON \
-DLLVM_ENABLE_ASSERTIONS=ON \
-DOPENMP_ENABLE_LIBOMPTARGET=OFF \
-DCMAKE_BUILD_TYPE=RELEASE \
-DMLIR_ENABLE_BINDINGS_PYTHON=ON \
-DPython3_EXECUTABLE=$(which python3)
$ ninja check-clang check-mlir omp
```

2. Build buddy-mlir

```bash
$ mkdir build && cd build
$ cmake -G Ninja .. \
-DMLIR_DIR=$PWD/../llvm/build/lib/cmake/mlir \
-DLLVM_DIR=$PWD/../llvm/build/lib/cmake/llvm \
-DLLVM_ENABLE_ASSERTIONS=ON \
-DCMAKE_BUILD_TYPE=RELEASE \
-DBUDDY_MLIR_ENABLE_PYTHON_PACKAGES=ON \
-DPython3_EXECUTABLE=$(which python3)
$ ninja
$ ninja check-buddy
```

3. Set the `PYTHONPATH` environment variable.

Make sure you are in the build directory.

```bash
$ export BUDDY_MLIR_BUILD_DIR=$PWD
$ export LLVM_MLIR_BUILD_DIR=$PWD/../llvm/build
$ export PYTHONPATH=${LLVM_MLIR_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_MLIR_BUILD_DIR}/python_packages:${PYTHONPATH}
```

4. Build and run the Test example

```bash
$ cmake -G Ninja .. -DBUDDY_TEST_EXAMPLES=ON
$ ninja buddy-test-run
$ cd bin
$ ./buddy-test-run
```

## Debug the Lowering Pass Pipeline with Fake Parameters.

```bash
$ cd buddy-mlir
$ cd examples/BuddyTest
$ make gpu-test-lower
$ make gpu-test-translate
$ make gpu-test-run
```
55 changes: 55 additions & 0 deletions examples/BuddyTest/import-test.py
@@ -0,0 +1,55 @@
# ===- import-test.py -----------------------------------------------------------
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ===---------------------------------------------------------------------------
#
# This is the Test model AOT importer.
#
# ===---------------------------------------------------------------------------

import os
from pathlib import Path

import numpy as np
import torch
from torch._inductor.decomposition import decompositions as inductor_decomp

from buddy.compiler.frontend import DynamoCompiler
from buddy.compiler.graph import GraphDriver
from buddy.compiler.graph.transform import simply_fuse
from buddy.compiler.ops.gpu import ops_registry as gpu_ops_registry
from model import TestModule

model = TestModule()
model = model.eval()

# Initialize Dynamo Compiler with specific configurations as an importer.
dynamo_compiler = DynamoCompiler(
primary_registry=gpu_ops_registry,
aot_autograd_decomposition=inductor_decomp,
)

data = torch.randn([1, 1, 12, 10])
# Import the model into MLIR module and parameters.
with torch.no_grad():
graphs = dynamo_compiler.importer(model, data)

assert len(graphs) == 1
graph = graphs[0]
print(graph.body)
graph.lower_to_top_level_ir()
path_prefix = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(path_prefix, "forward.mlir"), "w") as module_file:
print(graph._imported_module, file=module_file)
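
import-test.py above pulls `TestModule` from a local `model.py` that is not included in this diff. A hypothetical minimal definition, consistent with the `[1, 1, 12, 10]` input used by the importer and with the ReLU GPU lowering this PR adds first, could look like the following; the actual model in the PR may differ.

```python
# Hypothetical model.py for the BuddyTest example (the real file is not shown
# in this diff). It only assumes what import-test.py exercises: a TestModule
# whose forward pass runs on a [1, 1, 12, 10] tensor, using ReLU since that is
# the first operation supported by the GPU lowering path in this PR.
import torch


class TestModule(torch.nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        # A single element-wise op keeps the generated forward.mlir small.
        return torch.relu(x)


if __name__ == "__main__":
    # Quick sanity check of the interface assumed by import-test.py.
    out = TestModule().eval()(torch.randn([1, 1, 12, 10]))
    print(out.shape)  # torch.Size([1, 1, 12, 10])
```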

56 changes: 56 additions & 0 deletions examples/BuddyTest/makefile
@@ -0,0 +1,56 @@
#!/bin/bash
BUDDY_OPT := ../../build/bin/buddy-opt
MLIR_OPT := ../../llvm/build/bin/mlir-opt
MLIR_TRANSLATE := ../../llvm/build/bin/mlir-translate
MLIR_CPU_RUNNER := ../../llvm/build/bin/mlir-cpu-runner
LLC := ../../llvm/build/bin/llc
OPT_FLAG := -O0

ifeq ($(shell uname),Linux)
MLIR_RUNNER_UTILS := ../../llvm/build/lib/libmlir_runner_utils.so
MLIR_C_RUNNER_UTILS := ../../llvm/build/lib/libmlir_c_runner_utils.so
MLIR_ASYNC_RUNTIME := ../../llvm/build/lib/libmlir_async_runtime.so
MLIR_CUDA_RUNTIME := ../../llvm/build/lib/libmlir_cuda_runtime.so
MTRIPLE := x86_64-unknown-linux-gnu
else ifeq ($(shell uname),Darwin)
MLIR_RUNNER_UTILS := ../../llvm/build/lib/libmlir_runner_utils.dylib
MLIR_C_RUNNER_UTILS := ../../llvm/build/lib/libmlir_c_runner_utils.dylib
MLIR_ASYNC_RUNTIME := ../../llvm/build/lib/libmlir_async_runtime.dylib
MTRIPLE := x86_64-apple-darwin
endif

gpu-test-lower:
@${MLIR_OPT} forward.mlir -gpu-kernel-outlining -llvm-request-c-wrappers -convert-vector-to-scf -convert-vector-to-llvm | \
${MLIR_OPT} -pass-pipeline="builtin.module(nvvm-attach-target{chip=sm_70 O=3},\
gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, reconcile-unrealized-casts, gpu-module-to-binary)" | \
${MLIR_OPT} -o log.mlir

gpu-test-translate:
@${MLIR_OPT} forward.mlir -gpu-kernel-outlining -llvm-request-c-wrappers -convert-vector-to-scf -convert-vector-to-llvm | \
${MLIR_OPT} -pass-pipeline="builtin.module(nvvm-attach-target{chip=sm_70 O=3},\
gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, reconcile-unrealized-casts, gpu-module-to-binary)" | \
${MLIR_TRANSLATE} -mlir-to-llvmir -o log.ll

gpu-test-run:
@${MLIR_OPT} forward.mlir -gpu-kernel-outlining -llvm-request-c-wrappers -convert-vector-to-scf -convert-vector-to-llvm | \
${MLIR_OPT} -pass-pipeline="builtin.module(nvvm-attach-target{chip=sm_70 O=3},\
gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, reconcile-unrealized-casts, gpu-module-to-binary)" | \
${MLIR_CPU_RUNNER} -entry-point-result=void -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_CUDA_RUNTIME}

gpu-conv2d-lower:
@${MLIR_OPT} conv2d.mlir -gpu-kernel-outlining -llvm-request-c-wrappers -convert-vector-to-scf -convert-vector-to-llvm | \
${MLIR_OPT} -pass-pipeline="builtin.module(nvvm-attach-target{chip=sm_70 O=3},\
gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, reconcile-unrealized-casts, gpu-module-to-binary)" | \
${MLIR_OPT} -o log.mlir

gpu-conv2d-translate:
@${MLIR_OPT} conv2d.mlir -gpu-kernel-outlining -llvm-request-c-wrappers | \
${MLIR_OPT} -pass-pipeline="builtin.module(nvvm-attach-target{chip=sm_70 O=3},\
gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, gpu-module-to-binary)" | \
${MLIR_TRANSLATE} -mlir-to-llvmir -o log.ll

gpu-conv2d-run:
@${MLIR_OPT} conv2d.mlir -gpu-kernel-outlining -llvm-request-c-wrappers -convert-vector-to-scf -convert-vector-to-llvm | \
${MLIR_OPT} -pass-pipeline="builtin.module(nvvm-attach-target{chip=sm_70 O=3},\
gpu.module(convert-scf-to-cf, convert-gpu-to-nvvm, convert-arith-to-llvm), convert-scf-to-cf, gpu-to-llvm, reconcile-unrealized-casts, gpu-module-to-binary)" | \
${MLIR_CPU_RUNNER} -entry-point-result=void -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_CUDA_RUNTIME}