Skip to content

Commit

Permalink
rename ipu->npu in programming_examples
Browse files Browse the repository at this point in the history
  • Loading branch information
fifield committed Apr 19, 2024
1 parent 6447479 commit ca53aa2
Show file tree
Hide file tree
Showing 125 changed files with 424 additions and 424 deletions.
2 changes: 1 addition & 1 deletion programming_examples/basic/log_hello_world/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ if (NOT WSL)
else()
set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
endif()

set(TARGET_NAME test CACHE STRING "Target to be built")
Expand Down
4 changes: 2 additions & 2 deletions programming_examples/basic/log_hello_world/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ build/hello_world.mlir: hello_world.py

build/hello_world.xclbin: build/hello_world.mlir build/kernel.o
mkdir -p ${@D}
cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \
--xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%)
cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \
--xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%)

hello_world_elfstrings.csv: build/hello_world.xclbin
python3 elfStringParser.py --input ./build --output $@
Expand Down
10 changes: 5 additions & 5 deletions programming_examples/basic/log_hello_world/hello_world.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def printf():

with mlir_mod_ctx() as ctx:

@device(AIEDevice.ipu)
@device(AIEDevice.npu)
def device_body():
memRef_ty = T.memref(N, T.i32())

Expand Down Expand Up @@ -47,16 +47,16 @@ def core_body():
# To/from AIE-array data movement
@FuncOp.from_py_func(memRef_ty, memRef_ty, memRef_ty)
def sequence(in_mem, out_mem, logout):
ipu_dma_memcpy_nd(
npu_dma_memcpy_nd(
metadata="outOF", bd_id=0, mem=out_mem, sizes=[1, 1, 1, N]
)
ipu_dma_memcpy_nd(
npu_dma_memcpy_nd(
metadata="inOF", bd_id=1, mem=in_mem, sizes=[1, 1, 1, N]
)
ipu_dma_memcpy_nd(
npu_dma_memcpy_nd(
metadata="logoutOF", bd_id=2, mem=logout, sizes=[1, 1, 1, N]
)
ipu_sync(column=0, row=0, direction=0, channel=0)
npu_sync(column=0, row=0, direction=0, channel=0)

print(ctx.module)

Expand Down
4 changes: 2 additions & 2 deletions programming_examples/basic/log_hello_world/run.lit
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
//
// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o
// RUN: %python %S/hello_world.py > ./aie.mlir
// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
// RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
// RUN: %python %S/elfStringParser.py --input . --output elf_string.csv
// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -e elf_string.csv | FileCheck %s
// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -e elf_string.csv | FileCheck %s
// CHECK: Starting kernel execution
// CHECK: Core Location col=1 row=2
// CHECK: Completed executing. cycles=
6 changes: 3 additions & 3 deletions programming_examples/basic/matrix_add_one/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@ ACDC_AIE = $(dir $(shell which aie-opt))/..
SHELL := /bin/bash

targetname = matrixAddOne
devicename = ipu
devicename = npu
col = 0

all: build/final.xclbin

build/final.xclbin: build/aie.mlir
mkdir -p ${@D}
cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \
--xclbin-name=${@F} --ipu-insts-name=insts.txt ${<F}
cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \
--xclbin-name=${@F} --npu-insts-name=insts.txt ${<F}

${targetname}.exe: test.cpp
rm -rf _build
Expand Down
6 changes: 3 additions & 3 deletions programming_examples/basic/matrix_add_one/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@

# <ins>Matrix Addition</ins>

Single tile performs a very simple `+` operation where the kernel loads data from local memory, increments the value by `1` and stores it back. The DMA in the Shim tile is programmed to bring the bottom left `8x16` portion of a larger `16x128` matrix into the tile to perform the operation. This reference design can be run on either a RyzenAI IPU or a VCK5000.
A single tile performs a very simple `+` operation: the kernel loads data from local memory, increments each value by `1`, and stores it back. The DMA in the Shim tile is programmed to bring the bottom-left `8x16` portion of a larger `16x128` matrix into the tile to perform the operation. This reference design can be run on either a RyzenAI NPU or a VCK5000.

The kernel executes on AIE tile (`col`, 2). Input data is brought to the local memory of the tile from Shim tile (`col`, 0). The value of `col` is dependent on whether the application is targetting IPU or VCK5000. The Shim tile is programmed with a 2D DMA to only bring a 2D submatrix into the AIE tile for processing.
The kernel executes on AIE tile (`col`, 2). Input data is brought to the local memory of the tile from Shim tile (`col`, 0). The value of `col` depends on whether the application is targeting NPU or VCK5000. The Shim tile is programmed with a 2D DMA to bring only a 2D submatrix into the AIE tile for processing.

To compile and run the design for IPU:
To compile and run the design for NPU:
```
make
make run
Expand Down
10 changes: 5 additions & 5 deletions programming_examples/basic/matrix_add_one/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ def my_matrix_add_one():
if len(sys.argv) != 3:
raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")

if sys.argv[1] == "ipu":
dev = AIEDevice.ipu
if sys.argv[1] == "npu":
dev = AIEDevice.npu
elif sys.argv[1] == "xcvc1902":
dev = AIEDevice.xcvc1902
else:
Expand Down Expand Up @@ -85,21 +85,21 @@ def core_body():

@FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty)
def sequence(inTensor, notUsed, outTensor):
ipu_dma_memcpy_nd(
npu_dma_memcpy_nd(
metadata="out0",
bd_id=0,
mem=outTensor,
sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH],
strides=[1, 1, IMAGE_WIDTH],
)
ipu_dma_memcpy_nd(
npu_dma_memcpy_nd(
metadata="in0",
bd_id=1,
mem=inTensor,
sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH],
strides=[1, 1, IMAGE_WIDTH],
)
ipu_sync(column=0, row=0, direction=0, channel=0)
npu_sync(column=0, row=0, direction=0, channel=0)

print(ctx.module)

Expand Down
6 changes: 3 additions & 3 deletions programming_examples/basic/matrix_add_one/run.lit
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
//
// REQUIRES: ryzen_ai
//
// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir
// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
// RUN: %python %S/aie2.py npu 0 > ./aie.mlir
// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
// RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
// CHECK: PASS!

Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ if (NOT WSL)
else()
set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
endif()

set(TARGET_NAME test CACHE STRING "Target to be built")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ ${mlir_target}: aie2.py
${xclbin_target}: ${mlir_target} ${kernels:%=build/%.o}
mkdir -p ${@D}
cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \
--aie-generate-ipu --ipu-insts-name=${insts_target:build/%=%} $(<:%=../%)
--aie-generate-npu --npu-insts-name=${insts_target:build/%=%} $(<:%=../%)

${targetname}.exe: test.cpp ../test.cpp ../common.h
rm -rf _build
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def my_matmul():

with mlir_mod_ctx() as ctx:

@device(AIEDevice.ipu)
@device(AIEDevice.npu)
def device_body():
memRef_inA_ty = T.memref(m * k, T.bf16())
memRef_inB_ty = T.memref(k, T.bf16())
Expand Down Expand Up @@ -176,7 +176,7 @@ def core_body():
T.memref(C_sz_in_i32s, T.i32()),
)
def sequence(A, B, C):
ipu_dma_memcpy_nd(
npu_dma_memcpy_nd(
metadata=inB_fifo_names[0],
bd_id=2,
mem=B,
Expand All @@ -186,15 +186,15 @@ def sequence(A, B, C):
for i in range(n_cores):
A_offset = i * M_div_m_div_n_cores * m * K * word_size_in // 4
C_offset = i * M_div_m_div_n_cores * m * word_size_out // 4
ipu_dma_memcpy_nd(
npu_dma_memcpy_nd(
metadata=memA_fifo_names[i],
bd_id=1,
mem=A,
offsets=[0, 0, 0, A_offset],
sizes=[M_div_m_div_n_cores, K_div_k, m, k_in_i32s],
strides=[m_x_K_in_i32s, k_in_i32s, K_in_i32s],
)
ipu_dma_memcpy_nd(
npu_dma_memcpy_nd(
metadata=outC_fifo_names[i],
bd_id=0,
mem=C,
Expand All @@ -204,7 +204,7 @@ def sequence(A, B, C):
)

for i in range(n_cores):
ipu_sync(column=i, row=0, direction=0, channel=0)
npu_sync(column=i, row=0, direction=0, channel=0)

print(ctx.module)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
//
// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../../aie_kernels/aie2/mv.cc -o ./mv.o
// RUN: %python %S/aie2.py -M 288 -K 288 -N 1 > ./aie.mlir
// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
// RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -M 288 -K 288 -N 1 -v 1 | FileCheck %s
// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -M 288 -K 288 -N 1 -v 1 | FileCheck %s
// CHECK: PASS!

Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def my_matmul():

with mlir_mod_ctx() as ctx:

@device(AIEDevice.ipu)
@device(AIEDevice.npu)
def device_body():
memref_a_ty = T.memref(m, k, T.bf16())
memref_b_ty = T.memref(k, n, T.bf16())
Expand Down Expand Up @@ -182,37 +182,37 @@ def sequence(A, B, C):
# BB <- Event to start trace capture
# C <- Trace mode, 00=event-time, 01=event-PC, 10=execution
# Configure so that "Event 1" (always true) causes tracing to start
ipu_write32(
npu_write32(
column=compute_tile2_col,
row=compute_tile2_row,
address=0x340D0,
value=0x00010000,
)
# 0x340D4: Trace Control 1
ipu_write32(
npu_write32(
column=compute_tile2_col,
row=compute_tile2_row,
address=0x340D4,
value=0x00000000,
)
# 0x340E0: Trace Event Group 1 (Which events to trace)
# 0xAABBCCDD AA, BB, CC, DD <- four event slots
ipu_write32(
npu_write32(
column=compute_tile2_col,
row=compute_tile2_row,
address=0x340E0,
value=0x4B222125,
)
# 0x340E4: Trace Event Group 2 (Which events to trace)
# 0xAABBCCDD AA, BB, CC, DD <- four event slots
ipu_write32(
npu_write32(
column=compute_tile2_col,
row=compute_tile2_row,
address=0x340E4,
value=0x2D2C1A4F,
)

ipu_write32(
npu_write32(
column=compute_tile2_col,
row=compute_tile2_row,
address=0x3FF00,
Expand All @@ -223,7 +223,7 @@ def sequence(A, B, C):
# out to host DDR memory
trace_bd_id = 13 # use BD 13 for writing trace output from compute tile to DDR host memory
output_size = C_sz_in_bytes
ipu_writebd_shimtile(
npu_writebd_shimtile(
bd_id=trace_bd_id,
buffer_length=trace_size,
buffer_offset=output_size,
Expand Down Expand Up @@ -252,7 +252,7 @@ def sequence(A, B, C):
valid_bd=1,
)
# Set start BD to our shim BD id (trace_bd_id, i.e. 13)
ipu_write32(column=0, row=0, address=0x1D20C, value=trace_bd_id)
npu_write32(column=0, row=0, address=0x1D20C, value=trace_bd_id)

# only do 5 tile rows at a time before synchronizing, so we can reuse BDs
rows_per_block = 5
Expand All @@ -265,7 +265,7 @@ def sequence(A, B, C):
num_tile_rows = min(
[rows_per_block, M_div_m - tile_row_block * rows_per_block]
)
ipu_dma_memcpy_nd(
npu_dma_memcpy_nd(
metadata="outC",
bd_id=0,
mem=C,
Expand All @@ -281,23 +281,23 @@ def sequence(A, B, C):
* word_size_in
// 4
)
ipu_dma_memcpy_nd(
npu_dma_memcpy_nd(
metadata="inA",
bd_id=2 * tile_row + 1,
mem=A,
offsets=[0, 0, 0, A_row_offset_in_i32s],
sizes=[N_div_n, K_div_k, m, k_in_i32s],
strides=[0, k_in_i32s, K_in_i32s],
)
ipu_dma_memcpy_nd(
npu_dma_memcpy_nd(
metadata="inB",
bd_id=2 * tile_row + 2,
mem=B,
sizes=[N_div_n, K_div_k, k, n_in_i32s],
strides=[n_in_i32s, k_x_N_in_i32s, N_in_i32s],
)

ipu_sync(column=0, row=0, direction=0, channel=0)
npu_sync(column=0, row=0, direction=0, channel=0)

print(ctx.module)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
//
// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../../aie_kernels/aie2/mm.cc -o ./mm.o
// RUN: %python %S/aie2.py -M 256 -K 256 -N 256 > ./aie.mlir
// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
// RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -M 256 -K 256 -N 256 -v 1 | FileCheck %s
// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -M 256 -K 256 -N 256 -v 1 | FileCheck %s
// CHECK: PASS!
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def my_matmul(M=512, K=512, N=512):

with mlir_mod_ctx() as ctx:

@device(AIEDevice.ipu)
@device(AIEDevice.npu)
def device_body():
memRef_inA_ty = T.memref(m * k, T.bf16())
memRef_inB_ty = T.memref(k * n, T.bf16())
Expand Down Expand Up @@ -317,7 +317,7 @@ def sequence(A, B, C):
for i in range(n_cols):
C_col_offset = i * n * word_size_out
C_offset_in_i32s = (C_col_offset + C_row_offset) // 4
ipu_dma_memcpy_nd(
npu_dma_memcpy_nd(
metadata=outC_fifo_names[i],
bd_id=0,
mem=C,
Expand Down Expand Up @@ -345,7 +345,7 @@ def sequence(A, B, C):
)
A_col_offset_in_i32s = i * m * K * word_size_in // 4
B_col_offset_in_i32s = i * n * word_size_in // 4
ipu_dma_memcpy_nd(
npu_dma_memcpy_nd(
metadata=inA_fifo_names[i],
bd_id=2 * tile_row + 1,
mem=A,
Expand All @@ -358,7 +358,7 @@ def sequence(A, B, C):
sizes=[N_div_n_div_n_cols, K_div_k, m, k_in_i32s],
strides=[0, k_in_i32s, K_in_i32s],
)
ipu_dma_memcpy_nd(
npu_dma_memcpy_nd(
metadata=inB_fifo_names[i],
bd_id=2 * tile_row + 2,
mem=B,
Expand All @@ -367,7 +367,7 @@ def sequence(A, B, C):
strides=[n_x_n_cols_in_i32s, k_x_N_in_i32s, N_in_i32s],
)
for i in range(n_cols):
ipu_sync(column=i, row=0, direction=0, channel=0)
npu_sync(column=i, row=0, direction=0, channel=0)

# print(ctx.module.operation.verify())
print(ctx.module)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
//
// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../../aie_kernels/aie2/mm.cc -o ./mm.o
// RUN: %python %S/aie2.py -M 512 -K 512 -N 512 > ./aie.mlir
// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
// RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -v 1 -M 512 -K 512 -N 512 | FileCheck %s
// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -v 1 -M 512 -K 512 -N 512 | FileCheck %s
// CHECK: PASS!

Loading

0 comments on commit ca53aa2

Please sign in to comment.