From ca53aa27d0f287384b7d9b9b8f39702f5a656d10 Mon Sep 17 00:00:00 2001 From: Jeff Fifield Date: Fri, 19 Apr 2024 09:28:54 -0600 Subject: [PATCH] rename ipu->npu in programming_examples --- .../basic/log_hello_world/CMakeLists.txt | 2 +- .../basic/log_hello_world/Makefile | 4 +- .../basic/log_hello_world/hello_world.py | 10 ++-- .../basic/log_hello_world/run.lit | 4 +- .../basic/matrix_add_one/Makefile | 6 +- .../basic/matrix_add_one/README.md | 6 +- .../basic/matrix_add_one/aie2.py | 10 ++-- .../basic/matrix_add_one/run.lit | 6 +- .../matrix_multiplication/CMakeLists.txt | 2 +- .../matrix_multiplication/makefile-common | 2 +- .../matrix_vector/aie2.py | 10 ++-- .../matrix_vector/run.lit | 4 +- .../matrix_multiplication/single_core/aie2.py | 24 ++++---- .../matrix_multiplication/single_core/run.lit | 4 +- .../matrix_multiplication/whole_array/aie2.py | 10 ++-- .../matrix_multiplication/whole_array/run.lit | 4 +- .../basic/passthrough_dmas/CMakeLists.txt | 2 +- .../basic/passthrough_dmas/Makefile | 4 +- .../basic/passthrough_dmas/aie2.py | 10 ++-- .../basic/passthrough_dmas/run.lit | 6 +- .../basic/passthrough_kernel/CMakeLists.txt | 2 +- .../basic/passthrough_kernel/Makefile | 4 +- .../basic/passthrough_kernel/aie2.py | 8 +-- .../basic/passthrough_kernel/run.lit | 6 +- .../basic/vector_add/Makefile | 6 +- .../basic/vector_add/README.md | 6 +- programming_examples/basic/vector_add/aie2.py | 12 ++-- programming_examples/basic/vector_add/run.lit | 6 +- .../basic/vector_exp/CMakeLists.txt | 2 +- .../basic/vector_exp/Makefile | 2 +- programming_examples/basic/vector_exp/aie2.py | 8 +-- programming_examples/basic/vector_exp/run.lit | 6 +- .../basic/vector_mult/CMakeLists.txt | 2 +- .../basic/vector_mult/Makefile | 6 +- .../basic/vector_mult/README.md | 6 +- .../basic/vector_mult/aie2.py | 12 ++-- .../basic/vector_mult/run.lit | 6 +- .../basic/vector_reduce_add/CMakeLists.txt | 2 +- .../basic/vector_reduce_add/Makefile | 4 +- .../basic/vector_reduce_add/aie2.py | 10 ++-- .../basic/vector_reduce_add/run.lit | 6 +- .../basic/vector_reduce_max/CMakeLists.txt | 2 +- .../basic/vector_reduce_max/Makefile | 4 +- .../basic/vector_reduce_max/aie2.py | 10 ++-- .../basic/vector_reduce_max/run.lit | 6 +- .../basic/vector_reduce_min/CMakeLists.txt | 2 +- .../basic/vector_reduce_min/Makefile | 4 +- .../basic/vector_reduce_min/aie2.py | 10 ++-- .../basic/vector_reduce_min/run.lit | 6 +- .../basic/vector_scalar_add/CMakeLists.txt | 2 +- .../basic/vector_scalar_add/Makefile | 4 +- .../basic/vector_scalar_add/aie2.py | 8 +-- .../basic/vector_scalar_add/run.lit | 4 +- .../basic/vector_scalar_mul/CMakeLists.txt | 2 +- .../basic/vector_scalar_mul/Makefile | 4 +- .../basic/vector_scalar_mul/aie2.py | 10 ++-- .../basic/vector_scalar_mul/run.lit | 6 +- .../basic/vector_sum/CMakeLists.txt | 2 +- .../basic/vector_sum/Makefile | 6 +- .../basic/vector_sum/README.md | 4 +- programming_examples/basic/vector_sum/aie2.py | 10 ++-- programming_examples/basic/vector_sum/run.lit | 6 +- programming_examples/lit.cfg.py | 8 +-- programming_examples/makefile-common | 2 +- .../ml/bottleneck/CMakeLists.txt | 2 +- programming_examples/ml/bottleneck/Makefile | 6 +- programming_examples/ml/bottleneck/aie2.py | 32 +++++----- programming_examples/ml/bottleneck/run.lit | 4 +- programming_examples/ml/conv2d/CMakeLists.txt | 2 +- programming_examples/ml/conv2d/Makefile | 6 +- programming_examples/ml/conv2d/aie2.py | 26 ++++----- programming_examples/ml/conv2d/run.lit | 4 +- .../ml/conv2d_fused_relu/CMakeLists.txt | 2 +- .../ml/conv2d_fused_relu/Makefile | 6 +- .../ml/conv2d_fused_relu/aie2.py | 26 ++++----- .../ml/conv2d_fused_relu/run.lit | 4 +- .../ml/eltwise_add/CMakeLists.txt | 2 +- programming_examples/ml/eltwise_add/Makefile | 4 +- programming_examples/ml/eltwise_add/aie2.py | 10 ++-- programming_examples/ml/eltwise_add/run.lit | 4 +- .../ml/eltwise_mul/CMakeLists.txt | 2 +- programming_examples/ml/eltwise_mul/Makefile | 4 +- programming_examples/ml/eltwise_mul/aie2.py | 10 ++-- programming_examples/ml/eltwise_mul/run.lit | 4 +- programming_examples/ml/relu/CMakeLists.txt | 2 +- programming_examples/ml/relu/Makefile | 4 +- programming_examples/ml/relu/aie2.py | 22 +++---- programming_examples/ml/relu/run.lit | 4 +- .../ml/resnet/layers_conv2_x/CMakeLists.txt | 2 +- .../ml/resnet/layers_conv2_x/Makefile | 6 +- .../ml/resnet/layers_conv2_x/aie.mlir | 58 +++++++++---------- .../ml/resnet/layers_conv2_x/aie2.py | 32 +++++----- .../ml/resnet/layers_conv2_x/run.lit | 4 +- .../ml/softmax/CMakeLists.txt | 2 +- programming_examples/ml/softmax/Makefile | 2 +- programming_examples/ml/softmax/aie2.py | 8 +-- programming_examples/ml/softmax/run.lit | 4 +- .../ml/weight_expand/CMakeLists.txt | 2 +- .../ml/weight_expand/Makefile | 2 +- programming_examples/ml/weight_expand/aie2.py | 8 +-- programming_examples/utils/README.md | 2 +- programming_examples/utils/parse_eventIR.py | 6 +- programming_examples/utils/parse_trace.py | 6 +- .../vision/color_detect/CMakeLists.txt | 2 +- .../vision/color_detect/Makefile | 4 +- .../vision/color_detect/README.md | 2 +- .../vision/color_detect/aie2_colorDetect.py | 8 +-- .../vision/color_detect/run.lit | 4 +- .../vision/color_threshold/CMakeLists.txt | 2 +- .../vision/color_threshold/Makefile | 4 +- .../vision/color_threshold/README.md | 2 +- .../color_threshold/aie2_colorThreshold.py | 32 +++++----- .../vision/color_threshold/run.lit | 4 +- .../vision/edge_detect/CMakeLists.txt | 2 +- .../vision/edge_detect/Makefile | 4 +- .../vision/edge_detect/README.md | 2 +- .../vision/edge_detect/aie2_edgeDetect.py | 8 +-- .../vision/edge_detect/run.lit | 4 +- .../vision/vision_passthrough/CMakeLists.txt | 2 +- .../vision/vision_passthrough/Makefile | 4 +- .../vision/vision_passthrough/aie2.py | 20 +++---- .../aie2_lineBased_8b_1080.mlir | 8 +-- .../aie2_lineBased_8b_8k.mlir | 8 +-- .../aie2_lineBased_8b_tiny.mlir | 8 +-- .../vision/vision_passthrough/run.lit | 4 +- 125 files changed, 424 insertions(+), 424 deletions(-) diff --git a/programming_examples/basic/log_hello_world/CMakeLists.txt b/programming_examples/basic/log_hello_world/CMakeLists.txt index c4ca0825d4..20f5d8a4a3 100755 --- a/programming_examples/basic/log_hello_world/CMakeLists.txt +++ b/programming_examples/basic/log_hello_world/CMakeLists.txt @@ -27,7 +27,7 @@ if (NOT WSL) else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif() set(TARGET_NAME test CACHE STRING "Target to be built") diff --git a/programming_examples/basic/log_hello_world/Makefile b/programming_examples/basic/log_hello_world/Makefile index c5bcd8d5c3..778badcb6a 100755 --- a/programming_examples/basic/log_hello_world/Makefile +++ b/programming_examples/basic/log_hello_world/Makefile @@ -22,8 +22,8 @@ build/hello_world.mlir: hello_world.py build/hello_world.xclbin: build/hello_world.mlir build/kernel.o mkdir -p ${@D} - cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \ - --xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%) + cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \ + --xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%) hello_world_elfstrings.csv: build/hello_world.xclbin python3 elfStringParser.py --input ./build --output $@ diff --git a/programming_examples/basic/log_hello_world/hello_world.py b/programming_examples/basic/log_hello_world/hello_world.py index b017d110b7..bc3cbe20fe 100644 --- a/programming_examples/basic/log_hello_world/hello_world.py +++ b/programming_examples/basic/log_hello_world/hello_world.py @@ -15,7 +15,7 @@ def printf(): with mlir_mod_ctx() as ctx: - @device(AIEDevice.ipu) + @device(AIEDevice.npu) def device_body(): memRef_ty = T.memref(N, T.i32()) @@ -47,16 +47,16 @@ def core_body(): # To/from AIE-array data movement @FuncOp.from_py_func(memRef_ty, memRef_ty, memRef_ty) def sequence(in_mem, out_mem, logout): - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="outOF", bd_id=0, mem=out_mem, sizes=[1, 1, 1, N] ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="inOF", bd_id=1, mem=in_mem, sizes=[1, 1, 1, N] ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="logoutOF", bd_id=2, mem=logout, sizes=[1, 1, 1, N] ) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/programming_examples/basic/log_hello_world/run.lit b/programming_examples/basic/log_hello_world/run.lit index 096df253c7..0fe0af2ada 100644 --- a/programming_examples/basic/log_hello_world/run.lit +++ b/programming_examples/basic/log_hello_world/run.lit @@ -5,10 +5,10 @@ // // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o // RUN: %python %S/hello_world.py > ./aie.mlir -// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem // RUN: %python %S/elfStringParser.py --input . --output elf_string.csv -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -e elf_string.csv | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -e elf_string.csv | FileCheck %s // CHECK: Starting kernel execution // CHECK: Core Location col=1 row=2 // CHECK: Completed executing. cycles= diff --git a/programming_examples/basic/matrix_add_one/Makefile b/programming_examples/basic/matrix_add_one/Makefile index 435b7b8c9e..83014fbeaf 100644 --- a/programming_examples/basic/matrix_add_one/Makefile +++ b/programming_examples/basic/matrix_add_one/Makefile @@ -13,15 +13,15 @@ ACDC_AIE = $(dir $(shell which aie-opt))/.. SHELL := /bin/bash targetname = matrixAddOne -devicename = ipu +devicename = npu col = 0 all: build/final.xclbin build/final.xclbin: build/aie.mlir mkdir -p ${@D} - cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \ - --xclbin-name=${@F} --ipu-insts-name=insts.txt ${Matrix Addition -Single tile performs a very simple `+` operation where the kernel loads data from local memory, increments the value by `1` and stores it back. The DMA in the Shim tile is programmed to bring the bottom left `8x16` portion of a larger `16x128` matrix into the tile to perform the operation. This reference design can be run on either a RyzenAI IPU or a VCK5000. +Single tile performs a very simple `+` operation where the kernel loads data from local memory, increments the value by `1` and stores it back. The DMA in the Shim tile is programmed to bring the bottom left `8x16` portion of a larger `16x128` matrix into the tile to perform the operation. This reference design can be run on either a RyzenAI NPU or a VCK5000. -The kernel executes on AIE tile (`col`, 2). Input data is brought to the local memory of the tile from Shim tile (`col`, 0). The value of `col` is dependent on whether the application is targetting IPU or VCK5000. The Shim tile is programmed with a 2D DMA to only bring a 2D submatrix into the AIE tile for processing. +The kernel executes on AIE tile (`col`, 2). Input data is brought to the local memory of the tile from Shim tile (`col`, 0). The value of `col` is dependent on whether the application is targetting NPU or VCK5000. The Shim tile is programmed with a 2D DMA to only bring a 2D submatrix into the AIE tile for processing. -To compile and run the design for IPU: +To compile and run the design for NPU: ``` make make run diff --git a/programming_examples/basic/matrix_add_one/aie2.py b/programming_examples/basic/matrix_add_one/aie2.py index 36eb3a3d38..a80ba794e6 100644 --- a/programming_examples/basic/matrix_add_one/aie2.py +++ b/programming_examples/basic/matrix_add_one/aie2.py @@ -35,8 +35,8 @@ def my_matrix_add_one(): if len(sys.argv) != 3: raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") - if sys.argv[1] == "ipu": - dev = AIEDevice.ipu + if sys.argv[1] == "npu": + dev = AIEDevice.npu elif sys.argv[1] == "xcvc1902": dev = AIEDevice.xcvc1902 else: @@ -85,21 +85,21 @@ def core_body(): @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty) def sequence(inTensor, notUsed, outTensor): - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="out0", bd_id=0, mem=outTensor, sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH], strides=[1, 1, IMAGE_WIDTH], ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="in0", bd_id=1, mem=inTensor, sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH], strides=[1, 1, IMAGE_WIDTH], ) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/programming_examples/basic/matrix_add_one/run.lit b/programming_examples/basic/matrix_add_one/run.lit index a429e99221..1922c01828 100644 --- a/programming_examples/basic/matrix_add_one/run.lit +++ b/programming_examples/basic/matrix_add_one/run.lit @@ -3,9 +3,9 @@ // // REQUIRES: ryzen_ai // -// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir -// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python %S/aie2.py npu 0 > ./aie.mlir +// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s // CHECK: PASS! diff --git a/programming_examples/basic/matrix_multiplication/CMakeLists.txt b/programming_examples/basic/matrix_multiplication/CMakeLists.txt index dfe345e188..0f062b0322 100644 --- a/programming_examples/basic/matrix_multiplication/CMakeLists.txt +++ b/programming_examples/basic/matrix_multiplication/CMakeLists.txt @@ -27,7 +27,7 @@ if (NOT WSL) else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif() set(TARGET_NAME test CACHE STRING "Target to be built") diff --git a/programming_examples/basic/matrix_multiplication/makefile-common b/programming_examples/basic/matrix_multiplication/makefile-common index fd6a438ea0..6149657e1b 100644 --- a/programming_examples/basic/matrix_multiplication/makefile-common +++ b/programming_examples/basic/matrix_multiplication/makefile-common @@ -60,7 +60,7 @@ ${mlir_target}: aie2.py ${xclbin_target}: ${mlir_target} ${kernels:%=build/%.o} mkdir -p ${@D} cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ - --aie-generate-ipu --ipu-insts-name=${insts_target:build/%=%} $(<:%=../%) + --aie-generate-npu --npu-insts-name=${insts_target:build/%=%} $(<:%=../%) ${targetname}.exe: test.cpp ../test.cpp ../common.h rm -rf _build diff --git a/programming_examples/basic/matrix_multiplication/matrix_vector/aie2.py b/programming_examples/basic/matrix_multiplication/matrix_vector/aie2.py index 80b5c89613..4ac31574fd 100644 --- a/programming_examples/basic/matrix_multiplication/matrix_vector/aie2.py +++ b/programming_examples/basic/matrix_multiplication/matrix_vector/aie2.py @@ -42,7 +42,7 @@ def my_matmul(): with mlir_mod_ctx() as ctx: - @device(AIEDevice.ipu) + @device(AIEDevice.npu) def device_body(): memRef_inA_ty = T.memref(m * k, T.bf16()) memRef_inB_ty = T.memref(k, T.bf16()) @@ -176,7 +176,7 @@ def core_body(): T.memref(C_sz_in_i32s, T.i32()), ) def sequence(A, B, C): - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata=inB_fifo_names[0], bd_id=2, mem=B, @@ -186,7 +186,7 @@ def sequence(A, B, C): for i in range(n_cores): A_offset = i * M_div_m_div_n_cores * m * K * word_size_in // 4 C_offset = i * M_div_m_div_n_cores * m * word_size_out // 4 - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata=memA_fifo_names[i], bd_id=1, mem=A, @@ -194,7 +194,7 @@ def sequence(A, B, C): sizes=[M_div_m_div_n_cores, K_div_k, m, k_in_i32s], strides=[m_x_K_in_i32s, k_in_i32s, K_in_i32s], ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata=outC_fifo_names[i], bd_id=0, mem=C, @@ -204,7 +204,7 @@ def sequence(A, B, C): ) for i in range(n_cores): - ipu_sync(column=i, row=0, direction=0, channel=0) + npu_sync(column=i, row=0, direction=0, channel=0) print(ctx.module) diff --git a/programming_examples/basic/matrix_multiplication/matrix_vector/run.lit b/programming_examples/basic/matrix_multiplication/matrix_vector/run.lit index d446e4f966..eeaa69352a 100644 --- a/programming_examples/basic/matrix_multiplication/matrix_vector/run.lit +++ b/programming_examples/basic/matrix_multiplication/matrix_vector/run.lit @@ -5,8 +5,8 @@ // // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../../aie_kernels/aie2/mv.cc -o ./mv.o // RUN: %python %S/aie2.py -M 288 -K 288 -N 1 > ./aie.mlir -// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -M 288 -K 288 -N 1 -v 1 | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -M 288 -K 288 -N 1 -v 1 | FileCheck %s // CHECK: PASS! diff --git a/programming_examples/basic/matrix_multiplication/single_core/aie2.py b/programming_examples/basic/matrix_multiplication/single_core/aie2.py index 46973f90a4..9c43053ae4 100644 --- a/programming_examples/basic/matrix_multiplication/single_core/aie2.py +++ b/programming_examples/basic/matrix_multiplication/single_core/aie2.py @@ -54,7 +54,7 @@ def my_matmul(): with mlir_mod_ctx() as ctx: - @device(AIEDevice.ipu) + @device(AIEDevice.npu) def device_body(): memref_a_ty = T.memref(m, k, T.bf16()) memref_b_ty = T.memref(k, n, T.bf16()) @@ -182,14 +182,14 @@ def sequence(A, B, C): # BB <- Event to start trace capture # C <- Trace mode, 00=event=time, 01=event-PC, 10=execution # Configure so that "Event 1" (always true) causes tracing to start - ipu_write32( + npu_write32( column=compute_tile2_col, row=compute_tile2_row, address=0x340D0, value=0x00010000, ) # 0x340D4: Trace Control 1 - ipu_write32( + npu_write32( column=compute_tile2_col, row=compute_tile2_row, address=0x340D4, @@ -197,7 +197,7 @@ def sequence(A, B, C): ) # 0x340E0: Trace Event Group 1 (Which events to trace) # 0xAABBCCDD AA, BB, CC, DD <- four event slots - ipu_write32( + npu_write32( column=compute_tile2_col, row=compute_tile2_row, address=0x340E0, @@ -205,14 +205,14 @@ def sequence(A, B, C): ) # 0x340E4: Trace Event Group 2 (Which events to trace) # 0xAABBCCDD AA, BB, CC, DD <- four event slots - ipu_write32( + npu_write32( column=compute_tile2_col, row=compute_tile2_row, address=0x340E4, value=0x2D2C1A4F, ) - ipu_write32( + npu_write32( column=compute_tile2_col, row=compute_tile2_row, address=0x3FF00, @@ -223,7 +223,7 @@ def sequence(A, B, C): # out to host DDR memory trace_bd_id = 13 # use BD 13 for writing trace output from compute tile to DDR host memory output_size = C_sz_in_bytes - ipu_writebd_shimtile( + npu_writebd_shimtile( bd_id=trace_bd_id, buffer_length=trace_size, buffer_offset=output_size, @@ -252,7 +252,7 @@ def sequence(A, B, C): valid_bd=1, ) # Set start BD to our shim bd_Id (3) - ipu_write32(column=0, row=0, address=0x1D20C, value=trace_bd_id) + npu_write32(column=0, row=0, address=0x1D20C, value=trace_bd_id) # only do 5 tile rows at a time before synchronizing, so we can reuse BDs rows_per_block = 5 @@ -265,7 +265,7 @@ def sequence(A, B, C): num_tile_rows = min( [rows_per_block, M_div_m - tile_row_block * rows_per_block] ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="outC", bd_id=0, mem=C, @@ -281,7 +281,7 @@ def sequence(A, B, C): * word_size_in // 4 ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="inA", bd_id=2 * tile_row + 1, mem=A, @@ -289,7 +289,7 @@ def sequence(A, B, C): sizes=[N_div_n, K_div_k, m, k_in_i32s], strides=[0, k_in_i32s, K_in_i32s], ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="inB", bd_id=2 * tile_row + 2, mem=B, @@ -297,7 +297,7 @@ def sequence(A, B, C): strides=[n_in_i32s, k_x_N_in_i32s, N_in_i32s], ) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/programming_examples/basic/matrix_multiplication/single_core/run.lit b/programming_examples/basic/matrix_multiplication/single_core/run.lit index 0209415093..6f6a32320a 100644 --- a/programming_examples/basic/matrix_multiplication/single_core/run.lit +++ b/programming_examples/basic/matrix_multiplication/single_core/run.lit @@ -5,7 +5,7 @@ // // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../../aie_kernels/aie2/mm.cc -o ./mm.o // RUN: %python %S/aie2.py -M 256 -K 256 -N 256 > ./aie.mlir -// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -M 256 -K 256 -N 256 -v 1 | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -M 256 -K 256 -N 256 -v 1 | FileCheck %s // CHECK: PASS! diff --git a/programming_examples/basic/matrix_multiplication/whole_array/aie2.py b/programming_examples/basic/matrix_multiplication/whole_array/aie2.py index 69a3c52394..d94a7e8eba 100644 --- a/programming_examples/basic/matrix_multiplication/whole_array/aie2.py +++ b/programming_examples/basic/matrix_multiplication/whole_array/aie2.py @@ -74,7 +74,7 @@ def my_matmul(M=512, K=512, N=512): with mlir_mod_ctx() as ctx: - @device(AIEDevice.ipu) + @device(AIEDevice.npu) def device_body(): memRef_inA_ty = T.memref(m * k, T.bf16()) memRef_inB_ty = T.memref(k * n, T.bf16()) @@ -317,7 +317,7 @@ def sequence(A, B, C): for i in range(n_cols): C_col_offset = i * n * word_size_out C_offset_in_i32s = (C_col_offset + C_row_offset) // 4 - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata=outC_fifo_names[i], bd_id=0, mem=C, @@ -345,7 +345,7 @@ def sequence(A, B, C): ) A_col_offset_in_i32s = i * m * K * word_size_in // 4 B_col_offset_in_i32s = i * n * word_size_in // 4 - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata=inA_fifo_names[i], bd_id=2 * tile_row + 1, mem=A, @@ -358,7 +358,7 @@ def sequence(A, B, C): sizes=[N_div_n_div_n_cols, K_div_k, m, k_in_i32s], strides=[0, k_in_i32s, K_in_i32s], ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata=inB_fifo_names[i], bd_id=2 * tile_row + 2, mem=B, @@ -367,7 +367,7 @@ def sequence(A, B, C): strides=[n_x_n_cols_in_i32s, k_x_N_in_i32s, N_in_i32s], ) for i in range(n_cols): - ipu_sync(column=i, row=0, direction=0, channel=0) + npu_sync(column=i, row=0, direction=0, channel=0) # print(ctx.module.operation.verify()) print(ctx.module) diff --git a/programming_examples/basic/matrix_multiplication/whole_array/run.lit b/programming_examples/basic/matrix_multiplication/whole_array/run.lit index 202e66b71e..fc23355630 100644 --- a/programming_examples/basic/matrix_multiplication/whole_array/run.lit +++ b/programming_examples/basic/matrix_multiplication/whole_array/run.lit @@ -5,8 +5,8 @@ // // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../../aie_kernels/aie2/mm.cc -o ./mm.o // RUN: %python %S/aie2.py -M 512 -K 512 -N 512 > ./aie.mlir -// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -v 1 -M 512 -K 512 -N 512 | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -v 1 -M 512 -K 512 -N 512 | FileCheck %s // CHECK: PASS! diff --git a/programming_examples/basic/passthrough_dmas/CMakeLists.txt b/programming_examples/basic/passthrough_dmas/CMakeLists.txt index 3986c4a075..c17d3d365b 100644 --- a/programming_examples/basic/passthrough_dmas/CMakeLists.txt +++ b/programming_examples/basic/passthrough_dmas/CMakeLists.txt @@ -27,7 +27,7 @@ if (NOT WSL) else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif() set(TARGET_NAME test CACHE STRING "Target to be built") diff --git a/programming_examples/basic/passthrough_dmas/Makefile b/programming_examples/basic/passthrough_dmas/Makefile index 13a8d42aae..e09c8a91aa 100644 --- a/programming_examples/basic/passthrough_dmas/Makefile +++ b/programming_examples/basic/passthrough_dmas/Makefile @@ -26,13 +26,13 @@ inst/insts.txt: aie2.py rm -rf inst mkdir -p inst python3 $< ${devicename} ${col} ${LENGTH} > inst/aie.mlir - pushd inst && aiecc.py --aie-only-generate-ipu --ipu-insts-name=insts.txt aie.mlir && popd + pushd inst && aiecc.py --aie-only-generate-npu --npu-insts-name=insts.txt aie.mlir && popd ${powershell} ./build/${targetname}.exe -x build/final.xclbin -i inst/insts.txt -k MLIR_AIE -l ${LENGTH} build/final.xclbin: build/aie.mlir mkdir -p ${@D} cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ - --aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%) + --aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%) ${targetname}.exe: test.cpp rm -rf _build diff --git a/programming_examples/basic/passthrough_dmas/aie2.py b/programming_examples/basic/passthrough_dmas/aie2.py index b59d9332ac..f8dc35a6d9 100755 --- a/programming_examples/basic/passthrough_dmas/aie2.py +++ b/programming_examples/basic/passthrough_dmas/aie2.py @@ -23,8 +23,8 @@ if len(sys.argv) == 4: N = int(sys.argv[1]) -if sys.argv[1] == "ipu": - dev = AIEDevice.ipu +if sys.argv[1] == "npu": + dev = AIEDevice.npu elif sys.argv[1] == "xcvc1902": dev = AIEDevice.xcvc1902 else: @@ -62,9 +62,9 @@ def core_body(): @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty) def sequence(A, B, C): - ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) - ipu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/programming_examples/basic/passthrough_dmas/run.lit b/programming_examples/basic/passthrough_dmas/run.lit index a4f5d568b6..a466533551 100644 --- a/programming_examples/basic/passthrough_dmas/run.lit +++ b/programming_examples/basic/passthrough_dmas/run.lit @@ -3,8 +3,8 @@ // // REQUIRES: ryzen_ai // -// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir -// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python %S/aie2.py npu 0 > ./aie.mlir +// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -l 4096 | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -l 4096 | FileCheck %s // CHECK: PASS! diff --git a/programming_examples/basic/passthrough_kernel/CMakeLists.txt b/programming_examples/basic/passthrough_kernel/CMakeLists.txt index 47375adc84..fddc513396 100644 --- a/programming_examples/basic/passthrough_kernel/CMakeLists.txt +++ b/programming_examples/basic/passthrough_kernel/CMakeLists.txt @@ -22,7 +22,7 @@ if (NOT WSL) else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif () set(PASSTHROUGH_SIZE 4096 CACHE STRING "size") diff --git a/programming_examples/basic/passthrough_kernel/Makefile b/programming_examples/basic/passthrough_kernel/Makefile index fbfc7580c4..458b992521 100644 --- a/programming_examples/basic/passthrough_kernel/Makefile +++ b/programming_examples/basic/passthrough_kernel/Makefile @@ -28,8 +28,8 @@ build/passThrough.cc.o: passThrough.cc build/final_${PASSTHROUGH_SIZE}.xclbin: build/aie2_lineBased_8b_${PASSTHROUGH_SIZE}.mlir build/passThrough.cc.o mkdir -p ${@D} - cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \ - --xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%) + cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \ + --xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%) ${targetname}.exe: test.cpp rm -rf _build diff --git a/programming_examples/basic/passthrough_kernel/aie2.py b/programming_examples/basic/passthrough_kernel/aie2.py index baec4415fa..5b187a7d94 100644 --- a/programming_examples/basic/passthrough_kernel/aie2.py +++ b/programming_examples/basic/passthrough_kernel/aie2.py @@ -29,7 +29,7 @@ def passthroughKernel(): - @device(AIEDevice.ipu) + @device(AIEDevice.npu) def device_body(): # define types memRef_ty = T.memref(lineWidthInBytes, T.ui8()) @@ -87,19 +87,19 @@ def sequence(inTensor, outTensor, notUsed): events=[0x4B, 0x22, 0x21, 0x25, 0x2D, 0x2C, 0x1A, 0x4F], ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="in", bd_id=0, mem=inTensor, sizes=[1, 1, 1, tensorSizeInInt32s], ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="out", bd_id=1, mem=outTensor, sizes=[1, 1, 1, tensorSizeInInt32s], ) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_sync(column=0, row=0, direction=0, channel=0) with mlir_mod_ctx() as ctx: diff --git a/programming_examples/basic/passthrough_kernel/run.lit b/programming_examples/basic/passthrough_kernel/run.lit index 30abe48152..7f1c2318b2 100644 --- a/programming_examples/basic/passthrough_kernel/run.lit +++ b/programming_examples/basic/passthrough_kernel/run.lit @@ -5,8 +5,8 @@ // // RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -c %S/../../../aie_kernels/generic/passThrough.cc -o passThrough.cc.o // RUN: %python %S/aie2.py 4096 | aie-opt -cse -canonicalize -o ./aie.mlir -// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: g++ %S/test.cpp -o test.exe -std=c++23 -Wall -DPASSTHROUGH_SIZE=4096 -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s -// RUN: %run_on_ipu %python %S/test.py -x aie.xclbin -i insts.txt -k MLIR_AIE -s 4096 | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +// RUN: %run_on_npu %python %S/test.py -x aie.xclbin -i insts.txt -k MLIR_AIE -s 4096 | FileCheck %s // CHECK: PASS! diff --git a/programming_examples/basic/vector_add/Makefile b/programming_examples/basic/vector_add/Makefile index 9a1a7a2a56..61133a555b 100755 --- a/programming_examples/basic/vector_add/Makefile +++ b/programming_examples/basic/vector_add/Makefile @@ -13,15 +13,15 @@ ACDC_AIE = $(dir $(shell which aie-opt))/.. SHELL := /bin/bash targetname = vectorAdd -devicename = ipu +devicename = npu col = 0 all: build/final.xclbin build/final.xclbin: build/aie.mlir mkdir -p ${@D} - cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \ - --xclbin-name=${@F} --ipu-insts-name=insts.txt ${Vector Add -Single tile performs a very simple `+` operations from two vectors loaded into memory. The tile then stores the sum of those two vectors back to external memory. This reference design can be run on either a RyzenAI IPU or a VCK5000. +Single tile performs a very simple `+` operations from two vectors loaded into memory. The tile then stores the sum of those two vectors back to external memory. This reference design can be run on either a RyzenAI NPU or a VCK5000. -The kernel executes on AIE tile (`col`, 2). Both input vectors are brought into the tile from Shim tile (`col`, 0). The value of `col` is dependent on whether the application is targetting IPU or VCK5000. The AIE tile performs the summation operations and the Shim tile brings the data back out to external memory. +The kernel executes on AIE tile (`col`, 2). Both input vectors are brought into the tile from Shim tile (`col`, 0). The value of `col` is dependent on whether the application is targetting NPU or VCK5000. The AIE tile performs the summation operations and the Shim tile brings the data back out to external memory. -To compile and run the design for IPU: +To compile and run the design for NPU: ``` make make run diff --git a/programming_examples/basic/vector_add/aie2.py b/programming_examples/basic/vector_add/aie2.py index 6f8ad2d5b6..581729e6ec 100755 --- a/programming_examples/basic/vector_add/aie2.py +++ b/programming_examples/basic/vector_add/aie2.py @@ -28,8 +28,8 @@ def my_vector_add(): if len(sys.argv) != 3: raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") - if sys.argv[1] == "ipu": - dev = AIEDevice.ipu + if sys.argv[1] == "npu": + dev = AIEDevice.npu elif sys.argv[1] == "xcvc1902": dev = AIEDevice.xcvc1902 else: @@ -79,10 +79,10 @@ def core_body(): @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty) def sequence(A, B, C): - ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) - ipu_dma_memcpy_nd(metadata="in1", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - ipu_dma_memcpy_nd(metadata="in2", bd_id=2, mem=B, sizes=[1, 1, 1, N]) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata="in1", bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata="in2", bd_id=2, mem=B, sizes=[1, 1, 1, N]) + npu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/programming_examples/basic/vector_add/run.lit b/programming_examples/basic/vector_add/run.lit index a429e99221..1922c01828 100644 --- a/programming_examples/basic/vector_add/run.lit +++ b/programming_examples/basic/vector_add/run.lit @@ -3,9 +3,9 @@ // // REQUIRES: ryzen_ai // -// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir -// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python %S/aie2.py npu 0 > ./aie.mlir +// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s // CHECK: PASS! diff --git a/programming_examples/basic/vector_exp/CMakeLists.txt b/programming_examples/basic/vector_exp/CMakeLists.txt index 20452d080e..ee2050a94e 100644 --- a/programming_examples/basic/vector_exp/CMakeLists.txt +++ b/programming_examples/basic/vector_exp/CMakeLists.txt @@ -27,7 +27,7 @@ if (NOT WSL) else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif() set(TARGET_NAME test CACHE STRING "Target to be built") diff --git a/programming_examples/basic/vector_exp/Makefile b/programming_examples/basic/vector_exp/Makefile index 68205484e0..5b471771ba 100644 --- a/programming_examples/basic/vector_exp/Makefile +++ b/programming_examples/basic/vector_exp/Makefile @@ -32,7 +32,7 @@ build/aie.mlir: aie2.py build/final.xclbin: build/aie.mlir build/kernels.a mkdir -p ${@D} cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ - --aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%) + --aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%) ${targetname}.exe: test.cpp rm -rf _build diff --git a/programming_examples/basic/vector_exp/aie2.py b/programming_examples/basic/vector_exp/aie2.py index 66980ac451..f010fcde9b 100644 --- a/programming_examples/basic/vector_exp/aie2.py +++ b/programming_examples/basic/vector_exp/aie2.py @@ -35,7 +35,7 @@ def my_eltwise_exp(): with mlir_mod_ctx() as ctx: # Device declaration - aie2 device NPU (aka Ryzen AI) - @device(AIEDevice.ipu) + @device(AIEDevice.npu) def device_body(): memRef_ty = T.memref(n, T.bf16()) @@ -113,13 +113,13 @@ def core_body(): @FuncOp.from_py_func(tensor_ty, tensor_ty) def sequence(A, C): - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="outC", bd_id=0, mem=C, sizes=[1, 1, 1, C_sz_in_i32s] ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="inA", bd_id=1, mem=A, sizes=[1, 1, 1, A_sz_in_i32s] ) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_sync(column=0, row=0, direction=0, channel=0) # Print the mlir conversion print(ctx.module) diff --git a/programming_examples/basic/vector_exp/run.lit b/programming_examples/basic/vector_exp/run.lit index f2db79ab6a..247ca37a33 100644 --- a/programming_examples/basic/vector_exp/run.lit +++ b/programming_examples/basic/vector_exp/run.lit @@ -6,8 +6,8 @@ // RUN: xchesscc_wrapper aie2 -I %aietools/include -I %S/../../../aie_runtime_lib/AIE2 -c %S/../../../aie_kernels/aie2/bf16_exp.cc -o exp.o // RUN: xchesscc_wrapper aie2 -I %aietools/include -I. -c %S/../../../aie_runtime_lib/AIE2/lut_based_ops.cpp -o lut_based_ops.o // RUN: ar rvs kernels.a exp.o lut_based_ops.o -// RUN: %python %S/aie2.py ipu 0 | aie-opt -cse -canonicalize -o ./aie.mlir -// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python %S/aie2.py npu 0 | aie-opt -cse -canonicalize -o ./aie.mlir +// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s // CHECK: PASS! diff --git a/programming_examples/basic/vector_mult/CMakeLists.txt b/programming_examples/basic/vector_mult/CMakeLists.txt index 20452d080e..ee2050a94e 100644 --- a/programming_examples/basic/vector_mult/CMakeLists.txt +++ b/programming_examples/basic/vector_mult/CMakeLists.txt @@ -27,7 +27,7 @@ if (NOT WSL) else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif() set(TARGET_NAME test CACHE STRING "Target to be built") diff --git a/programming_examples/basic/vector_mult/Makefile b/programming_examples/basic/vector_mult/Makefile index 330692f4fb..bc07e3d05b 100755 --- a/programming_examples/basic/vector_mult/Makefile +++ b/programming_examples/basic/vector_mult/Makefile @@ -13,15 +13,15 @@ ACDC_AIE = $(dir $(shell which aie-opt))/.. SHELL := /bin/bash targetname = vectorMult -devicename = ipu +devicename = npu col = 0 all: build/final.xclbin build/final.xclbin: build/aie.mlir mkdir -p ${@D} - cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \ - --xclbin-name=${@F} --ipu-insts-name=insts.txt ${Vector Multiplication -Single tile performs a very simple `*` operations from two vectors loaded into memory. The tile then stores the element wise multiplication of those two vectors back to external memory. This reference design can be run on either a RyzenAI IPU or a VCK5000. +Single tile performs a very simple `*` operations from two vectors loaded into memory. The tile then stores the element wise multiplication of those two vectors back to external memory. This reference design can be run on either a RyzenAI NPU or a VCK5000. -The kernel executes on AIE tile (`col`, 2). Both input vectors are brought into the tile from Shim tile (`col`, 0). The value of `col` is dependent on whether the application is targetting IPU or VCK5000. The AIE tile performs the multiplication operations and the Shim tile brings the data back out to external memory. +The kernel executes on AIE tile (`col`, 2). Both input vectors are brought into the tile from Shim tile (`col`, 0). The value of `col` is dependent on whether the application is targetting NPU or VCK5000. The AIE tile performs the multiplication operations and the Shim tile brings the data back out to external memory. -To compile and run the design for IPU: +To compile and run the design for NPU: ``` make make run diff --git a/programming_examples/basic/vector_mult/aie2.py b/programming_examples/basic/vector_mult/aie2.py index 5a36f85a33..209f5243bb 100755 --- a/programming_examples/basic/vector_mult/aie2.py +++ b/programming_examples/basic/vector_mult/aie2.py @@ -28,8 +28,8 @@ def my_vector_add(): if len(sys.argv) != 3: raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") - if sys.argv[1] == "ipu": - dev = AIEDevice.ipu + if sys.argv[1] == "npu": + dev = AIEDevice.npu elif sys.argv[1] == "xcvc1902": dev = AIEDevice.xcvc1902 else: @@ -79,10 +79,10 @@ def core_body(): @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty) def sequence(A, B, C): - ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) - ipu_dma_memcpy_nd(metadata="in1", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - ipu_dma_memcpy_nd(metadata="in2", bd_id=2, mem=B, sizes=[1, 1, 1, N]) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata="in1", bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata="in2", bd_id=2, mem=B, sizes=[1, 1, 1, N]) + npu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/programming_examples/basic/vector_mult/run.lit b/programming_examples/basic/vector_mult/run.lit index a429e99221..1922c01828 100644 --- a/programming_examples/basic/vector_mult/run.lit +++ b/programming_examples/basic/vector_mult/run.lit @@ -3,9 +3,9 @@ // // REQUIRES: ryzen_ai // -// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir -// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python %S/aie2.py npu 0 > ./aie.mlir +// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s // CHECK: PASS! diff --git a/programming_examples/basic/vector_reduce_add/CMakeLists.txt b/programming_examples/basic/vector_reduce_add/CMakeLists.txt index 9ae325a430..024b4cfd54 100644 --- a/programming_examples/basic/vector_reduce_add/CMakeLists.txt +++ b/programming_examples/basic/vector_reduce_add/CMakeLists.txt @@ -22,7 +22,7 @@ if (NOT WSL) else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif() set(TARGET_NAME test CACHE STRING "Target to be built") diff --git a/programming_examples/basic/vector_reduce_add/Makefile b/programming_examples/basic/vector_reduce_add/Makefile index 37ca25abec..b0f8eebe0c 100644 --- a/programming_examples/basic/vector_reduce_add/Makefile +++ b/programming_examples/basic/vector_reduce_add/Makefile @@ -11,7 +11,7 @@ include ../../makefile-common ACDC_AIE = $(dir $(shell which aie-opt))/.. targetname = vector_max -devicename = ipu +devicename = npu col = 0 CHESS_FLAGS=${CHESSCCWRAP2_FLAGS} KERNEL_LIB=${ACDC_AIE}/../../aie_kernels/aie2/ @@ -29,7 +29,7 @@ build/aie.mlir: aie2.py build/final.xclbin: build/aie.mlir build/i32_add_reduce.o mkdir -p ${@D} cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ - --aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%) + --aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%) ${targetname}.exe: test.cpp rm -rf _build diff --git a/programming_examples/basic/vector_reduce_add/aie2.py b/programming_examples/basic/vector_reduce_add/aie2.py index 098de83380..b8b8ff5c59 100644 --- a/programming_examples/basic/vector_reduce_add/aie2.py +++ b/programming_examples/basic/vector_reduce_add/aie2.py @@ -26,8 +26,8 @@ def my_reduce_add(): if len(sys.argv) != 3: raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") - if sys.argv[1] == "ipu": - dev = AIEDevice.ipu + if sys.argv[1] == "npu": + dev = AIEDevice.npu elif sys.argv[1] == "xcvc1902": dev = AIEDevice.xcvc1902 else: @@ -71,9 +71,9 @@ def core_body(): @FuncOp.from_py_func(tensor_ty, tensor_ty) def sequence(A, C): - ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 1]) - ipu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 1]) + npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/programming_examples/basic/vector_reduce_add/run.lit b/programming_examples/basic/vector_reduce_add/run.lit index 192380beb0..37c0544b6d 100644 --- a/programming_examples/basic/vector_reduce_add/run.lit +++ b/programming_examples/basic/vector_reduce_add/run.lit @@ -3,9 +3,9 @@ // // REQUIRES: ryzen_ai, chess // -// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir +// RUN: %python %S/aie2.py npu 0 > ./aie.mlir // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../aie_kernels/aie2/reduce_add.cc -o reduce_add.cc.o -// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s // CHECK: PASS! diff --git a/programming_examples/basic/vector_reduce_max/CMakeLists.txt b/programming_examples/basic/vector_reduce_max/CMakeLists.txt index 9ae325a430..024b4cfd54 100644 --- a/programming_examples/basic/vector_reduce_max/CMakeLists.txt +++ b/programming_examples/basic/vector_reduce_max/CMakeLists.txt @@ -22,7 +22,7 @@ if (NOT WSL) else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif() set(TARGET_NAME test CACHE STRING "Target to be built") diff --git a/programming_examples/basic/vector_reduce_max/Makefile b/programming_examples/basic/vector_reduce_max/Makefile index 55a013704d..5e47d478b2 100755 --- a/programming_examples/basic/vector_reduce_max/Makefile +++ b/programming_examples/basic/vector_reduce_max/Makefile @@ -11,7 +11,7 @@ include ../../makefile-common ACDC_AIE = $(dir $(shell which aie-opt))/.. targetname = reduce_max -devicename = ipu +devicename = npu col = 0 CHESS_FLAGS=${CHESSCCWRAP2_FLAGS} KERNEL_LIB=../../../aie_kernels/aie2 @@ -29,7 +29,7 @@ build/aie.mlir: aie2.py build/final.xclbin: build/aie.mlir build/reduce_max.cc.o mkdir -p ${@D} cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ - --aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%) + --aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%) ${targetname}.exe: test.cpp rm -rf _build diff --git a/programming_examples/basic/vector_reduce_max/aie2.py b/programming_examples/basic/vector_reduce_max/aie2.py index 5fc3e39d72..9ef47de0bf 100755 --- a/programming_examples/basic/vector_reduce_max/aie2.py +++ b/programming_examples/basic/vector_reduce_max/aie2.py @@ -26,8 +26,8 @@ def my_reduce_max(): if len(sys.argv) != 3: raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") - if sys.argv[1] == "ipu": - dev = AIEDevice.ipu + if sys.argv[1] == "npu": + dev = AIEDevice.npu elif sys.argv[1] == "xcvc1902": dev = AIEDevice.xcvc1902 else: @@ -71,9 +71,9 @@ def core_body(): @FuncOp.from_py_func(tensor_ty, tensor_ty) def sequence(A, C): - ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 1]) - ipu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 1]) + npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/programming_examples/basic/vector_reduce_max/run.lit b/programming_examples/basic/vector_reduce_max/run.lit index 6c3233183c..584d7c1628 100644 --- a/programming_examples/basic/vector_reduce_max/run.lit +++ b/programming_examples/basic/vector_reduce_max/run.lit @@ -4,8 +4,8 @@ // REQUIRES: ryzen_ai, chess // // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../aie_kernels/aie2/reduce_max.cc -o reduce_max.cc.o -// RUN: %python %S/aie2.py ipu 0 | aie-opt -cse -canonicalize -o ./aie.mlir -// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python %S/aie2.py npu 0 | aie-opt -cse -canonicalize -o ./aie.mlir +// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: g++ %S/test.cpp -o test.exe -std=c++23 -Wall -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s // CHECK: PASS! diff --git a/programming_examples/basic/vector_reduce_min/CMakeLists.txt b/programming_examples/basic/vector_reduce_min/CMakeLists.txt index 76d48dfe36..820bc8059d 100644 --- a/programming_examples/basic/vector_reduce_min/CMakeLists.txt +++ b/programming_examples/basic/vector_reduce_min/CMakeLists.txt @@ -22,7 +22,7 @@ if (NOT WSL) else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif() set(TARGET_NAME test CACHE STRING "Target to be built") diff --git a/programming_examples/basic/vector_reduce_min/Makefile b/programming_examples/basic/vector_reduce_min/Makefile index 177213e22a..b4321855e1 100755 --- a/programming_examples/basic/vector_reduce_min/Makefile +++ b/programming_examples/basic/vector_reduce_min/Makefile @@ -11,7 +11,7 @@ include ../../makefile-common ACDC_AIE = $(dir $(shell which aie-opt))/.. targetname = reduce_min -devicename = ipu +devicename = npu col = 0 CHESS_FLAGS=${CHESSCCWRAP2_FLAGS} KERNEL_LIB=../../../aie_kernels/aie2 @@ -29,7 +29,7 @@ build/aie.mlir: aie2.py build/final.xclbin: build/aie.mlir build/reduce_min.cc.o mkdir -p ${@D} cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ - --aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%) + --aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%) ${targetname}.exe: test.cpp rm -rf _build diff --git a/programming_examples/basic/vector_reduce_min/aie2.py b/programming_examples/basic/vector_reduce_min/aie2.py index 35bb351fee..eafca4a57f 100755 --- a/programming_examples/basic/vector_reduce_min/aie2.py +++ b/programming_examples/basic/vector_reduce_min/aie2.py @@ -26,8 +26,8 @@ def my_reduce_min(): if len(sys.argv) != 3: raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") - if sys.argv[1] == "ipu": - dev = AIEDevice.ipu + if sys.argv[1] == "npu": + dev = AIEDevice.npu elif sys.argv[1] == "xcvc1902": dev = AIEDevice.xcvc1902 else: @@ -71,9 +71,9 @@ def core_body(): @FuncOp.from_py_func(tensor_ty, tensor_ty) def sequence(A, C): - ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 1]) - ipu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 1]) + npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/programming_examples/basic/vector_reduce_min/run.lit b/programming_examples/basic/vector_reduce_min/run.lit index 95ecbd533a..710a9a02cd 100644 --- a/programming_examples/basic/vector_reduce_min/run.lit +++ b/programming_examples/basic/vector_reduce_min/run.lit @@ -4,8 +4,8 @@ // REQUIRES: ryzen_ai, chess // // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../aie_kernels/aie2/reduce_min.cc -o reduce_min.cc.o -// RUN: %python %S/aie2.py ipu 0 | aie-opt -cse -canonicalize -o ./aie.mlir -// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python %S/aie2.py npu 0 | aie-opt -cse -canonicalize -o ./aie.mlir +// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: g++ %S/test.cpp -o test.exe -std=c++23 -Wall -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s // CHECK: PASS! diff --git a/programming_examples/basic/vector_scalar_add/CMakeLists.txt b/programming_examples/basic/vector_scalar_add/CMakeLists.txt index c4ca0825d4..20f5d8a4a3 100644 --- a/programming_examples/basic/vector_scalar_add/CMakeLists.txt +++ b/programming_examples/basic/vector_scalar_add/CMakeLists.txt @@ -27,7 +27,7 @@ if (NOT WSL) else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif() set(TARGET_NAME test CACHE STRING "Target to be built") diff --git a/programming_examples/basic/vector_scalar_add/Makefile b/programming_examples/basic/vector_scalar_add/Makefile index 4ad8553675..463b63532b 100644 --- a/programming_examples/basic/vector_scalar_add/Makefile +++ b/programming_examples/basic/vector_scalar_add/Makefile @@ -18,8 +18,8 @@ build/aie.mlir: aie2.py build/final.xclbin: build/aie.mlir mkdir -p ${@D} - cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \ - --xclbin-name=${@F} --ipu-insts-name=insts.txt ${ ./aie.mlir -// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s // CHECK: PASS! diff --git a/programming_examples/basic/vector_scalar_mul/CMakeLists.txt b/programming_examples/basic/vector_scalar_mul/CMakeLists.txt index c4ca0825d4..20f5d8a4a3 100644 --- a/programming_examples/basic/vector_scalar_mul/CMakeLists.txt +++ b/programming_examples/basic/vector_scalar_mul/CMakeLists.txt @@ -27,7 +27,7 @@ if (NOT WSL) else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif() set(TARGET_NAME test CACHE STRING "Target to be built") diff --git a/programming_examples/basic/vector_scalar_mul/Makefile b/programming_examples/basic/vector_scalar_mul/Makefile index c6f18a71b3..e93b53da4c 100755 --- a/programming_examples/basic/vector_scalar_mul/Makefile +++ b/programming_examples/basic/vector_scalar_mul/Makefile @@ -36,12 +36,12 @@ build/aie_trace.mlir: aie2.py build/final.xclbin: build/aie.mlir build/scale.o mkdir -p ${@D} cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ - --aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%) + --aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%) build/final_trace.xclbin: build/aie_trace.mlir build/scale.o mkdir -p ${@D} cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ - --aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%) + --aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%) ${targetname}.exe: test.cpp rm -rf _build diff --git a/programming_examples/basic/vector_scalar_mul/aie2.py b/programming_examples/basic/vector_scalar_mul/aie2.py index bf85beae56..caed881c92 100755 --- a/programming_examples/basic/vector_scalar_mul/aie2.py +++ b/programming_examples/basic/vector_scalar_mul/aie2.py @@ -25,7 +25,7 @@ def my_vector_scalar(trace_size): vectorized = True - @device(AIEDevice.ipu) + @device(AIEDevice.npu) def device_body(): memRef_ty = T.memref(n, T.i32()) memRef_ty2 = T.memref(1, T.i32()) @@ -92,10 +92,10 @@ def sequence(A, F, C): size=trace_size, offset=N_in_bytes, ) - ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) - ipu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - ipu_dma_memcpy_nd(metadata="infactor", bd_id=2, mem=F, sizes=[1, 1, 1, 1]) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata="infactor", bd_id=2, mem=F, sizes=[1, 1, 1, 1]) + npu_sync(column=0, row=0, direction=0, channel=0) try: diff --git a/programming_examples/basic/vector_scalar_mul/run.lit b/programming_examples/basic/vector_scalar_mul/run.lit index ab713ff7bb..a38f82b550 100644 --- a/programming_examples/basic/vector_scalar_mul/run.lit +++ b/programming_examples/basic/vector_scalar_mul/run.lit @@ -5,8 +5,8 @@ // // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../aie_kernels/aie2/scale.cc -o ./scale.o // RUN: %python %S/aie2.py > ./aie.mlir -// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: g++ %S/test.cpp -o test.exe -std=c++23 -Wall -DPASSTHROUGH_SIZE=4096 -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s -// RUN: %run_on_ipu %python %S/test.py -x aie.xclbin -i insts.txt -k MLIR_AIE -s 4096 | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +// RUN: %run_on_npu %python %S/test.py -x aie.xclbin -i insts.txt -k MLIR_AIE -s 4096 | FileCheck %s // CHECK: PASS! diff --git a/programming_examples/basic/vector_sum/CMakeLists.txt b/programming_examples/basic/vector_sum/CMakeLists.txt index f253b14fb0..5e637b4d7d 100644 --- a/programming_examples/basic/vector_sum/CMakeLists.txt +++ b/programming_examples/basic/vector_sum/CMakeLists.txt @@ -27,7 +27,7 @@ if (NOT WSL) else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif() set(TARGET_NAME test CACHE STRING "Target to be built") diff --git a/programming_examples/basic/vector_sum/Makefile b/programming_examples/basic/vector_sum/Makefile index 8c0372f191..e9c2016543 100755 --- a/programming_examples/basic/vector_sum/Makefile +++ b/programming_examples/basic/vector_sum/Makefile @@ -13,15 +13,15 @@ ACDC_AIE = $(dir $(shell which aie-opt))/.. SHELL := /bin/bash targetname = vectorSum -devicename = ipu +devicename = npu col = 0 all: build/final.xclbin build/final.xclbin: build/aie.mlir mkdir -p ${@D} - cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \ - --xclbin-name=${@F} --ipu-insts-name=insts.txt ${Vector sum -Single tile traverses through a vector in memory and returns the sum of each value in the vector. The tile that performs the operation is tile (`col`, 2) and the data is read from and written to external memory through Shim tile (`col`, 0). A buffer in tile (`col`, 2) is used to store the temporary maximum value during processing, which is then pushed through an object FIFO to the Shim tile when processing is complete. This reference design can be run on either a RyzenAI IPU or a VCK5000. The value of `col` is dependent on whether the application is targetting IPU or VCK5000. +Single tile traverses through a vector in memory and returns the sum of each value in the vector. The tile that performs the operation is tile (`col`, 2) and the data is read from and written to external memory through Shim tile (`col`, 0). A buffer in tile (`col`, 2) is used to store the temporary maximum value during processing, which is then pushed through an object FIFO to the Shim tile when processing is complete. This reference design can be run on either a RyzenAI NPU or a VCK5000. The value of `col` is dependent on whether the application is targetting NPU or VCK5000. -To compile and run the design for IPU: +To compile and run the design for NPU: ``` make make run diff --git a/programming_examples/basic/vector_sum/aie2.py b/programming_examples/basic/vector_sum/aie2.py index 4e40b8009c..8073833962 100755 --- a/programming_examples/basic/vector_sum/aie2.py +++ b/programming_examples/basic/vector_sum/aie2.py @@ -26,8 +26,8 @@ def my_vector_sum(): if len(sys.argv) != 3: raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)") - if sys.argv[1] == "ipu": - dev = AIEDevice.ipu + if sys.argv[1] == "npu": + dev = AIEDevice.npu elif sys.argv[1] == "xcvc1902": dev = AIEDevice.xcvc1902 else: @@ -77,9 +77,9 @@ def core_body(): @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty) def sequence(A, B, C): - ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 1]) - ipu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 1]) + npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/programming_examples/basic/vector_sum/run.lit b/programming_examples/basic/vector_sum/run.lit index a429e99221..1922c01828 100644 --- a/programming_examples/basic/vector_sum/run.lit +++ b/programming_examples/basic/vector_sum/run.lit @@ -3,9 +3,9 @@ // // REQUIRES: ryzen_ai // -// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir -// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python %S/aie2.py npu 0 > ./aie.mlir +// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s // CHECK: PASS! diff --git a/programming_examples/lit.cfg.py b/programming_examples/lit.cfg.py index b774bc5280..c28fdec458 100755 --- a/programming_examples/lit.cfg.py +++ b/programming_examples/lit.cfg.py @@ -45,7 +45,7 @@ # for python llvm_config.with_environment("PYTHONPATH", os.path.join(config.aie_obj_root, "python")) -run_on_ipu = "echo" +run_on_npu = "echo" xrt_flags = "" # Not using run_on_board anymore, need more specific per-platform commands @@ -137,8 +137,8 @@ aie_model = m.group(2) print("\tmodel:", aie_model) config.available_features.add("ryzen_ai") - run_on_ipu = ( - f"flock /tmp/ipu.lock {config.aie_src_root}/utils/run_on_ipu.sh" + run_on_npu = ( + f"flock /tmp/npu.lock {config.aie_src_root}/utils/run_on_npu.sh" ) except: print("Failed to run xbutil") @@ -146,7 +146,7 @@ else: print("xrt not found") -config.substitutions.append(("%run_on_ipu", run_on_ipu)) +config.substitutions.append(("%run_on_npu", run_on_npu)) config.substitutions.append(("%xrt_flags", xrt_flags)) config.substitutions.append(("%XRT_DIR", config.xrt_dir)) config.environment["XRT_HACK_UNSECURE_LOADING_XCLBIN"] = "1" diff --git a/programming_examples/makefile-common b/programming_examples/makefile-common index 5ab55c2e08..b5007535b8 100644 --- a/programming_examples/makefile-common +++ b/programming_examples/makefile-common @@ -1,4 +1,4 @@ -# Contains common definitions used across the Makefiles of ipu-xrt tests. +# Contains common definitions used across the Makefiles of npu-xrt tests. REPO_ROOT ?= $(shell realpath $(dir $(shell which aie-opt))/../../..) INSTALL_ROOT ?= $(shell realpath $(dir $(shell which aie-opt))/..) diff --git a/programming_examples/ml/bottleneck/CMakeLists.txt b/programming_examples/ml/bottleneck/CMakeLists.txt index 4b897cb29c..c7db0e9c5c 100644 --- a/programming_examples/ml/bottleneck/CMakeLists.txt +++ b/programming_examples/ml/bottleneck/CMakeLists.txt @@ -25,7 +25,7 @@ else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(OpenCV_DIR C:/Technical/thirdParty/opencv/build CACHE STRING "Path to OpenCV install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif () set(EDGEDETECT_WIDTH 1920 CACHE STRING "image width") diff --git a/programming_examples/ml/bottleneck/Makefile b/programming_examples/ml/bottleneck/Makefile index f5c6e4561f..43aa8e7e87 100755 --- a/programming_examples/ml/bottleneck/Makefile +++ b/programming_examples/ml/bottleneck/Makefile @@ -16,7 +16,7 @@ build/${mlirFileName}.mlir: aie2.py python3 $< > $@ insts.txt: build/${mlirFileName}.mlir - aiecc.py -v --aie-only-generate-ipu --ipu-insts-name=$@ $< + aiecc.py -v --aie-only-generate-npu --npu-insts-name=$@ $< build/conv2dk1.o: ../../../aie_kernels/aie2/conv2dk1.cc xchesscc -d ${CHESSCC2_FLAGS} -DINT8_ACT -c $< -o $@ @@ -28,8 +28,8 @@ build/conv2dk1_skip.o: ../../../aie_kernels/aie2/conv2dk1_skip.cc xchesscc -d ${CHESSCC2_FLAGS} -DINT8_ACT -c $< -o $@ build/final.xclbin: build/${mlirFileName}.mlir - cd build && aiecc.py -v --aie-generate-cdo --aie-generate-ipu --no-compile-host \ - --xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%) + cd build && aiecc.py -v --aie-generate-cdo --aie-generate-npu --no-compile-host \ + --xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%) clean: rm -rf build *.elf* *.lst *.bif ${mlirFileName}.mlir.prj log .xclbin sim \ diff --git a/programming_examples/ml/bottleneck/aie2.py b/programming_examples/ml/bottleneck/aie2.py index a488ae8ded..ac349259f4 100644 --- a/programming_examples/ml/bottleneck/aie2.py +++ b/programming_examples/ml/bottleneck/aie2.py @@ -38,7 +38,7 @@ def bottleneck4AIEs(): with mlir_mod_ctx() as ctx: - @device(AIEDevice.ipu) + @device(AIEDevice.npu) def deviceBody(): # define types @@ -543,9 +543,9 @@ def sequence(inputFromL3, weightsFromL3, outputToL3): # EVENTS_CORE_PORT_RUNNING_0 (0x4B) # Trace_Event0 (4 slots) - ipu_write32(0, 4, 0x340E0, 0x4B222125) + npu_write32(0, 4, 0x340E0, 0x4B222125) # Trace_Event1 (4 slots) - ipu_write32(0, 4, 0x340E4, 0x2D2C1A4F) + npu_write32(0, 4, 0x340E4, 0x2D2C1A4F) # Event slots as configured above: # 0: Kernel executes vector instruction @@ -559,13 +559,13 @@ def sequence(inputFromL3, weightsFromL3, outputToL3): # Stream_Switch_Event_Port_Selection_0 # This is necessary to capture the Port_Running_0 and Port_Running_1 events - ipu_write32(0, 4, 0x3FF00, 0x121) + npu_write32(0, 4, 0x3FF00, 0x121) # Trace_Control0: Define trace start and stop triggers. Set start event TRUE. - ipu_write32(0, 4, 0x340D0, 0x10000) + npu_write32(0, 4, 0x340D0, 0x10000) # Start trace copy out. - ipu_writebd_shimtile( + npu_writebd_shimtile( bd_id=3, buffer_length=trace_sz_in_i32s, buffer_offset=acitivationsOutSize32b, @@ -593,45 +593,45 @@ def sequence(inputFromL3, weightsFromL3, outputToL3): use_next_bd=0, valid_bd=1, ) - ipu_write32(0, 2, 0x1D20C, 0x3) + npu_write32(0, 2, 0x1D20C, 0x3) # write RTP parameters - IpuWriteRTPOp( + NpuWriteRTPOp( "rtpComputeTile2", col=0, row=2, index=0, value=1 ) # scale - IpuWriteRTPOp( + NpuWriteRTPOp( "rtpComputeTile3", col=0, row=3, index=0, value=1 ) # scale - IpuWriteRTPOp( + NpuWriteRTPOp( "rtpComputeTile5", col=0, row=5, index=0, value=1 ) # scale - IpuWriteRTPOp( + NpuWriteRTPOp( "rtpComputeTile4", col=0, row=4, index=0, value=1 ) # scale: conv1x1 with the same scale as the input so we match the scaling factor of output after conv1x1 and the initial input - IpuWriteRTPOp( + NpuWriteRTPOp( "rtpComputeTile4", col=0, row=4, index=1, value=0 ) # skip_scale - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="inOF_act_L3L2", bd_id=0, mem=inputFromL3, sizes=[1, 1, 1, activationsInSize32b], ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="outOFL2L3", bd_id=2, mem=outputToL3, sizes=[1, 1, 1, acitivationsOutSize32b], ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="inOF_wts_0_L3L2", bd_id=1, mem=weightsFromL3, sizes=[1, 1, 1, totalWeightsSize32b], ) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/programming_examples/ml/bottleneck/run.lit b/programming_examples/ml/bottleneck/run.lit index ec30002c97..2a6d25eb25 100644 --- a/programming_examples/ml/bottleneck/run.lit +++ b/programming_examples/ml/bottleneck/run.lit @@ -7,6 +7,6 @@ // RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -DUINT8_ACT -c %S/../../../aie_kernels/aie2/conv2dk3.cc -o conv2dk3.o // RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -DINT8_ACT -c %S/../../../aie_kernels/aie2/conv2dk1_skip.cc -o conv2dk1_skip.o // RUN: %python %S/aie2.py | aie-opt -cse -canonicalize -o ./aie.mlir -// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir -// RUN: %run_on_ipu %python %S/test.py | FileCheck %s +// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir +// RUN: %run_on_npu %python %S/test.py | FileCheck %s // CHECK: PASS! \ No newline at end of file diff --git a/programming_examples/ml/conv2d/CMakeLists.txt b/programming_examples/ml/conv2d/CMakeLists.txt index 4b897cb29c..c7db0e9c5c 100644 --- a/programming_examples/ml/conv2d/CMakeLists.txt +++ b/programming_examples/ml/conv2d/CMakeLists.txt @@ -25,7 +25,7 @@ else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(OpenCV_DIR C:/Technical/thirdParty/opencv/build CACHE STRING "Path to OpenCV install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif () set(EDGEDETECT_WIDTH 1920 CACHE STRING "image width") diff --git a/programming_examples/ml/conv2d/Makefile b/programming_examples/ml/conv2d/Makefile index 0274f3fef7..5cb1fab988 100755 --- a/programming_examples/ml/conv2d/Makefile +++ b/programming_examples/ml/conv2d/Makefile @@ -18,14 +18,14 @@ build/${mlirFileName}.mlir: aie2.py insts.txt: build/${mlirFileName}.mlir - aiecc.py -v --aie-only-generate-ipu --ipu-insts-name=$@ $< + aiecc.py -v --aie-only-generate-npu --npu-insts-name=$@ $< build/conv2dk1_i8.o: ../../../aie_kernels/aie2/conv2dk1_i8.cc xchesscc -d ${CHESSCC2_FLAGS} -DINT8_ACT -c $< -o $@ build/final.xclbin: build/${mlirFileName}.mlir - cd build && aiecc.py -v --aie-generate-cdo --aie-generate-ipu --no-compile-host \ - --xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%) + cd build && aiecc.py -v --aie-generate-cdo --aie-generate-npu --no-compile-host \ + --xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%) clean: rm -rf build *.elf* *.lst *.bif ${mlirFileName}.mlir.prj log* *.xclbin sim \ diff --git a/programming_examples/ml/conv2d/aie2.py b/programming_examples/ml/conv2d/aie2.py index 74a2c38838..82584170cf 100644 --- a/programming_examples/ml/conv2d/aie2.py +++ b/programming_examples/ml/conv2d/aie2.py @@ -42,7 +42,7 @@ def conv2dk1(): with mlir_mod_ctx() as ctx: - @device(AIEDevice.ipu) + @device(AIEDevice.npu) def device_body(): actIn_ty = T.memref(actIn, T.i8()) @@ -162,14 +162,14 @@ def sequence(I, W, O): # BB <- Event to start trace capture # C <- Trace mode, 00=event=time, 01=event-PC, 10=execution # Configure so that "Event 1" (always true) causes tracing to start - ipu_write32( + npu_write32( column=compute_tile2_col, row=compute_tile2_row, address=0x340D0, value=0x00010000, ) # 0x340D4: Trace Control 1 - ipu_write32( + npu_write32( column=compute_tile2_col, row=compute_tile2_row, address=0x340D4, @@ -177,7 +177,7 @@ def sequence(I, W, O): ) # 0x340E0: Trace Event Group 1 (Which events to trace) # 0xAABBCCDD AA, BB, CC, DD <- four event slots - ipu_write32( + npu_write32( column=compute_tile2_col, row=compute_tile2_row, address=0x340E0, @@ -185,14 +185,14 @@ def sequence(I, W, O): ) # 0x340E4: Trace Event Group 2 (Which events to trace) # 0xAABBCCDD AA, BB, CC, DD <- four event slots - ipu_write32( + npu_write32( column=compute_tile2_col, row=compute_tile2_row, address=0x340E4, value=0x2D2C1A4F, ) - ipu_write32( + npu_write32( column=compute_tile2_col, row=compute_tile2_row, address=0x3FF00, @@ -203,7 +203,7 @@ def sequence(I, W, O): # out to host DDR memory trace_bd_id = 13 # use BD 13 for writing trace output from compute tile to DDR host memory output_size = bufOut - ipu_writebd_shimtile( + npu_writebd_shimtile( bd_id=trace_bd_id, buffer_length=trace_size, buffer_offset=output_size, @@ -232,29 +232,29 @@ def sequence(I, W, O): valid_bd=1, ) # Set start BD to our shim bd_Id (3) - ipu_write32(column=0, row=0, address=0x1D20C, value=trace_bd_id) + npu_write32(column=0, row=0, address=0x1D20C, value=trace_bd_id) - IpuWriteRTPOp("rtp2", col=0, row=2, index=0, value=10) + NpuWriteRTPOp("rtp2", col=0, row=2, index=0, value=10) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="inOF_act_L3L2", bd_id=0, mem=I, sizes=[1, 1, 1, tensorSizeInInt32s], ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="outOFL2L3", bd_id=2, mem=O, sizes=[1, 1, 1, tensorSizeInInt32s], ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="inOF_wts_0_L3L2", bd_id=2, mem=W, sizes=[1, 1, 1, weightsInInt32s], ) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_sync(column=0, row=0, direction=0, channel=0) # print(ctx.module.operation.verify()) print(ctx.module) diff --git a/programming_examples/ml/conv2d/run.lit b/programming_examples/ml/conv2d/run.lit index 1eeef90b94..81e08e5444 100644 --- a/programming_examples/ml/conv2d/run.lit +++ b/programming_examples/ml/conv2d/run.lit @@ -5,6 +5,6 @@ // // RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -DINT8_ACT -c %S/../../../aie_kernels/aie2/conv2dk1_i8.cc -o conv2dk1_i8.o // RUN: %python %S/aie2.py | aie-opt -cse -canonicalize -o ./aie.mlir -// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir -// RUN: %run_on_ipu %python %S/test.py | FileCheck %s +// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir +// RUN: %run_on_npu %python %S/test.py | FileCheck %s // CHECK: PASS! \ No newline at end of file diff --git a/programming_examples/ml/conv2d_fused_relu/CMakeLists.txt b/programming_examples/ml/conv2d_fused_relu/CMakeLists.txt index 4b897cb29c..c7db0e9c5c 100644 --- a/programming_examples/ml/conv2d_fused_relu/CMakeLists.txt +++ b/programming_examples/ml/conv2d_fused_relu/CMakeLists.txt @@ -25,7 +25,7 @@ else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(OpenCV_DIR C:/Technical/thirdParty/opencv/build CACHE STRING "Path to OpenCV install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif () set(EDGEDETECT_WIDTH 1920 CACHE STRING "image width") diff --git a/programming_examples/ml/conv2d_fused_relu/Makefile b/programming_examples/ml/conv2d_fused_relu/Makefile index 80cb34dc08..6f26a9bf8e 100755 --- a/programming_examples/ml/conv2d_fused_relu/Makefile +++ b/programming_examples/ml/conv2d_fused_relu/Makefile @@ -17,14 +17,14 @@ build/${mlirFileName}.mlir: aie2.py insts.txt: build/${mlirFileName}.mlir - aiecc.py -v --aie-only-generate-ipu --ipu-insts-name=$@ $< + aiecc.py -v --aie-only-generate-npu --npu-insts-name=$@ $< build/conv2dk1.o: ../../../aie_kernels/aie2/conv2dk1.cc xchesscc -d ${CHESSCC2_FLAGS} -DINT8_ACT -c $< -o $@ build/final.xclbin: build/${mlirFileName}.mlir - cd build && aiecc.py -v --aie-generate-cdo --aie-generate-ipu --no-compile-host \ - --xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%) + cd build && aiecc.py -v --aie-generate-cdo --aie-generate-npu --no-compile-host \ + --xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%) clean: rm -rf build *.elf* *.lst *.bif ${mlirFileName}.mlir.prj log* *.xclbin sim \ diff --git a/programming_examples/ml/conv2d_fused_relu/aie2.py b/programming_examples/ml/conv2d_fused_relu/aie2.py index be0167e3b4..13a59f0934 100644 --- a/programming_examples/ml/conv2d_fused_relu/aie2.py +++ b/programming_examples/ml/conv2d_fused_relu/aie2.py @@ -42,7 +42,7 @@ def conv2dk1(): with mlir_mod_ctx() as ctx: - @device(AIEDevice.ipu) + @device(AIEDevice.npu) def device_body(): actIn_ty = T.memref(actIn, T.i8()) @@ -162,14 +162,14 @@ def sequence(I, W, O): # BB <- Event to start trace capture # C <- Trace mode, 00=event=time, 01=event-PC, 10=execution # Configure so that "Event 1" (always true) causes tracing to start - ipu_write32( + npu_write32( column=compute_tile2_col, row=compute_tile2_row, address=0x340D0, value=0x00010000, ) # 0x340D4: Trace Control 1 - ipu_write32( + npu_write32( column=compute_tile2_col, row=compute_tile2_row, address=0x340D4, @@ -177,7 +177,7 @@ def sequence(I, W, O): ) # 0x340E0: Trace Event Group 1 (Which events to trace) # 0xAABBCCDD AA, BB, CC, DD <- four event slots - ipu_write32( + npu_write32( column=compute_tile2_col, row=compute_tile2_row, address=0x340E0, @@ -185,14 +185,14 @@ def sequence(I, W, O): ) # 0x340E4: Trace Event Group 2 (Which events to trace) # 0xAABBCCDD AA, BB, CC, DD <- four event slots - ipu_write32( + npu_write32( column=compute_tile2_col, row=compute_tile2_row, address=0x340E4, value=0x2D2C1A4F, ) - ipu_write32( + npu_write32( column=compute_tile2_col, row=compute_tile2_row, address=0x3FF00, @@ -203,7 +203,7 @@ def sequence(I, W, O): # out to host DDR memory trace_bd_id = 13 # use BD 13 for writing trace output from compute tile to DDR host memory output_size = bufOut - ipu_writebd_shimtile( + npu_writebd_shimtile( bd_id=trace_bd_id, buffer_length=trace_size, buffer_offset=output_size, @@ -232,29 +232,29 @@ def sequence(I, W, O): valid_bd=1, ) # Set start BD to our shim bd_Id (3) - ipu_write32(column=0, row=0, address=0x1D20C, value=trace_bd_id) + npu_write32(column=0, row=0, address=0x1D20C, value=trace_bd_id) - IpuWriteRTPOp("rtp2", col=0, row=2, index=0, value=1) + NpuWriteRTPOp("rtp2", col=0, row=2, index=0, value=1) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="inOF_act_L3L2", bd_id=0, mem=I, sizes=[1, 1, 1, tensorSizeInInt32s], ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="outOFL2L3", bd_id=2, mem=O, sizes=[1, 1, 1, tensorSizeInInt32s], ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="inOF_wts_0_L3L2", bd_id=2, mem=W, sizes=[1, 1, 1, weightsInInt32s], ) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_sync(column=0, row=0, direction=0, channel=0) # print(ctx.module.operation.verify()) print(ctx.module) diff --git a/programming_examples/ml/conv2d_fused_relu/run.lit b/programming_examples/ml/conv2d_fused_relu/run.lit index 0c122f451e..be7c1c5d08 100644 --- a/programming_examples/ml/conv2d_fused_relu/run.lit +++ b/programming_examples/ml/conv2d_fused_relu/run.lit @@ -5,6 +5,6 @@ // // RUN: xchesscc_wrapper aie2 -I %aietools/include -DINT8_ACT -DBIT_WIDTH=8 -c %S/../../../aie_kernels/aie2/conv2dk1.cc -o conv2dk1.o // RUN: %python %S/aie2.py | aie-opt -cse -canonicalize -o ./aie.mlir -// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir -// RUN: %run_on_ipu %python %S/test.py | FileCheck %s +// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir +// RUN: %run_on_npu %python %S/test.py | FileCheck %s // CHECK: PASS! \ No newline at end of file diff --git a/programming_examples/ml/eltwise_add/CMakeLists.txt b/programming_examples/ml/eltwise_add/CMakeLists.txt index c4ca0825d4..20f5d8a4a3 100644 --- a/programming_examples/ml/eltwise_add/CMakeLists.txt +++ b/programming_examples/ml/eltwise_add/CMakeLists.txt @@ -27,7 +27,7 @@ if (NOT WSL) else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif() set(TARGET_NAME test CACHE STRING "Target to be built") diff --git a/programming_examples/ml/eltwise_add/Makefile b/programming_examples/ml/eltwise_add/Makefile index ebaf16c2f9..f685a607f3 100644 --- a/programming_examples/ml/eltwise_add/Makefile +++ b/programming_examples/ml/eltwise_add/Makefile @@ -22,8 +22,8 @@ build/aie.mlir: aie2.py build/final.xclbin: build/aie.mlir build/add.o mkdir -p ${@D} - cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \ - --xclbin-name=${@F} --ipu-insts-name=insts.txt ${ - aiex.ipu.dma_memcpy_nd(0, 0, %out[0, 0, 0, 0][1, 1, 1, %act_out][0, 0, 0]) {id = 2 : i64, metadata = @outOFL2L3} : memref<65536xi32> - aiex.ipu.dma_memcpy_nd(0, 0, %wts0[0, 0, 0, 0][1, 1, 1, %total_wts][0, 0, 0]) {id = 1 : i64, metadata = @inOF_wts_0_L3L2} : memref<53248xi32> - aiex.ipu.dma_memcpy_nd(0, 0, %wts0[0, 0, 0, %total_wts][1, 1, 1, %total_wts_2][0, 0, 0]) {id = 1 : i64, metadata = @inOF_wts_1_L3L2} : memref<53248xi32> - aiex.ipu.dma_memcpy_nd(0, 0, %wts0[0, 0, 0, %total_wts_3_off][1, 1, 1, %total_wts_3][0, 0, 0]) {id = 1 : i64, metadata = @inOF_wts_2_L3L2} : memref<53248xi32> + aiex.npu.dma_memcpy_nd(0, 0, %in0[0, 0, 0, 0][1, 1, 1, %act_in][0, 0, 0]) {id = 0 : i64, metadata = @inOF_act_L3L2} : memref<16384xi32> + aiex.npu.dma_memcpy_nd(0, 0, %out[0, 0, 0, 0][1, 1, 1, %act_out][0, 0, 0]) {id = 2 : i64, metadata = @outOFL2L3} : memref<65536xi32> + aiex.npu.dma_memcpy_nd(0, 0, %wts0[0, 0, 0, 0][1, 1, 1, %total_wts][0, 0, 0]) {id = 1 : i64, metadata = @inOF_wts_0_L3L2} : memref<53248xi32> + aiex.npu.dma_memcpy_nd(0, 0, %wts0[0, 0, 0, %total_wts][1, 1, 1, %total_wts_2][0, 0, 0]) {id = 1 : i64, metadata = @inOF_wts_1_L3L2} : memref<53248xi32> + aiex.npu.dma_memcpy_nd(0, 0, %wts0[0, 0, 0, %total_wts_3_off][1, 1, 1, %total_wts_3][0, 0, 0]) {id = 1 : i64, metadata = @inOF_wts_2_L3L2} : memref<53248xi32> - aiex.ipu.sync {channel = 0 : i32, column = 1 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} + aiex.npu.sync {channel = 0 : i32, column = 1 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} return } diff --git a/programming_examples/ml/resnet/layers_conv2_x/aie2.py b/programming_examples/ml/resnet/layers_conv2_x/aie2.py index 385a4fc7a5..e26f16b549 100755 --- a/programming_examples/ml/resnet/layers_conv2_x/aie2.py +++ b/programming_examples/ml/resnet/layers_conv2_x/aie2.py @@ -38,7 +38,7 @@ def bottleneck4AIEs(): with mlir_mod_ctx() as ctx: - @device(AIEDevice.ipu) + @device(AIEDevice.npu) def deviceBody(): # define types @@ -543,9 +543,9 @@ def sequence(inputFromL3, weightsFromL3, outputToL3): # EVENTS_CORE_PORT_RUNNING_0 (0x4B) # Trace_Event0 (4 slots) - ipu_write32(0, 4, 0x340E0, 0x4B222125) + npu_write32(0, 4, 0x340E0, 0x4B222125) # Trace_Event1 (4 slots) - ipu_write32(0, 4, 0x340E4, 0x2D2C1A4F) + npu_write32(0, 4, 0x340E4, 0x2D2C1A4F) # Event slots as configured above: # 0: Kernel executes vector instruction @@ -559,13 +559,13 @@ def sequence(inputFromL3, weightsFromL3, outputToL3): # Stream_Switch_Event_Port_Selection_0 # This is necessary to capture the Port_Running_0 and Port_Running_1 events - ipu_write32(0, 4, 0x3FF00, 0x121) + npu_write32(0, 4, 0x3FF00, 0x121) # Trace_Control0: Define trace start and stop triggers. Set start event TRUE. - ipu_write32(0, 4, 0x340D0, 0x10000) + npu_write32(0, 4, 0x340D0, 0x10000) # Start trace copy out. - ipu_writebd_shimtile( + npu_writebd_shimtile( bd_id=3, buffer_length=trace_sz_in_i32s, buffer_offset=acitivationsOutSize32b, @@ -593,45 +593,45 @@ def sequence(inputFromL3, weightsFromL3, outputToL3): use_next_bd=0, valid_bd=1, ) - ipu_write32(0, 2, 0x1D20C, 0x3) + npu_write32(0, 2, 0x1D20C, 0x3) # write RTP parameters - IpuWriteRTPOp( + NpuWriteRTPOp( "rtpComputeTile2", col=0, row=2, index=0, value=1 ) # scale - IpuWriteRTPOp( + NpuWriteRTPOp( "rtpComputeTile3", col=0, row=3, index=0, value=1 ) # scale - IpuWriteRTPOp( + NpuWriteRTPOp( "rtpComputeTile5", col=0, row=5, index=0, value=1 ) # scale - IpuWriteRTPOp( + NpuWriteRTPOp( "rtpComputeTile4", col=0, row=4, index=0, value=1 ) # scale: conv1x1 with the same scale as the input so we match the scaling factor of output after conv1x1 and the initial input - IpuWriteRTPOp( + NpuWriteRTPOp( "rtpComputeTile4", col=0, row=4, index=1, value=0 ) # skip_scale - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="inOF_act_L3L2", bd_id=0, mem=inputFromL3, sizes=[1, 1, 1, activationsInSize32b], ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="outOFL2L3", bd_id=2, mem=outputToL3, sizes=[1, 1, 1, acitivationsOutSize32b], ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="inOF_wts_0_L3L2", bd_id=1, mem=weightsFromL3, sizes=[1, 1, 1, totalWeightsSize32b], ) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/programming_examples/ml/resnet/layers_conv2_x/run.lit b/programming_examples/ml/resnet/layers_conv2_x/run.lit index 61f43e45e6..a8e86282a6 100755 --- a/programming_examples/ml/resnet/layers_conv2_x/run.lit +++ b/programming_examples/ml/resnet/layers_conv2_x/run.lit @@ -9,6 +9,6 @@ // RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -DSCALAR -DUINT8_ACT -c %S/../../../../aie_kernels/aie2/conv2dk1.cc -o conv2dk1_ui8.o // RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -DSCALAR -DUINT8_ACT -c %S/../../../../aie_kernels/aie2/conv2dk1_skip.cc -o conv2dk1_skip.o // RUN: %python %S/aie2.py | aie-opt -cse -canonicalize -o ./aie.mlir -// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir -// RUN: %run_on_ipu %python %S/test.py | FileCheck %s +// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir +// RUN: %run_on_npu %python %S/test.py | FileCheck %s // CHECK: PASS! \ No newline at end of file diff --git a/programming_examples/ml/softmax/CMakeLists.txt b/programming_examples/ml/softmax/CMakeLists.txt index c4ca0825d4..20f5d8a4a3 100644 --- a/programming_examples/ml/softmax/CMakeLists.txt +++ b/programming_examples/ml/softmax/CMakeLists.txt @@ -27,7 +27,7 @@ if (NOT WSL) else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif() set(TARGET_NAME test CACHE STRING "Target to be built") diff --git a/programming_examples/ml/softmax/Makefile b/programming_examples/ml/softmax/Makefile index 4f27c07551..9048de8c69 100755 --- a/programming_examples/ml/softmax/Makefile +++ b/programming_examples/ml/softmax/Makefile @@ -37,7 +37,7 @@ build/aie.mlir: aie2.py build/final.xclbin: build/aie.mlir build/kernels.a mkdir -p ${@D} cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ - --aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%) + --aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%) ${targetname}.exe: test.cpp rm -rf _build diff --git a/programming_examples/ml/softmax/aie2.py b/programming_examples/ml/softmax/aie2.py index 5672819f7a..29c52bcb60 100755 --- a/programming_examples/ml/softmax/aie2.py +++ b/programming_examples/ml/softmax/aie2.py @@ -32,7 +32,7 @@ def my_eltwise_add(): with mlir_mod_ctx() as ctx: - @device(AIEDevice.ipu) + @device(AIEDevice.npu) def device_body(): memRef_ty = T.memref(n, T.bf16()) @@ -109,13 +109,13 @@ def core_body(): @FuncOp.from_py_func(tensor_ty, tensor_ty) def sequence(A, C): - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="outC", bd_id=0, mem=C, sizes=[1, 1, 1, C_sz_in_i32s] ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="inA", bd_id=1, mem=A, sizes=[1, 1, 1, A_sz_in_i32s] ) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/programming_examples/ml/softmax/run.lit b/programming_examples/ml/softmax/run.lit index 54c7ccff98..42441e898a 100644 --- a/programming_examples/ml/softmax/run.lit +++ b/programming_examples/ml/softmax/run.lit @@ -9,7 +9,7 @@ // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/softmax.cc -o softmax.o // RUN: ar rvs kernels.a dut.o lut_based_ops.o softmax.o // RUN: %python %S/aie2.py | aie-opt -cse -canonicalize -o ./aie.mlir -// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s // CHECK: PASS! diff --git a/programming_examples/ml/weight_expand/CMakeLists.txt b/programming_examples/ml/weight_expand/CMakeLists.txt index c4ca0825d4..20f5d8a4a3 100644 --- a/programming_examples/ml/weight_expand/CMakeLists.txt +++ b/programming_examples/ml/weight_expand/CMakeLists.txt @@ -27,7 +27,7 @@ if (NOT WSL) else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif() set(TARGET_NAME test CACHE STRING "Target to be built") diff --git a/programming_examples/ml/weight_expand/Makefile b/programming_examples/ml/weight_expand/Makefile index 641b4902b3..b4967596fb 100755 --- a/programming_examples/ml/weight_expand/Makefile +++ b/programming_examples/ml/weight_expand/Makefile @@ -23,7 +23,7 @@ build/aie.mlir: aie2.py build/final.xclbin: build/aie.mlir build/expand.o mkdir -p ${@D} cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \ - --aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%) + --aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%) ${targetname}.exe: test.cpp rm -rf _build diff --git a/programming_examples/ml/weight_expand/aie2.py b/programming_examples/ml/weight_expand/aie2.py index 3ca1f7aee3..32fe95429f 100755 --- a/programming_examples/ml/weight_expand/aie2.py +++ b/programming_examples/ml/weight_expand/aie2.py @@ -45,7 +45,7 @@ def my_expand(): with mlir_mod_ctx() as ctx: - @device(AIEDevice.ipu) + @device(AIEDevice.npu) def device_body(): memRef_i_ty = T.memref( input_buffer_size_bytes, T.i8() @@ -91,13 +91,13 @@ def core_body(): @FuncOp.from_py_func(tensor_ty, tensor_ty) def sequence(A, C): - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="outB", bd_id=0, mem=C, sizes=[1, 1, 1, B_sz_in_i32s] ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="inA", bd_id=1, mem=A, sizes=[1, 1, 1, A_sz_in_i32s] ) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/programming_examples/utils/README.md b/programming_examples/utils/README.md index 9dc2731012..1d59d46e08 100644 --- a/programming_examples/utils/README.md +++ b/programming_examples/utils/README.md @@ -54,7 +54,7 @@ The parse script create a temporary directory `tmpTrace` performs the following We prepend `0x` before each hex line and save it `prep.` since the `hwfrontend` utility expects it. ### 2. Parse MLIR to build event table -The MLIR parser is pretty rudimentary as it scans the source mlir file looking for `aiex.ipu.write32` calls and does a pattern match for trace unit config address and then grab the hex events, which it looks up from an internal table to provide waveform labels. It would be better to use an MLIR pass that already has the config information and cross reference it with a more official event-to-label lookup table instead. +The MLIR parser is pretty rudimentary as it scans the source mlir file looking for `aiex.npu.write32` calls and does a pattern match for trace unit config address and then grab the hex events, which it looks up from an internal table to provide waveform labels. It would be better to use an MLIR pass that already has the config information and cross reference it with a more official event-to-label lookup table instead. ### 3. Create .target file Create a dummy file (`.target`) in the `tmpTrace` with the file content 'hw' since `hwfrontend` utility expects it. diff --git a/programming_examples/utils/parse_eventIR.py b/programming_examples/utils/parse_eventIR.py index b7c989ca3c..b41ff9c74a 100755 --- a/programming_examples/utils/parse_eventIR.py +++ b/programming_examples/utils/parse_eventIR.py @@ -594,9 +594,9 @@ def parse_mlir_trace_events(lines): # TODO Need to check if this line is commented out, check for // ? (harder to check of /* */) # TODO Need to support value in hex with 0x or decimal - # pattern = r"AIEX.ipu.write32\s*\{\s*(\w+)\s*=\s*(\d+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\d+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\w+)\s*:\s*\w+\s*\}" - # pattern = r"AIEX.ipu.write32\s*\{\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*\}" - pattern = r"aiex.ipu.write32\s*\{\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*\}" + # pattern = r"AIEX.npu.write32\s*\{\s*(\w+)\s*=\s*(\d+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\d+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\w+)\s*:\s*\w+\s*\}" + # pattern = r"AIEX.npu.write32\s*\{\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*\}" + pattern = r"aiex.npu.write32\s*\{\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*\}" pid_events = list() for t in range(NumTraceTypes): diff --git a/programming_examples/utils/parse_trace.py b/programming_examples/utils/parse_trace.py index ed45353f31..9d2cd144a6 100755 --- a/programming_examples/utils/parse_trace.py +++ b/programming_examples/utils/parse_trace.py @@ -582,9 +582,9 @@ def parse_mlir_trace_events(lines): # TODO Need to check if this line is commented out, check for // ? (harder to check of /* */) # TODO Need to support value in hex with 0x or decimal - # pattern = r"AIEX.ipu.write32\s*\{\s*(\w+)\s*=\s*(\d+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\d+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\w+)\s*:\s*\w+\s*\}" - # pattern = r"AIEX.ipu.write32\s*\{\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*\}" - pattern = r"aiex.ipu.write32\s*\{\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*\}" + # pattern = r"AIEX.npu.write32\s*\{\s*(\w+)\s*=\s*(\d+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\d+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\w+)\s*:\s*\w+\s*\}" + # pattern = r"AIEX.npu.write32\s*\{\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*\}" + pattern = r"aiex.npu.write32\s*\{\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*\}" pid_events = list() for t in range(NumTraceTypes): diff --git a/programming_examples/vision/color_detect/CMakeLists.txt b/programming_examples/vision/color_detect/CMakeLists.txt index d850efcad5..f743789b61 100644 --- a/programming_examples/vision/color_detect/CMakeLists.txt +++ b/programming_examples/vision/color_detect/CMakeLists.txt @@ -25,7 +25,7 @@ else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(OpenCV_DIR C:/Technical/thirdParty/opencv/build CACHE STRING "Path to OpenCV install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif () set(COLORDETECT_WIDTH 1920 CACHE STRING "image width") diff --git a/programming_examples/vision/color_detect/Makefile b/programming_examples/vision/color_detect/Makefile index c8feea4cb6..ffb8ca55d1 100755 --- a/programming_examples/vision/color_detect/Makefile +++ b/programming_examples/vision/color_detect/Makefile @@ -39,8 +39,8 @@ build/aie2_lineBased_8b_${COLORDETECT_WIDTH}.mlir: aie2_colorDetect.py build/final_${COLORDETECT_WIDTH}.xclbin: build/aie2_lineBased_8b_${COLORDETECT_WIDTH}.mlir build/rgba2hue.cc.o build/threshold.cc.o build/combined_bitwiseOR_gray2rgba_bitwiseAND.a mkdir -p ${@D} - cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \ - --xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%) + cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \ + --xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%) build/${targetname}.exe: test.cpp mkdir -p ${@D} diff --git a/programming_examples/vision/color_detect/README.md b/programming_examples/vision/color_detect/README.md index 33d41a2339..f2f24dbea6 100644 --- a/programming_examples/vision/color_detect/README.md +++ b/programming_examples/vision/color_detect/README.md @@ -12,7 +12,7 @@ The Color Detect pipeline design consists of the following blocks arranged in a pipeline fashion for the detecting of 2 colors in a sequence of images : `rgba2hue`, `threshold`, `threshold`, `bitwiseOR`, `gray2rgba`, `bitwiseAND`. -The pipeline is mapped onto a single column of the ipu device, with one Shim tile (0, 0), one Mem tile (0, 1) and four AIE compute tiles (0, 2) through (0, 5). As shown in the image below, the `rgba2hue`, and the two `threshold` kernels are each mapped onto one compute tile, while `bitwiseOR`, `gray2rgba` and `bitwiseAND` are mapped together on AIE tile (0, 5). +The pipeline is mapped onto a single column of the npu device, with one Shim tile (0, 0), one Mem tile (0, 1) and four AIE compute tiles (0, 2) through (0, 5). As shown in the image below, the `rgba2hue`, and the two `threshold` kernels are each mapped onto one compute tile, while `bitwiseOR`, `gray2rgba` and `bitwiseAND` are mapped together on AIE tile (0, 5).

./aie.mlir -// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: g++ %S/test.cpp -o test.exe -std=c++23 -Wall -DCOLORDETECT_WIDTH=1920 -DCOLORDETECT_HEIGHT=1080 -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp -I%S/../../utils %S/../../utils/OpenCVUtils.cpp %xrt_flags %opencv_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s // CHECK: PASS! diff --git a/programming_examples/vision/color_threshold/CMakeLists.txt b/programming_examples/vision/color_threshold/CMakeLists.txt index 040bc74533..f630f55106 100644 --- a/programming_examples/vision/color_threshold/CMakeLists.txt +++ b/programming_examples/vision/color_threshold/CMakeLists.txt @@ -25,7 +25,7 @@ else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(OpenCV_DIR C:/Technical/thirdParty/opencv/build CACHE STRING "Path to OpenCV install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif () set(COLORTHRESHOLD_WIDTH 128 CACHE STRING "image width") diff --git a/programming_examples/vision/color_threshold/Makefile b/programming_examples/vision/color_threshold/Makefile index 286f342b08..69958f4c2e 100644 --- a/programming_examples/vision/color_threshold/Makefile +++ b/programming_examples/vision/color_threshold/Makefile @@ -36,8 +36,8 @@ build/aie2_${COLORTHRESHOLD_WIDTH}.mlir: aie2_colorThreshold.py build/final_${COLORTHRESHOLD_WIDTH}.xclbin: build/aie2_${COLORTHRESHOLD_WIDTH}.mlir build/threshold.cc.o mkdir -p ${@D} - cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \ - --xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%) + cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \ + --xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%) ${targetname}.exe: test.cpp rm -rf _build diff --git a/programming_examples/vision/color_threshold/README.md b/programming_examples/vision/color_threshold/README.md index fbab6235cc..ad8613544a 100644 --- a/programming_examples/vision/color_threshold/README.md +++ b/programming_examples/vision/color_threshold/README.md @@ -12,7 +12,7 @@ The Color Threshold pipeline design consists of 4 threshold blocks in separate AIE tiles that process a different region of an input image, as shown in the image below. -The pipeline is mapped onto a single column of the ipu device, with one Shim tile (0, 0), one Mem tile (0, 1) and four AIE compute tiles (0, 2) through (0, 5). +The pipeline is mapped onto a single column of the npu device, with one Shim tile (0, 0), one Mem tile (0, 1) and four AIE compute tiles (0, 2) through (0, 5).

./aie.mlir -// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: g++ %S/test.cpp -o test.exe -std=c++23 -Wall -DCOLORTHRESHOLD_WIDTH=1920 -DCOLORTHRESHOLD_HEIGHT=1080 -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp -I%S/../../utils %S/../../utils/OpenCVUtils.cpp %xrt_flags %opencv_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s // CHECK: PASS! diff --git a/programming_examples/vision/edge_detect/CMakeLists.txt b/programming_examples/vision/edge_detect/CMakeLists.txt index 59fe331831..c0ceb81739 100644 --- a/programming_examples/vision/edge_detect/CMakeLists.txt +++ b/programming_examples/vision/edge_detect/CMakeLists.txt @@ -25,7 +25,7 @@ else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(OpenCV_DIR C:/Technical/thirdParty/opencv/build CACHE STRING "Path to OpenCV install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif () set(EDGEDETECT_WIDTH 1920 CACHE STRING "image width") diff --git a/programming_examples/vision/edge_detect/Makefile b/programming_examples/vision/edge_detect/Makefile index d40e606e63..71c2012432 100755 --- a/programming_examples/vision/edge_detect/Makefile +++ b/programming_examples/vision/edge_detect/Makefile @@ -39,8 +39,8 @@ build/aie2_lineBased_8b_${EDGEDETECT_WIDTH}.mlir: aie2_edgeDetect.py build/final_${EDGEDETECT_WIDTH}.xclbin: build/aie2_lineBased_8b_${EDGEDETECT_WIDTH}.mlir build/rgba2gray.cc.o build/gray2rgba.cc.o build/filter2d.cc.o build/threshold.cc.o build/addWeighted.cc.o build/combined_gray2rgba_addWeighted.a mkdir -p ${@D} - cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \ - --xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%) + cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \ + --xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%) ${targetname}.exe: test.cpp rm -rf _build diff --git a/programming_examples/vision/edge_detect/README.md b/programming_examples/vision/edge_detect/README.md index 2450f019ec..26f2d4aff8 100644 --- a/programming_examples/vision/edge_detect/README.md +++ b/programming_examples/vision/edge_detect/README.md @@ -12,7 +12,7 @@ The Edge Detect pipeline design consists of the following blocks arranged in a pipeline fashion for the detection of edges in a sequence of images : `rgba2gray`, `filter2D`, `threshold`, `gray2rgba`, `addWeighted`. -The pipeline is mapped onto a single column of the ipu device, with one Shim tile (0, 0), one Mem tile (0, 1) and four AIE compute tiles (0, 2) through (0, 5). As shown in the image below, the `rgba2gray`, `filter2D` and `threshold` kernels are each mapped onto one compute tile, while `gray2rgba` and `addWeighted` are mapped together on AIE tile (0, 5). +The pipeline is mapped onto a single column of the npu device, with one Shim tile (0, 0), one Mem tile (0, 1) and four AIE compute tiles (0, 2) through (0, 5). As shown in the image below, the `rgba2gray`, `filter2D` and `threshold` kernels are each mapped onto one compute tile, while `gray2rgba` and `addWeighted` are mapped together on AIE tile (0, 5).

./aie.mlir -// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: g++ %S/test.cpp -o test.exe -std=c++23 -Wall -DEDGEDETECT_WIDTH=1920 -DEDGEDETECT_HEIGHT=1080 -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp -I%S/../../utils %S/../../utils/OpenCVUtils.cpp %xrt_flags %opencv_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s // CHECK: PASS! diff --git a/programming_examples/vision/vision_passthrough/CMakeLists.txt b/programming_examples/vision/vision_passthrough/CMakeLists.txt index 7ba68b268b..a2bb8ac761 100644 --- a/programming_examples/vision/vision_passthrough/CMakeLists.txt +++ b/programming_examples/vision/vision_passthrough/CMakeLists.txt @@ -28,7 +28,7 @@ else() set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") set(OpenCV_DIR C:/Technical/thirdParty/opencv/build CACHE STRING "Path to OpenCV install") set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") - set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") endif () set(PASSTHROUGH_WIDTH 1920 CACHE STRING "image width") diff --git a/programming_examples/vision/vision_passthrough/Makefile b/programming_examples/vision/vision_passthrough/Makefile index f07d90fda2..1ae853d942 100644 --- a/programming_examples/vision/vision_passthrough/Makefile +++ b/programming_examples/vision/vision_passthrough/Makefile @@ -32,8 +32,8 @@ build/passThrough.cc.o: passThrough.cc build/final_${PASSTHROUGH_WIDTH}.xclbin: build/aie2_lineBased_8b_${PASSTHROUGH_WIDTH}.mlir build/passThrough.cc.o mkdir -p ${@D} - cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \ - --xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%) + cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \ + --xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%) ${targetname}.exe: test.cpp rm -rf _build diff --git a/programming_examples/vision/vision_passthrough/aie2.py b/programming_examples/vision/vision_passthrough/aie2.py index 5422f803d1..920d109cfa 100644 --- a/programming_examples/vision/vision_passthrough/aie2.py +++ b/programming_examples/vision/vision_passthrough/aie2.py @@ -29,7 +29,7 @@ def passThroughAIE2(): with mlir_mod_ctx() as ctx: - @device(AIEDevice.ipu) + @device(AIEDevice.npu) def device_body(): # define types line_ty = T.memref(lineWidthInBytes, T.ui8()) @@ -101,9 +101,9 @@ def sequence(inTensor, notUsed, outTensor): # EVENTS_CORE_PORT_RUNNING_0 (0x4B) # Trace_Event0 (4 slots) - IpuWrite32(0, 2, 0x340E0, 0x4B222125) + NpuWrite32(0, 2, 0x340E0, 0x4B222125) # Trace_Event1 (4 slots) - IpuWrite32(0, 2, 0x340E4, 0x2D2C1A4F) + NpuWrite32(0, 2, 0x340E4, 0x2D2C1A4F) # Event slots as configured above: # 0: Kernel executes vector instruction @@ -117,13 +117,13 @@ def sequence(inTensor, notUsed, outTensor): # Stream_Switch_Event_Port_Selection_0 # This is necessary to capture the Port_Running_0 and Port_Running_1 events - IpuWrite32(0, 2, 0x3FF00, 0x121) + NpuWrite32(0, 2, 0x3FF00, 0x121) # Trace_Control0: Define trace start and stop triggers. Set start event TRUE. - IpuWrite32(0, 2, 0x340D0, 0x10000) + NpuWrite32(0, 2, 0x340D0, 0x10000) # Start trace copy out. - IpuWriteBdShimTile( + NpuWriteBdShimTile( bd_id=3, buffer_length=traceSizeInBytes, buffer_offset=tensorSize, @@ -151,21 +151,21 @@ def sequence(inTensor, notUsed, outTensor): use_next_bd=0, valid_bd=1, ) - IpuWrite32(0, 0, 0x1D20C, 0x3) + NpuWrite32(0, 0, 0x1D20C, 0x3) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="in", bd_id=1, mem=inTensor, sizes=[1, 1, 1, tensorSizeInInt32s], ) - ipu_dma_memcpy_nd( + npu_dma_memcpy_nd( metadata="out", bd_id=0, mem=outTensor, sizes=[1, 1, 1, tensorSizeInInt32s], ) - ipu_sync(column=0, row=0, direction=0, channel=0) + npu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_1080.mlir b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_1080.mlir index 3c547e4016..0621e0b622 100644 --- a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_1080.mlir +++ b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_1080.mlir @@ -12,7 +12,7 @@ // AIE tiles, buffers, and communication in an AI Engine design module @passThroughLine_aie2 { - aie.device(ipu) { + aie.device(npu) { // declare kernel external kernel function func.func private @passThroughLine(%in: memref<1920xui8>, %out: memref<1920xui8>, %tilewidth: i32) -> () @@ -53,9 +53,9 @@ module @passThroughLine_aie2 { %tilewidth = arith.constant 480 : i64 // in 32b words so tileWidth/4 //dma_memcpy_nd ([offset in 32b words][length in 32b words][stride in 32b words]) - aiex.ipu.dma_memcpy_nd (0, 0, %in[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @inOF, id = 1 : i64 } : memref<518400xi32> - aiex.ipu.dma_memcpy_nd (0, 0, %out[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @outOF, id = 0 : i64 } : memref<518400xi32> - aiex.ipu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} + aiex.npu.dma_memcpy_nd (0, 0, %in[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @inOF, id = 1 : i64 } : memref<518400xi32> + aiex.npu.dma_memcpy_nd (0, 0, %out[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @outOF, id = 0 : i64 } : memref<518400xi32> + aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} return } } diff --git a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_8k.mlir b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_8k.mlir index d07ba213c4..c2c31b0d9b 100644 --- a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_8k.mlir +++ b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_8k.mlir @@ -12,7 +12,7 @@ // AIE tiles, buffers, and communication in an AI Engine design module @passThroughLine_aie2 { - aie.device(ipu) { + aie.device(npu) { // declare kernel external kernel function func.func private @passThroughLine(%in: memref<7680xui8>, %out: memref<7680xui8>, %tilewidth: i32) -> () @@ -54,9 +54,9 @@ module @passThroughLine_aie2 { %totalLenRGBA = arith.constant 2073600 : i64 //dma_memcpy_nd ([offset in 32b words][length in 32b words][stride in 32b words]) - aiex.ipu.dma_memcpy_nd (0, 0, %in[%c0, %c0, %c0, %c0][%c1, %c1, %c1, %totalLenRGBA][%c0, %c0, %c0]) { metadata = @inOF, id = 1 : i64 } : memref<2073600xi32> - aiex.ipu.dma_memcpy_nd (0, 0, %out[%c0, %c0, %c0, %c0][%c1, %c1, %c1, %totalLenRGBA][%c0, %c0, %c0]) { metadata = @outOF, id = 0 : i64 } : memref<2073600xi32> - aiex.ipu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} + aiex.npu.dma_memcpy_nd (0, 0, %in[%c0, %c0, %c0, %c0][%c1, %c1, %c1, %totalLenRGBA][%c0, %c0, %c0]) { metadata = @inOF, id = 1 : i64 } : memref<2073600xi32> + aiex.npu.dma_memcpy_nd (0, 0, %out[%c0, %c0, %c0, %c0][%c1, %c1, %c1, %totalLenRGBA][%c0, %c0, %c0]) { metadata = @outOF, id = 0 : i64 } : memref<2073600xi32> + aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} return } } diff --git a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_tiny.mlir b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_tiny.mlir index 13f7417166..dd66475ca5 100644 --- a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_tiny.mlir +++ b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_tiny.mlir @@ -12,7 +12,7 @@ // AIE tiles, buffers, and communication in an AI Engine design module @passThroughLine_aie2 { - aie.device(ipu) { + aie.device(npu) { // declare kernel external kernel function func.func private @passThroughLine(%in: memref<512xui8>, %out: memref<512xui8>, %tilewidth: i32) -> () @@ -53,9 +53,9 @@ module @passThroughLine_aie2 { %tilewidth = arith.constant 128 : i64 // in 32b words so tileWidth/4 //dma_memcpy_nd ([offset in 32b words][length in 32b words][stride in 32b words]) - aiex.ipu.dma_memcpy_nd (0, 0, %in[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @inOF, id = 1 : i64 } : memref<1152xi32> - aiex.ipu.dma_memcpy_nd (0, 0, %out[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @outOF, id = 0 : i64 } : memref<1152xi32> - aiex.ipu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} + aiex.npu.dma_memcpy_nd (0, 0, %in[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @inOF, id = 1 : i64 } : memref<1152xi32> + aiex.npu.dma_memcpy_nd (0, 0, %out[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @outOF, id = 0 : i64 } : memref<1152xi32> + aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} return } } diff --git a/programming_examples/vision/vision_passthrough/run.lit b/programming_examples/vision/vision_passthrough/run.lit index 5093e3c80c..58f914861c 100644 --- a/programming_examples/vision/vision_passthrough/run.lit +++ b/programming_examples/vision/vision_passthrough/run.lit @@ -5,7 +5,7 @@ // // RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -c %S/../../../aie_kernels/generic/passThrough.cc -o passThrough.cc.o // RUN: %python %S/aie2.py 1920 1080 | aie-opt -cse -canonicalize -o ./aie.mlir -// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir // RUN: g++ %S/test.cpp -o test.exe -std=c++23 -Wall -DPASSTHROUGH_WIDTH=1920 -DPASSTHROUGH_HEIGHT=1080 -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp -I%S/../../utils %S/../../utils/OpenCVUtils.cpp %xrt_flags %opencv_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s +// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s // CHECK: PASS!