From ca53aa27d0f287384b7d9b9b8f39702f5a656d10 Mon Sep 17 00:00:00 2001
From: Jeff Fifield <jeff.fifield@amd.com>
Date: Fri, 19 Apr 2024 09:28:54 -0600
Subject: [PATCH] rename ipu->npu in programming_examples

---
 .../basic/log_hello_world/CMakeLists.txt      |  2 +-
 .../basic/log_hello_world/Makefile            |  4 +-
 .../basic/log_hello_world/hello_world.py      | 10 ++--
 .../basic/log_hello_world/run.lit             |  4 +-
 .../basic/matrix_add_one/Makefile             |  6 +-
 .../basic/matrix_add_one/README.md            |  6 +-
 .../basic/matrix_add_one/aie2.py              | 10 ++--
 .../basic/matrix_add_one/run.lit              |  6 +-
 .../matrix_multiplication/CMakeLists.txt      |  2 +-
 .../matrix_multiplication/makefile-common     |  2 +-
 .../matrix_vector/aie2.py                     | 10 ++--
 .../matrix_vector/run.lit                     |  4 +-
 .../matrix_multiplication/single_core/aie2.py | 24 ++++----
 .../matrix_multiplication/single_core/run.lit |  4 +-
 .../matrix_multiplication/whole_array/aie2.py | 10 ++--
 .../matrix_multiplication/whole_array/run.lit |  4 +-
 .../basic/passthrough_dmas/CMakeLists.txt     |  2 +-
 .../basic/passthrough_dmas/Makefile           |  4 +-
 .../basic/passthrough_dmas/aie2.py            | 10 ++--
 .../basic/passthrough_dmas/run.lit            |  6 +-
 .../basic/passthrough_kernel/CMakeLists.txt   |  2 +-
 .../basic/passthrough_kernel/Makefile         |  4 +-
 .../basic/passthrough_kernel/aie2.py          |  8 +--
 .../basic/passthrough_kernel/run.lit          |  6 +-
 .../basic/vector_add/Makefile                 |  6 +-
 .../basic/vector_add/README.md                |  6 +-
 programming_examples/basic/vector_add/aie2.py | 12 ++--
 programming_examples/basic/vector_add/run.lit |  6 +-
 .../basic/vector_exp/CMakeLists.txt           |  2 +-
 .../basic/vector_exp/Makefile                 |  2 +-
 programming_examples/basic/vector_exp/aie2.py |  8 +--
 programming_examples/basic/vector_exp/run.lit |  6 +-
 .../basic/vector_mult/CMakeLists.txt          |  2 +-
 .../basic/vector_mult/Makefile                |  6 +-
 .../basic/vector_mult/README.md               |  6 +-
 .../basic/vector_mult/aie2.py                 | 12 ++--
 .../basic/vector_mult/run.lit                 |  6 +-
 .../basic/vector_reduce_add/CMakeLists.txt    |  2 +-
 .../basic/vector_reduce_add/Makefile          |  4 +-
 .../basic/vector_reduce_add/aie2.py           | 10 ++--
 .../basic/vector_reduce_add/run.lit           |  6 +-
 .../basic/vector_reduce_max/CMakeLists.txt    |  2 +-
 .../basic/vector_reduce_max/Makefile          |  4 +-
 .../basic/vector_reduce_max/aie2.py           | 10 ++--
 .../basic/vector_reduce_max/run.lit           |  6 +-
 .../basic/vector_reduce_min/CMakeLists.txt    |  2 +-
 .../basic/vector_reduce_min/Makefile          |  4 +-
 .../basic/vector_reduce_min/aie2.py           | 10 ++--
 .../basic/vector_reduce_min/run.lit           |  6 +-
 .../basic/vector_scalar_add/CMakeLists.txt    |  2 +-
 .../basic/vector_scalar_add/Makefile          |  4 +-
 .../basic/vector_scalar_add/aie2.py           |  8 +--
 .../basic/vector_scalar_add/run.lit           |  4 +-
 .../basic/vector_scalar_mul/CMakeLists.txt    |  2 +-
 .../basic/vector_scalar_mul/Makefile          |  4 +-
 .../basic/vector_scalar_mul/aie2.py           | 10 ++--
 .../basic/vector_scalar_mul/run.lit           |  6 +-
 .../basic/vector_sum/CMakeLists.txt           |  2 +-
 .../basic/vector_sum/Makefile                 |  6 +-
 .../basic/vector_sum/README.md                |  4 +-
 programming_examples/basic/vector_sum/aie2.py | 10 ++--
 programming_examples/basic/vector_sum/run.lit |  6 +-
 programming_examples/lit.cfg.py               |  8 +--
 programming_examples/makefile-common          |  2 +-
 .../ml/bottleneck/CMakeLists.txt              |  2 +-
 programming_examples/ml/bottleneck/Makefile   |  6 +-
 programming_examples/ml/bottleneck/aie2.py    | 32 +++++-----
 programming_examples/ml/bottleneck/run.lit    |  4 +-
 programming_examples/ml/conv2d/CMakeLists.txt |  2 +-
 programming_examples/ml/conv2d/Makefile       |  6 +-
 programming_examples/ml/conv2d/aie2.py        | 26 ++++-----
 programming_examples/ml/conv2d/run.lit        |  4 +-
 .../ml/conv2d_fused_relu/CMakeLists.txt       |  2 +-
 .../ml/conv2d_fused_relu/Makefile             |  6 +-
 .../ml/conv2d_fused_relu/aie2.py              | 26 ++++-----
 .../ml/conv2d_fused_relu/run.lit              |  4 +-
 .../ml/eltwise_add/CMakeLists.txt             |  2 +-
 programming_examples/ml/eltwise_add/Makefile  |  4 +-
 programming_examples/ml/eltwise_add/aie2.py   | 10 ++--
 programming_examples/ml/eltwise_add/run.lit   |  4 +-
 .../ml/eltwise_mul/CMakeLists.txt             |  2 +-
 programming_examples/ml/eltwise_mul/Makefile  |  4 +-
 programming_examples/ml/eltwise_mul/aie2.py   | 10 ++--
 programming_examples/ml/eltwise_mul/run.lit   |  4 +-
 programming_examples/ml/relu/CMakeLists.txt   |  2 +-
 programming_examples/ml/relu/Makefile         |  4 +-
 programming_examples/ml/relu/aie2.py          | 22 +++----
 programming_examples/ml/relu/run.lit          |  4 +-
 .../ml/resnet/layers_conv2_x/CMakeLists.txt   |  2 +-
 .../ml/resnet/layers_conv2_x/Makefile         |  6 +-
 .../ml/resnet/layers_conv2_x/aie.mlir         | 58 +++++++++----------
 .../ml/resnet/layers_conv2_x/aie2.py          | 32 +++++-----
 .../ml/resnet/layers_conv2_x/run.lit          |  4 +-
 .../ml/softmax/CMakeLists.txt                 |  2 +-
 programming_examples/ml/softmax/Makefile      |  2 +-
 programming_examples/ml/softmax/aie2.py       |  8 +--
 programming_examples/ml/softmax/run.lit       |  4 +-
 .../ml/weight_expand/CMakeLists.txt           |  2 +-
 .../ml/weight_expand/Makefile                 |  2 +-
 programming_examples/ml/weight_expand/aie2.py |  8 +--
 programming_examples/utils/README.md          |  2 +-
 programming_examples/utils/parse_eventIR.py   |  6 +-
 programming_examples/utils/parse_trace.py     |  6 +-
 .../vision/color_detect/CMakeLists.txt        |  2 +-
 .../vision/color_detect/Makefile              |  4 +-
 .../vision/color_detect/README.md             |  2 +-
 .../vision/color_detect/aie2_colorDetect.py   |  8 +--
 .../vision/color_detect/run.lit               |  4 +-
 .../vision/color_threshold/CMakeLists.txt     |  2 +-
 .../vision/color_threshold/Makefile           |  4 +-
 .../vision/color_threshold/README.md          |  2 +-
 .../color_threshold/aie2_colorThreshold.py    | 32 +++++-----
 .../vision/color_threshold/run.lit            |  4 +-
 .../vision/edge_detect/CMakeLists.txt         |  2 +-
 .../vision/edge_detect/Makefile               |  4 +-
 .../vision/edge_detect/README.md              |  2 +-
 .../vision/edge_detect/aie2_edgeDetect.py     |  8 +--
 .../vision/edge_detect/run.lit                |  4 +-
 .../vision/vision_passthrough/CMakeLists.txt  |  2 +-
 .../vision/vision_passthrough/Makefile        |  4 +-
 .../vision/vision_passthrough/aie2.py         | 20 +++----
 .../aie2_lineBased_8b_1080.mlir               |  8 +--
 .../aie2_lineBased_8b_8k.mlir                 |  8 +--
 .../aie2_lineBased_8b_tiny.mlir               |  8 +--
 .../vision/vision_passthrough/run.lit         |  4 +-
 125 files changed, 424 insertions(+), 424 deletions(-)

diff --git a/programming_examples/basic/log_hello_world/CMakeLists.txt b/programming_examples/basic/log_hello_world/CMakeLists.txt
index c4ca0825d4..20f5d8a4a3 100755
--- a/programming_examples/basic/log_hello_world/CMakeLists.txt
+++ b/programming_examples/basic/log_hello_world/CMakeLists.txt
@@ -27,7 +27,7 @@ if (NOT WSL)
 else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif()
 
 set(TARGET_NAME test CACHE STRING "Target to be built")
diff --git a/programming_examples/basic/log_hello_world/Makefile b/programming_examples/basic/log_hello_world/Makefile
index c5bcd8d5c3..778badcb6a 100755
--- a/programming_examples/basic/log_hello_world/Makefile
+++ b/programming_examples/basic/log_hello_world/Makefile
@@ -22,8 +22,8 @@ build/hello_world.mlir: hello_world.py
 
 build/hello_world.xclbin: build/hello_world.mlir build/kernel.o
 	mkdir -p ${@D}
-	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \
-		--xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%)
+	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \
+		--xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%)
 
 hello_world_elfstrings.csv: build/hello_world.xclbin
 	python3 elfStringParser.py --input ./build --output $@
diff --git a/programming_examples/basic/log_hello_world/hello_world.py b/programming_examples/basic/log_hello_world/hello_world.py
index b017d110b7..bc3cbe20fe 100644
--- a/programming_examples/basic/log_hello_world/hello_world.py
+++ b/programming_examples/basic/log_hello_world/hello_world.py
@@ -15,7 +15,7 @@ def printf():
 
     with mlir_mod_ctx() as ctx:
 
-        @device(AIEDevice.ipu)
+        @device(AIEDevice.npu)
         def device_body():
             memRef_ty = T.memref(N, T.i32())
 
@@ -47,16 +47,16 @@ def core_body():
             # To/from AIE-array data movement
             @FuncOp.from_py_func(memRef_ty, memRef_ty, memRef_ty)
             def sequence(in_mem, out_mem, logout):
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="outOF", bd_id=0, mem=out_mem, sizes=[1, 1, 1, N]
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inOF", bd_id=1, mem=in_mem, sizes=[1, 1, 1, N]
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="logoutOF", bd_id=2, mem=logout, sizes=[1, 1, 1, N]
                 )
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/basic/log_hello_world/run.lit b/programming_examples/basic/log_hello_world/run.lit
index 096df253c7..0fe0af2ada 100644
--- a/programming_examples/basic/log_hello_world/run.lit
+++ b/programming_examples/basic/log_hello_world/run.lit
@@ -5,10 +5,10 @@
 //
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o
 // RUN: %python %S/hello_world.py > ./aie.mlir
-// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
 // RUN: %python %S/elfStringParser.py --input . --output elf_string.csv
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -e elf_string.csv | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -e elf_string.csv | FileCheck %s
 // CHECK: Starting kernel execution
 // CHECK: Core Location col=1 row=2
 // CHECK: Completed executing. cycles=
diff --git a/programming_examples/basic/matrix_add_one/Makefile b/programming_examples/basic/matrix_add_one/Makefile
index 435b7b8c9e..83014fbeaf 100644
--- a/programming_examples/basic/matrix_add_one/Makefile
+++ b/programming_examples/basic/matrix_add_one/Makefile
@@ -13,15 +13,15 @@ ACDC_AIE = $(dir $(shell which aie-opt))/..
 SHELL := /bin/bash
 
 targetname = matrixAddOne
-devicename = ipu
+devicename = npu
 col = 0
 
 all: build/final.xclbin
 
 build/final.xclbin: build/aie.mlir
 	mkdir -p ${@D}
-	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \
-		--xclbin-name=${@F} --ipu-insts-name=insts.txt ${<F}
+	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \
+		--xclbin-name=${@F} --npu-insts-name=insts.txt ${<F}
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/basic/matrix_add_one/README.md b/programming_examples/basic/matrix_add_one/README.md
index 22afedbfc1..8516bdfe47 100644
--- a/programming_examples/basic/matrix_add_one/README.md
+++ b/programming_examples/basic/matrix_add_one/README.md
@@ -10,11 +10,11 @@
 
 # <ins>Matrix Addition</ins>
 
-Single tile performs a very simple `+` operation where the kernel loads data from local memory, increments the value by `1` and stores it back. The DMA in the Shim tile is programmed to bring the bottom left `8x16` portion of a larger `16x128` matrix into the tile to perform the operation. This reference design can be run on either a RyzenAI IPU or a VCK5000.
+Single tile performs a very simple `+` operation where the kernel loads data from local memory, increments the value by `1` and stores it back. The DMA in the Shim tile is programmed to bring the bottom left `8x16` portion of a larger `16x128` matrix into the tile to perform the operation. This reference design can be run on either a RyzenAI NPU or a VCK5000.
 
-The kernel executes on AIE tile (`col`, 2). Input data is brought to the local memory of the tile from Shim tile (`col`, 0). The value of `col` is dependent on whether the application is targetting IPU or VCK5000. The Shim tile is programmed with a 2D DMA to only bring a 2D submatrix into the AIE tile for processing. 
+The kernel executes on AIE tile (`col`, 2). Input data is brought to the local memory of the tile from Shim tile (`col`, 0). The value of `col` is dependent on whether the application is targeting NPU or VCK5000. The Shim tile is programmed with a 2D DMA to only bring a 2D submatrix into the AIE tile for processing. 
 
-To compile and run the design for IPU:
+To compile and run the design for NPU:
 ```
 make
 make run
diff --git a/programming_examples/basic/matrix_add_one/aie2.py b/programming_examples/basic/matrix_add_one/aie2.py
index 36eb3a3d38..a80ba794e6 100644
--- a/programming_examples/basic/matrix_add_one/aie2.py
+++ b/programming_examples/basic/matrix_add_one/aie2.py
@@ -35,8 +35,8 @@ def my_matrix_add_one():
         if len(sys.argv) != 3:
             raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
 
-        if sys.argv[1] == "ipu":
-            dev = AIEDevice.ipu
+        if sys.argv[1] == "npu":
+            dev = AIEDevice.npu
         elif sys.argv[1] == "xcvc1902":
             dev = AIEDevice.xcvc1902
         else:
@@ -85,21 +85,21 @@ def core_body():
 
             @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty)
             def sequence(inTensor, notUsed, outTensor):
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="out0",
                     bd_id=0,
                     mem=outTensor,
                     sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH],
                     strides=[1, 1, IMAGE_WIDTH],
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="in0",
                     bd_id=1,
                     mem=inTensor,
                     sizes=[1, 1, TILE_HEIGHT, TILE_WIDTH],
                     strides=[1, 1, IMAGE_WIDTH],
                 )
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/basic/matrix_add_one/run.lit b/programming_examples/basic/matrix_add_one/run.lit
index a429e99221..1922c01828 100644
--- a/programming_examples/basic/matrix_add_one/run.lit
+++ b/programming_examples/basic/matrix_add_one/run.lit
@@ -3,9 +3,9 @@
 //
 // REQUIRES: ryzen_ai
 //
-// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir
-// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python %S/aie2.py npu 0 > ./aie.mlir
+// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
 // CHECK: PASS!
 
diff --git a/programming_examples/basic/matrix_multiplication/CMakeLists.txt b/programming_examples/basic/matrix_multiplication/CMakeLists.txt
index dfe345e188..0f062b0322 100644
--- a/programming_examples/basic/matrix_multiplication/CMakeLists.txt
+++ b/programming_examples/basic/matrix_multiplication/CMakeLists.txt
@@ -27,7 +27,7 @@ if (NOT WSL)
 else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif()
 
 set(TARGET_NAME test CACHE STRING "Target to be built")
diff --git a/programming_examples/basic/matrix_multiplication/makefile-common b/programming_examples/basic/matrix_multiplication/makefile-common
index fd6a438ea0..6149657e1b 100644
--- a/programming_examples/basic/matrix_multiplication/makefile-common
+++ b/programming_examples/basic/matrix_multiplication/makefile-common
@@ -60,7 +60,7 @@ ${mlir_target}: aie2.py
 ${xclbin_target}: ${mlir_target} ${kernels:%=build/%.o}
 	mkdir -p ${@D}
 	cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \
-				--aie-generate-ipu --ipu-insts-name=${insts_target:build/%=%} $(<:%=../%)
+				--aie-generate-npu --npu-insts-name=${insts_target:build/%=%} $(<:%=../%)
 
 ${targetname}.exe: test.cpp ../test.cpp ../common.h
 	rm -rf _build
diff --git a/programming_examples/basic/matrix_multiplication/matrix_vector/aie2.py b/programming_examples/basic/matrix_multiplication/matrix_vector/aie2.py
index 80b5c89613..4ac31574fd 100644
--- a/programming_examples/basic/matrix_multiplication/matrix_vector/aie2.py
+++ b/programming_examples/basic/matrix_multiplication/matrix_vector/aie2.py
@@ -42,7 +42,7 @@ def my_matmul():
 
     with mlir_mod_ctx() as ctx:
 
-        @device(AIEDevice.ipu)
+        @device(AIEDevice.npu)
         def device_body():
             memRef_inA_ty = T.memref(m * k, T.bf16())
             memRef_inB_ty = T.memref(k, T.bf16())
@@ -176,7 +176,7 @@ def core_body():
                 T.memref(C_sz_in_i32s, T.i32()),
             )
             def sequence(A, B, C):
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata=inB_fifo_names[0],
                     bd_id=2,
                     mem=B,
@@ -186,7 +186,7 @@ def sequence(A, B, C):
                 for i in range(n_cores):
                     A_offset = i * M_div_m_div_n_cores * m * K * word_size_in // 4
                     C_offset = i * M_div_m_div_n_cores * m * word_size_out // 4
-                    ipu_dma_memcpy_nd(
+                    npu_dma_memcpy_nd(
                         metadata=memA_fifo_names[i],
                         bd_id=1,
                         mem=A,
@@ -194,7 +194,7 @@ def sequence(A, B, C):
                         sizes=[M_div_m_div_n_cores, K_div_k, m, k_in_i32s],
                         strides=[m_x_K_in_i32s, k_in_i32s, K_in_i32s],
                     )
-                    ipu_dma_memcpy_nd(
+                    npu_dma_memcpy_nd(
                         metadata=outC_fifo_names[i],
                         bd_id=0,
                         mem=C,
@@ -204,7 +204,7 @@ def sequence(A, B, C):
                     )
 
                 for i in range(n_cores):
-                    ipu_sync(column=i, row=0, direction=0, channel=0)
+                    npu_sync(column=i, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/basic/matrix_multiplication/matrix_vector/run.lit b/programming_examples/basic/matrix_multiplication/matrix_vector/run.lit
index d446e4f966..eeaa69352a 100644
--- a/programming_examples/basic/matrix_multiplication/matrix_vector/run.lit
+++ b/programming_examples/basic/matrix_multiplication/matrix_vector/run.lit
@@ -5,8 +5,8 @@
 //
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../../aie_kernels/aie2/mv.cc -o ./mv.o
 // RUN: %python %S/aie2.py -M 288 -K 288 -N 1 > ./aie.mlir
-// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -M 288 -K 288 -N 1 -v 1 | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -M 288 -K 288 -N 1 -v 1 | FileCheck %s
 // CHECK: PASS!
 
diff --git a/programming_examples/basic/matrix_multiplication/single_core/aie2.py b/programming_examples/basic/matrix_multiplication/single_core/aie2.py
index 46973f90a4..9c43053ae4 100644
--- a/programming_examples/basic/matrix_multiplication/single_core/aie2.py
+++ b/programming_examples/basic/matrix_multiplication/single_core/aie2.py
@@ -54,7 +54,7 @@ def my_matmul():
 
     with mlir_mod_ctx() as ctx:
 
-        @device(AIEDevice.ipu)
+        @device(AIEDevice.npu)
         def device_body():
             memref_a_ty = T.memref(m, k, T.bf16())
             memref_b_ty = T.memref(k, n, T.bf16())
@@ -182,14 +182,14 @@ def sequence(A, B, C):
                     #              BB      <- Event to start trace capture
                     #                   C  <- Trace mode, 00=event=time, 01=event-PC, 10=execution
                     # Configure so that "Event 1" (always true) causes tracing to start
-                    ipu_write32(
+                    npu_write32(
                         column=compute_tile2_col,
                         row=compute_tile2_row,
                         address=0x340D0,
                         value=0x00010000,
                     )
                     # 0x340D4: Trace Control 1
-                    ipu_write32(
+                    npu_write32(
                         column=compute_tile2_col,
                         row=compute_tile2_row,
                         address=0x340D4,
@@ -197,7 +197,7 @@ def sequence(A, B, C):
                     )
                     # 0x340E0: Trace Event Group 1  (Which events to trace)
                     #          0xAABBCCDD    AA, BB, CC, DD <- four event slots
-                    ipu_write32(
+                    npu_write32(
                         column=compute_tile2_col,
                         row=compute_tile2_row,
                         address=0x340E0,
@@ -205,14 +205,14 @@ def sequence(A, B, C):
                     )
                     # 0x340E4: Trace Event Group 2  (Which events to trace)
                     #          0xAABBCCDD    AA, BB, CC, DD <- four event slots
-                    ipu_write32(
+                    npu_write32(
                         column=compute_tile2_col,
                         row=compute_tile2_row,
                         address=0x340E4,
                         value=0x2D2C1A4F,
                     )
 
-                    ipu_write32(
+                    npu_write32(
                         column=compute_tile2_col,
                         row=compute_tile2_row,
                         address=0x3FF00,
@@ -223,7 +223,7 @@ def sequence(A, B, C):
                     # out to host DDR memory
                     trace_bd_id = 13  # use BD 13 for writing trace output from compute tile to DDR host memory
                     output_size = C_sz_in_bytes
-                    ipu_writebd_shimtile(
+                    npu_writebd_shimtile(
                         bd_id=trace_bd_id,
                         buffer_length=trace_size,
                         buffer_offset=output_size,
@@ -252,7 +252,7 @@ def sequence(A, B, C):
                         valid_bd=1,
                     )
                     # Set start BD to our shim bd_Id (3)
-                    ipu_write32(column=0, row=0, address=0x1D20C, value=trace_bd_id)
+                    npu_write32(column=0, row=0, address=0x1D20C, value=trace_bd_id)
 
                 # only do 5 tile rows at a time before synchronizing, so we can reuse BDs
                 rows_per_block = 5
@@ -265,7 +265,7 @@ def sequence(A, B, C):
                     num_tile_rows = min(
                         [rows_per_block, M_div_m - tile_row_block * rows_per_block]
                     )
-                    ipu_dma_memcpy_nd(
+                    npu_dma_memcpy_nd(
                         metadata="outC",
                         bd_id=0,
                         mem=C,
@@ -281,7 +281,7 @@ def sequence(A, B, C):
                             * word_size_in
                             // 4
                         )
-                        ipu_dma_memcpy_nd(
+                        npu_dma_memcpy_nd(
                             metadata="inA",
                             bd_id=2 * tile_row + 1,
                             mem=A,
@@ -289,7 +289,7 @@ def sequence(A, B, C):
                             sizes=[N_div_n, K_div_k, m, k_in_i32s],
                             strides=[0, k_in_i32s, K_in_i32s],
                         )
-                        ipu_dma_memcpy_nd(
+                        npu_dma_memcpy_nd(
                             metadata="inB",
                             bd_id=2 * tile_row + 2,
                             mem=B,
@@ -297,7 +297,7 @@ def sequence(A, B, C):
                             strides=[n_in_i32s, k_x_N_in_i32s, N_in_i32s],
                         )
 
-                    ipu_sync(column=0, row=0, direction=0, channel=0)
+                    npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/basic/matrix_multiplication/single_core/run.lit b/programming_examples/basic/matrix_multiplication/single_core/run.lit
index 0209415093..6f6a32320a 100644
--- a/programming_examples/basic/matrix_multiplication/single_core/run.lit
+++ b/programming_examples/basic/matrix_multiplication/single_core/run.lit
@@ -5,7 +5,7 @@
 //
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../../aie_kernels/aie2/mm.cc -o ./mm.o
 // RUN: %python %S/aie2.py -M 256 -K 256 -N 256 > ./aie.mlir
-// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -M 256 -K 256 -N 256 -v 1 | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -M 256 -K 256 -N 256 -v 1 | FileCheck %s
 // CHECK: PASS!
diff --git a/programming_examples/basic/matrix_multiplication/whole_array/aie2.py b/programming_examples/basic/matrix_multiplication/whole_array/aie2.py
index 69a3c52394..d94a7e8eba 100644
--- a/programming_examples/basic/matrix_multiplication/whole_array/aie2.py
+++ b/programming_examples/basic/matrix_multiplication/whole_array/aie2.py
@@ -74,7 +74,7 @@ def my_matmul(M=512, K=512, N=512):
 
     with mlir_mod_ctx() as ctx:
 
-        @device(AIEDevice.ipu)
+        @device(AIEDevice.npu)
         def device_body():
             memRef_inA_ty = T.memref(m * k, T.bf16())
             memRef_inB_ty = T.memref(k * n, T.bf16())
@@ -317,7 +317,7 @@ def sequence(A, B, C):
                     for i in range(n_cols):
                         C_col_offset = i * n * word_size_out
                         C_offset_in_i32s = (C_col_offset + C_row_offset) // 4
-                        ipu_dma_memcpy_nd(
+                        npu_dma_memcpy_nd(
                             metadata=outC_fifo_names[i],
                             bd_id=0,
                             mem=C,
@@ -345,7 +345,7 @@ def sequence(A, B, C):
                             )
                             A_col_offset_in_i32s = i * m * K * word_size_in // 4
                             B_col_offset_in_i32s = i * n * word_size_in // 4
-                            ipu_dma_memcpy_nd(
+                            npu_dma_memcpy_nd(
                                 metadata=inA_fifo_names[i],
                                 bd_id=2 * tile_row + 1,
                                 mem=A,
@@ -358,7 +358,7 @@ def sequence(A, B, C):
                                 sizes=[N_div_n_div_n_cols, K_div_k, m, k_in_i32s],
                                 strides=[0, k_in_i32s, K_in_i32s],
                             )
-                            ipu_dma_memcpy_nd(
+                            npu_dma_memcpy_nd(
                                 metadata=inB_fifo_names[i],
                                 bd_id=2 * tile_row + 2,
                                 mem=B,
@@ -367,7 +367,7 @@ def sequence(A, B, C):
                                 strides=[n_x_n_cols_in_i32s, k_x_N_in_i32s, N_in_i32s],
                             )
                     for i in range(n_cols):
-                        ipu_sync(column=i, row=0, direction=0, channel=0)
+                        npu_sync(column=i, row=0, direction=0, channel=0)
 
     # print(ctx.module.operation.verify())
     print(ctx.module)
diff --git a/programming_examples/basic/matrix_multiplication/whole_array/run.lit b/programming_examples/basic/matrix_multiplication/whole_array/run.lit
index 202e66b71e..fc23355630 100644
--- a/programming_examples/basic/matrix_multiplication/whole_array/run.lit
+++ b/programming_examples/basic/matrix_multiplication/whole_array/run.lit
@@ -5,8 +5,8 @@
 //
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../../aie_kernels/aie2/mm.cc -o ./mm.o
 // RUN: %python %S/aie2.py -M 512 -K 512 -N 512 > ./aie.mlir
-// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -v 1 -M 512 -K 512 -N 512 | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -v 1 -M 512 -K 512 -N 512 | FileCheck %s
 // CHECK: PASS!
 
diff --git a/programming_examples/basic/passthrough_dmas/CMakeLists.txt b/programming_examples/basic/passthrough_dmas/CMakeLists.txt
index 3986c4a075..c17d3d365b 100644
--- a/programming_examples/basic/passthrough_dmas/CMakeLists.txt
+++ b/programming_examples/basic/passthrough_dmas/CMakeLists.txt
@@ -27,7 +27,7 @@ if (NOT WSL)
 else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif()
 
 set(TARGET_NAME test CACHE STRING "Target to be built")
diff --git a/programming_examples/basic/passthrough_dmas/Makefile b/programming_examples/basic/passthrough_dmas/Makefile
index 13a8d42aae..e09c8a91aa 100644
--- a/programming_examples/basic/passthrough_dmas/Makefile
+++ b/programming_examples/basic/passthrough_dmas/Makefile
@@ -26,13 +26,13 @@ inst/insts.txt: aie2.py
 	rm -rf inst
 	mkdir -p inst 
 	python3 $< ${devicename} ${col} ${LENGTH} > inst/aie.mlir
-	pushd inst && aiecc.py --aie-only-generate-ipu --ipu-insts-name=insts.txt aie.mlir && popd
+	pushd inst && aiecc.py --aie-only-generate-npu --npu-insts-name=insts.txt aie.mlir && popd
 	${powershell} ./build/${targetname}.exe -x build/final.xclbin -i inst/insts.txt -k MLIR_AIE -l ${LENGTH}
 
 build/final.xclbin: build/aie.mlir
 	mkdir -p ${@D}
 	cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \
-				--aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%)
+				--aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%)
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/basic/passthrough_dmas/aie2.py b/programming_examples/basic/passthrough_dmas/aie2.py
index b59d9332ac..f8dc35a6d9 100755
--- a/programming_examples/basic/passthrough_dmas/aie2.py
+++ b/programming_examples/basic/passthrough_dmas/aie2.py
@@ -23,8 +23,8 @@
 if len(sys.argv) == 4:
     N = int(sys.argv[1])
 
-if sys.argv[1] == "ipu":
-    dev = AIEDevice.ipu
+if sys.argv[1] == "npu":
+    dev = AIEDevice.npu
 elif sys.argv[1] == "xcvc1902":
     dev = AIEDevice.xcvc1902
 else:
@@ -62,9 +62,9 @@ def core_body():
 
             @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty)
             def sequence(A, B, C):
-                ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N])
-                ipu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N])
+                npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/basic/passthrough_dmas/run.lit b/programming_examples/basic/passthrough_dmas/run.lit
index a4f5d568b6..a466533551 100644
--- a/programming_examples/basic/passthrough_dmas/run.lit
+++ b/programming_examples/basic/passthrough_dmas/run.lit
@@ -3,8 +3,8 @@
 //
 // REQUIRES: ryzen_ai
 //
-// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir
-// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python %S/aie2.py npu 0 > ./aie.mlir
+// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -l 4096 | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt -l 4096 | FileCheck %s
 // CHECK: PASS!
diff --git a/programming_examples/basic/passthrough_kernel/CMakeLists.txt b/programming_examples/basic/passthrough_kernel/CMakeLists.txt
index 47375adc84..fddc513396 100644
--- a/programming_examples/basic/passthrough_kernel/CMakeLists.txt
+++ b/programming_examples/basic/passthrough_kernel/CMakeLists.txt
@@ -22,7 +22,7 @@ if (NOT WSL)
 else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif ()
 
 set(PASSTHROUGH_SIZE 4096 CACHE STRING "size")
diff --git a/programming_examples/basic/passthrough_kernel/Makefile b/programming_examples/basic/passthrough_kernel/Makefile
index fbfc7580c4..458b992521 100644
--- a/programming_examples/basic/passthrough_kernel/Makefile
+++ b/programming_examples/basic/passthrough_kernel/Makefile
@@ -28,8 +28,8 @@ build/passThrough.cc.o: passThrough.cc
 	
 build/final_${PASSTHROUGH_SIZE}.xclbin: build/aie2_lineBased_8b_${PASSTHROUGH_SIZE}.mlir build/passThrough.cc.o
 	mkdir -p ${@D}
-	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \
-		--xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%)
+	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \
+		--xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%)
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/basic/passthrough_kernel/aie2.py b/programming_examples/basic/passthrough_kernel/aie2.py
index baec4415fa..5b187a7d94 100644
--- a/programming_examples/basic/passthrough_kernel/aie2.py
+++ b/programming_examples/basic/passthrough_kernel/aie2.py
@@ -29,7 +29,7 @@
 
 def passthroughKernel():
 
-    @device(AIEDevice.ipu)
+    @device(AIEDevice.npu)
     def device_body():
         # define types
         memRef_ty = T.memref(lineWidthInBytes, T.ui8())
@@ -87,19 +87,19 @@ def sequence(inTensor, outTensor, notUsed):
                     events=[0x4B, 0x22, 0x21, 0x25, 0x2D, 0x2C, 0x1A, 0x4F],
                 )
 
-            ipu_dma_memcpy_nd(
+            npu_dma_memcpy_nd(
                 metadata="in",
                 bd_id=0,
                 mem=inTensor,
                 sizes=[1, 1, 1, tensorSizeInInt32s],
             )
-            ipu_dma_memcpy_nd(
+            npu_dma_memcpy_nd(
                 metadata="out",
                 bd_id=1,
                 mem=outTensor,
                 sizes=[1, 1, 1, tensorSizeInInt32s],
             )
-            ipu_sync(column=0, row=0, direction=0, channel=0)
+            npu_sync(column=0, row=0, direction=0, channel=0)
 
 
 with mlir_mod_ctx() as ctx:
diff --git a/programming_examples/basic/passthrough_kernel/run.lit b/programming_examples/basic/passthrough_kernel/run.lit
index 30abe48152..7f1c2318b2 100644
--- a/programming_examples/basic/passthrough_kernel/run.lit
+++ b/programming_examples/basic/passthrough_kernel/run.lit
@@ -5,8 +5,8 @@
 //
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -c %S/../../../aie_kernels/generic/passThrough.cc -o passThrough.cc.o
 // RUN: %python %S/aie2.py 4096 | aie-opt -cse -canonicalize -o ./aie.mlir
-// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: g++ %S/test.cpp -o test.exe -std=c++23 -Wall -DPASSTHROUGH_SIZE=4096 -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
-// RUN: %run_on_ipu %python %S/test.py -x aie.xclbin -i insts.txt -k MLIR_AIE -s 4096 | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_npu %python %S/test.py -x aie.xclbin -i insts.txt -k MLIR_AIE -s 4096 | FileCheck %s
 // CHECK: PASS!
diff --git a/programming_examples/basic/vector_add/Makefile b/programming_examples/basic/vector_add/Makefile
index 9a1a7a2a56..61133a555b 100755
--- a/programming_examples/basic/vector_add/Makefile
+++ b/programming_examples/basic/vector_add/Makefile
@@ -13,15 +13,15 @@ ACDC_AIE = $(dir $(shell which aie-opt))/..
 SHELL := /bin/bash
 
 targetname = vectorAdd
-devicename = ipu
+devicename = npu
 col = 0
 
 all: build/final.xclbin
 
 build/final.xclbin: build/aie.mlir
 	mkdir -p ${@D}
-	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \
-		--xclbin-name=${@F} --ipu-insts-name=insts.txt ${<F}
+	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \
+		--xclbin-name=${@F} --npu-insts-name=insts.txt ${<F}
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/basic/vector_add/README.md b/programming_examples/basic/vector_add/README.md
index 34e8d222bc..65cdafefca 100644
--- a/programming_examples/basic/vector_add/README.md
+++ b/programming_examples/basic/vector_add/README.md
@@ -10,11 +10,11 @@
 
 # <ins>Vector Add</ins>
 
-Single tile performs a very simple `+` operations from two vectors loaded into memory. The tile then stores the sum of those two vectors back to external memory. This reference design can be run on either a RyzenAI IPU or a VCK5000. 
+Single tile performs a very simple `+` operation on two vectors loaded into memory. The tile then stores the sum of those two vectors back to external memory. This reference design can be run on either a RyzenAI NPU or a VCK5000.
 
-The kernel executes on AIE tile (`col`, 2). Both input vectors are brought into the tile from Shim tile (`col`, 0). The value of `col` is dependent on whether the application is targetting IPU or VCK5000. The AIE tile performs the summation operations and the Shim tile brings the data back out to external memory.
+The kernel executes on AIE tile (`col`, 2). Both input vectors are brought into the tile from Shim tile (`col`, 0). The value of `col` is dependent on whether the application is targeting NPU or VCK5000. The AIE tile performs the summation operations and the Shim tile brings the data back out to external memory.
 
-To compile and run the design for IPU:
+To compile and run the design for NPU:
 ```
 make
 make run
diff --git a/programming_examples/basic/vector_add/aie2.py b/programming_examples/basic/vector_add/aie2.py
index 6f8ad2d5b6..581729e6ec 100755
--- a/programming_examples/basic/vector_add/aie2.py
+++ b/programming_examples/basic/vector_add/aie2.py
@@ -28,8 +28,8 @@ def my_vector_add():
         if len(sys.argv) != 3:
             raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
 
-        if sys.argv[1] == "ipu":
-            dev = AIEDevice.ipu
+        if sys.argv[1] == "npu":
+            dev = AIEDevice.npu
         elif sys.argv[1] == "xcvc1902":
             dev = AIEDevice.xcvc1902
         else:
@@ -79,10 +79,10 @@ def core_body():
 
             @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty)
             def sequence(A, B, C):
-                ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N])
-                ipu_dma_memcpy_nd(metadata="in1", bd_id=1, mem=A, sizes=[1, 1, 1, N])
-                ipu_dma_memcpy_nd(metadata="in2", bd_id=2, mem=B, sizes=[1, 1, 1, N])
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N])
+                npu_dma_memcpy_nd(metadata="in1", bd_id=1, mem=A, sizes=[1, 1, 1, N])
+                npu_dma_memcpy_nd(metadata="in2", bd_id=2, mem=B, sizes=[1, 1, 1, N])
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/basic/vector_add/run.lit b/programming_examples/basic/vector_add/run.lit
index a429e99221..1922c01828 100644
--- a/programming_examples/basic/vector_add/run.lit
+++ b/programming_examples/basic/vector_add/run.lit
@@ -3,9 +3,9 @@
 //
 // REQUIRES: ryzen_ai
 //
-// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir
-// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python %S/aie2.py npu 0 > ./aie.mlir
+// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
 // CHECK: PASS!
 
diff --git a/programming_examples/basic/vector_exp/CMakeLists.txt b/programming_examples/basic/vector_exp/CMakeLists.txt
index 20452d080e..ee2050a94e 100644
--- a/programming_examples/basic/vector_exp/CMakeLists.txt
+++ b/programming_examples/basic/vector_exp/CMakeLists.txt
@@ -27,7 +27,7 @@ if (NOT WSL)
 else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif()
 
 set(TARGET_NAME test CACHE STRING "Target to be built")
diff --git a/programming_examples/basic/vector_exp/Makefile b/programming_examples/basic/vector_exp/Makefile
index 68205484e0..5b471771ba 100644
--- a/programming_examples/basic/vector_exp/Makefile
+++ b/programming_examples/basic/vector_exp/Makefile
@@ -32,7 +32,7 @@ build/aie.mlir: aie2.py
 build/final.xclbin: build/aie.mlir build/kernels.a
 	mkdir -p ${@D}
 	cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \
-				--aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%)
+				--aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%)
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/basic/vector_exp/aie2.py b/programming_examples/basic/vector_exp/aie2.py
index 66980ac451..f010fcde9b 100644
--- a/programming_examples/basic/vector_exp/aie2.py
+++ b/programming_examples/basic/vector_exp/aie2.py
@@ -35,7 +35,7 @@ def my_eltwise_exp():
     with mlir_mod_ctx() as ctx:
 
         # Device declaration - aie2 device NPU (aka Ryzen AI)
-        @device(AIEDevice.ipu)
+        @device(AIEDevice.npu)
         def device_body():
 
             memRef_ty = T.memref(n, T.bf16())
@@ -113,13 +113,13 @@ def core_body():
 
             @FuncOp.from_py_func(tensor_ty, tensor_ty)
             def sequence(A, C):
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="outC", bd_id=0, mem=C, sizes=[1, 1, 1, C_sz_in_i32s]
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inA", bd_id=1, mem=A, sizes=[1, 1, 1, A_sz_in_i32s]
                 )
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     # Print the mlir conversion
     print(ctx.module)
diff --git a/programming_examples/basic/vector_exp/run.lit b/programming_examples/basic/vector_exp/run.lit
index f2db79ab6a..247ca37a33 100644
--- a/programming_examples/basic/vector_exp/run.lit
+++ b/programming_examples/basic/vector_exp/run.lit
@@ -6,8 +6,8 @@
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -I %S/../../../aie_runtime_lib/AIE2 -c %S/../../../aie_kernels/aie2/bf16_exp.cc -o exp.o
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -I. -c %S/../../../aie_runtime_lib/AIE2/lut_based_ops.cpp -o lut_based_ops.o
 // RUN: ar rvs kernels.a exp.o lut_based_ops.o
-// RUN: %python %S/aie2.py ipu 0 | aie-opt -cse -canonicalize -o ./aie.mlir
-// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python %S/aie2.py npu 0 | aie-opt -cse -canonicalize -o ./aie.mlir
+// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
 // CHECK: PASS!
diff --git a/programming_examples/basic/vector_mult/CMakeLists.txt b/programming_examples/basic/vector_mult/CMakeLists.txt
index 20452d080e..ee2050a94e 100644
--- a/programming_examples/basic/vector_mult/CMakeLists.txt
+++ b/programming_examples/basic/vector_mult/CMakeLists.txt
@@ -27,7 +27,7 @@ if (NOT WSL)
 else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif()
 
 set(TARGET_NAME test CACHE STRING "Target to be built")
diff --git a/programming_examples/basic/vector_mult/Makefile b/programming_examples/basic/vector_mult/Makefile
index 330692f4fb..bc07e3d05b 100755
--- a/programming_examples/basic/vector_mult/Makefile
+++ b/programming_examples/basic/vector_mult/Makefile
@@ -13,15 +13,15 @@ ACDC_AIE = $(dir $(shell which aie-opt))/..
 SHELL := /bin/bash
 
 targetname = vectorMult
-devicename = ipu
+devicename = npu
 col = 0
 
 all: build/final.xclbin
 
 build/final.xclbin: build/aie.mlir
 	mkdir -p ${@D}
-	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \
-		--xclbin-name=${@F} --ipu-insts-name=insts.txt ${<F}
+	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \
+		--xclbin-name=${@F} --npu-insts-name=insts.txt ${<F}
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/basic/vector_mult/README.md b/programming_examples/basic/vector_mult/README.md
index 825b33d3cd..3abe2b9999 100644
--- a/programming_examples/basic/vector_mult/README.md
+++ b/programming_examples/basic/vector_mult/README.md
@@ -10,11 +10,11 @@
 
 # <ins>Vector Multiplication</ins>
 
-Single tile performs a very simple `*` operations from two vectors loaded into memory. The tile then stores the element wise multiplication of those two vectors back to external memory. This reference design can be run on either a RyzenAI IPU or a VCK5000.
+Single tile performs a very simple `*` operation on two vectors loaded into memory. The tile then stores the element-wise multiplication of those two vectors back to external memory. This reference design can be run on either a RyzenAI NPU or a VCK5000.
 
-The kernel executes on AIE tile (`col`, 2). Both input vectors are brought into the tile from Shim tile (`col`, 0). The value of `col` is dependent on whether the application is targetting IPU or VCK5000. The AIE tile performs the multiplication operations and the Shim tile brings the data back out to external memory.
+The kernel executes on AIE tile (`col`, 2). Both input vectors are brought into the tile from Shim tile (`col`, 0). The value of `col` is dependent on whether the application is targeting NPU or VCK5000. The AIE tile performs the multiplication operations and the Shim tile brings the data back out to external memory.
 
-To compile and run the design for IPU:
+To compile and run the design for NPU:
 ```
 make
 make run
diff --git a/programming_examples/basic/vector_mult/aie2.py b/programming_examples/basic/vector_mult/aie2.py
index 5a36f85a33..209f5243bb 100755
--- a/programming_examples/basic/vector_mult/aie2.py
+++ b/programming_examples/basic/vector_mult/aie2.py
@@ -28,8 +28,8 @@ def my_vector_add():
         if len(sys.argv) != 3:
             raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
 
-        if sys.argv[1] == "ipu":
-            dev = AIEDevice.ipu
+        if sys.argv[1] == "npu":
+            dev = AIEDevice.npu
         elif sys.argv[1] == "xcvc1902":
             dev = AIEDevice.xcvc1902
         else:
@@ -79,10 +79,10 @@ def core_body():
 
             @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty)
             def sequence(A, B, C):
-                ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N])
-                ipu_dma_memcpy_nd(metadata="in1", bd_id=1, mem=A, sizes=[1, 1, 1, N])
-                ipu_dma_memcpy_nd(metadata="in2", bd_id=2, mem=B, sizes=[1, 1, 1, N])
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N])
+                npu_dma_memcpy_nd(metadata="in1", bd_id=1, mem=A, sizes=[1, 1, 1, N])
+                npu_dma_memcpy_nd(metadata="in2", bd_id=2, mem=B, sizes=[1, 1, 1, N])
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/basic/vector_mult/run.lit b/programming_examples/basic/vector_mult/run.lit
index a429e99221..1922c01828 100644
--- a/programming_examples/basic/vector_mult/run.lit
+++ b/programming_examples/basic/vector_mult/run.lit
@@ -3,9 +3,9 @@
 //
 // REQUIRES: ryzen_ai
 //
-// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir
-// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python %S/aie2.py npu 0 > ./aie.mlir
+// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
 // CHECK: PASS!
 
diff --git a/programming_examples/basic/vector_reduce_add/CMakeLists.txt b/programming_examples/basic/vector_reduce_add/CMakeLists.txt
index 9ae325a430..024b4cfd54 100644
--- a/programming_examples/basic/vector_reduce_add/CMakeLists.txt
+++ b/programming_examples/basic/vector_reduce_add/CMakeLists.txt
@@ -22,7 +22,7 @@ if (NOT WSL)
 else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif()
 
 set(TARGET_NAME test CACHE STRING "Target to be built")
diff --git a/programming_examples/basic/vector_reduce_add/Makefile b/programming_examples/basic/vector_reduce_add/Makefile
index 37ca25abec..b0f8eebe0c 100644
--- a/programming_examples/basic/vector_reduce_add/Makefile
+++ b/programming_examples/basic/vector_reduce_add/Makefile
@@ -11,7 +11,7 @@ include ../../makefile-common
 ACDC_AIE = $(dir $(shell which aie-opt))/..
 
 targetname = vector_max
-devicename = ipu
+devicename = npu
 col = 0
 CHESS_FLAGS=${CHESSCCWRAP2_FLAGS}
 KERNEL_LIB=${ACDC_AIE}/../../aie_kernels/aie2/
@@ -29,7 +29,7 @@ build/aie.mlir: aie2.py
 build/final.xclbin: build/aie.mlir build/i32_add_reduce.o
 	mkdir -p ${@D}
 	cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \
-				--aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%)
+				--aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%)
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/basic/vector_reduce_add/aie2.py b/programming_examples/basic/vector_reduce_add/aie2.py
index 098de83380..b8b8ff5c59 100644
--- a/programming_examples/basic/vector_reduce_add/aie2.py
+++ b/programming_examples/basic/vector_reduce_add/aie2.py
@@ -26,8 +26,8 @@ def my_reduce_add():
         if len(sys.argv) != 3:
             raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
 
-        if sys.argv[1] == "ipu":
-            dev = AIEDevice.ipu
+        if sys.argv[1] == "npu":
+            dev = AIEDevice.npu
         elif sys.argv[1] == "xcvc1902":
             dev = AIEDevice.xcvc1902
         else:
@@ -71,9 +71,9 @@ def core_body():
 
             @FuncOp.from_py_func(tensor_ty, tensor_ty)
             def sequence(A, C):
-                ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 1])
-                ipu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 1])
+                npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/basic/vector_reduce_add/run.lit b/programming_examples/basic/vector_reduce_add/run.lit
index 192380beb0..37c0544b6d 100644
--- a/programming_examples/basic/vector_reduce_add/run.lit
+++ b/programming_examples/basic/vector_reduce_add/run.lit
@@ -3,9 +3,9 @@
 //
 // REQUIRES: ryzen_ai, chess
 //
-// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir
+// RUN: %python %S/aie2.py npu 0 > ./aie.mlir
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../aie_kernels/aie2/reduce_add.cc -o reduce_add.cc.o
-// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
 // CHECK: PASS!
diff --git a/programming_examples/basic/vector_reduce_max/CMakeLists.txt b/programming_examples/basic/vector_reduce_max/CMakeLists.txt
index 9ae325a430..024b4cfd54 100644
--- a/programming_examples/basic/vector_reduce_max/CMakeLists.txt
+++ b/programming_examples/basic/vector_reduce_max/CMakeLists.txt
@@ -22,7 +22,7 @@ if (NOT WSL)
 else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif()
 
 set(TARGET_NAME test CACHE STRING "Target to be built")
diff --git a/programming_examples/basic/vector_reduce_max/Makefile b/programming_examples/basic/vector_reduce_max/Makefile
index 55a013704d..5e47d478b2 100755
--- a/programming_examples/basic/vector_reduce_max/Makefile
+++ b/programming_examples/basic/vector_reduce_max/Makefile
@@ -11,7 +11,7 @@ include ../../makefile-common
 ACDC_AIE = $(dir $(shell which aie-opt))/..
 
 targetname = reduce_max
-devicename = ipu
+devicename = npu
 col = 0
 CHESS_FLAGS=${CHESSCCWRAP2_FLAGS}
 KERNEL_LIB=../../../aie_kernels/aie2
@@ -29,7 +29,7 @@ build/aie.mlir: aie2.py
 build/final.xclbin: build/aie.mlir build/reduce_max.cc.o
 	mkdir -p ${@D}
 	cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \
-				--aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%)
+				--aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%)
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/basic/vector_reduce_max/aie2.py b/programming_examples/basic/vector_reduce_max/aie2.py
index 5fc3e39d72..9ef47de0bf 100755
--- a/programming_examples/basic/vector_reduce_max/aie2.py
+++ b/programming_examples/basic/vector_reduce_max/aie2.py
@@ -26,8 +26,8 @@ def my_reduce_max():
         if len(sys.argv) != 3:
             raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
 
-        if sys.argv[1] == "ipu":
-            dev = AIEDevice.ipu
+        if sys.argv[1] == "npu":
+            dev = AIEDevice.npu
         elif sys.argv[1] == "xcvc1902":
             dev = AIEDevice.xcvc1902
         else:
@@ -71,9 +71,9 @@ def core_body():
 
             @FuncOp.from_py_func(tensor_ty, tensor_ty)
             def sequence(A, C):
-                ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 1])
-                ipu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 1])
+                npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/basic/vector_reduce_max/run.lit b/programming_examples/basic/vector_reduce_max/run.lit
index 6c3233183c..584d7c1628 100644
--- a/programming_examples/basic/vector_reduce_max/run.lit
+++ b/programming_examples/basic/vector_reduce_max/run.lit
@@ -4,8 +4,8 @@
 // REQUIRES: ryzen_ai, chess
 //
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../aie_kernels/aie2/reduce_max.cc -o reduce_max.cc.o
-// RUN: %python %S/aie2.py ipu 0 | aie-opt -cse -canonicalize -o ./aie.mlir
-// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python %S/aie2.py npu 0 | aie-opt -cse -canonicalize -o ./aie.mlir
+// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: g++ %S/test.cpp -o test.exe -std=c++23 -Wall -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
 // CHECK: PASS!
diff --git a/programming_examples/basic/vector_reduce_min/CMakeLists.txt b/programming_examples/basic/vector_reduce_min/CMakeLists.txt
index 76d48dfe36..820bc8059d 100644
--- a/programming_examples/basic/vector_reduce_min/CMakeLists.txt
+++ b/programming_examples/basic/vector_reduce_min/CMakeLists.txt
@@ -22,7 +22,7 @@ if (NOT WSL)
 else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif()
 
 set(TARGET_NAME test CACHE STRING "Target to be built")
diff --git a/programming_examples/basic/vector_reduce_min/Makefile b/programming_examples/basic/vector_reduce_min/Makefile
index 177213e22a..b4321855e1 100755
--- a/programming_examples/basic/vector_reduce_min/Makefile
+++ b/programming_examples/basic/vector_reduce_min/Makefile
@@ -11,7 +11,7 @@ include ../../makefile-common
 ACDC_AIE = $(dir $(shell which aie-opt))/..
 
 targetname = reduce_min
-devicename = ipu
+devicename = npu
 col = 0
 CHESS_FLAGS=${CHESSCCWRAP2_FLAGS}
 KERNEL_LIB=../../../aie_kernels/aie2
@@ -29,7 +29,7 @@ build/aie.mlir: aie2.py
 build/final.xclbin: build/aie.mlir build/reduce_min.cc.o
 	mkdir -p ${@D}
 	cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \
-				--aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%)
+				--aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%)
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/basic/vector_reduce_min/aie2.py b/programming_examples/basic/vector_reduce_min/aie2.py
index 35bb351fee..eafca4a57f 100755
--- a/programming_examples/basic/vector_reduce_min/aie2.py
+++ b/programming_examples/basic/vector_reduce_min/aie2.py
@@ -26,8 +26,8 @@ def my_reduce_min():
         if len(sys.argv) != 3:
             raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
 
-        if sys.argv[1] == "ipu":
-            dev = AIEDevice.ipu
+        if sys.argv[1] == "npu":
+            dev = AIEDevice.npu
         elif sys.argv[1] == "xcvc1902":
             dev = AIEDevice.xcvc1902
         else:
@@ -71,9 +71,9 @@ def core_body():
 
             @FuncOp.from_py_func(tensor_ty, tensor_ty)
             def sequence(A, C):
-                ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 1])
-                ipu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 1])
+                npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/basic/vector_reduce_min/run.lit b/programming_examples/basic/vector_reduce_min/run.lit
index 95ecbd533a..710a9a02cd 100644
--- a/programming_examples/basic/vector_reduce_min/run.lit
+++ b/programming_examples/basic/vector_reduce_min/run.lit
@@ -4,8 +4,8 @@
 // REQUIRES: ryzen_ai, chess
 //
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../aie_kernels/aie2/reduce_min.cc -o reduce_min.cc.o
-// RUN: %python %S/aie2.py ipu 0 | aie-opt -cse -canonicalize -o ./aie.mlir
-// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python %S/aie2.py npu 0 | aie-opt -cse -canonicalize -o ./aie.mlir
+// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: g++ %S/test.cpp -o test.exe -std=c++23 -Wall -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
 // CHECK: PASS!
diff --git a/programming_examples/basic/vector_scalar_add/CMakeLists.txt b/programming_examples/basic/vector_scalar_add/CMakeLists.txt
index c4ca0825d4..20f5d8a4a3 100644
--- a/programming_examples/basic/vector_scalar_add/CMakeLists.txt
+++ b/programming_examples/basic/vector_scalar_add/CMakeLists.txt
@@ -27,7 +27,7 @@ if (NOT WSL)
 else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif()
 
 set(TARGET_NAME test CACHE STRING "Target to be built")
diff --git a/programming_examples/basic/vector_scalar_add/Makefile b/programming_examples/basic/vector_scalar_add/Makefile
index 4ad8553675..463b63532b 100644
--- a/programming_examples/basic/vector_scalar_add/Makefile
+++ b/programming_examples/basic/vector_scalar_add/Makefile
@@ -18,8 +18,8 @@ build/aie.mlir: aie2.py
 
 build/final.xclbin: build/aie.mlir
 	mkdir -p ${@D}
-	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \
-		--xclbin-name=${@F} --ipu-insts-name=insts.txt ${<F}
+	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \
+		--xclbin-name=${@F} --npu-insts-name=insts.txt ${<F}
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/basic/vector_scalar_add/aie2.py b/programming_examples/basic/vector_scalar_add/aie2.py
index 7c99acd401..ef36a8a891 100644
--- a/programming_examples/basic/vector_scalar_add/aie2.py
+++ b/programming_examples/basic/vector_scalar_add/aie2.py
@@ -15,7 +15,7 @@
 def my_vector_bias_add():
     with mlir_mod_ctx() as ctx:
 
-        @device(AIEDevice.ipu)
+        @device(AIEDevice.npu)
         def device_body():
             memRef_16_ty = T.memref(16, T.i32())
             memRef_8_ty = T.memref(8, T.i32())
@@ -61,13 +61,13 @@ def core_body():
 
             @FuncOp.from_py_func(memRef_64_ty, memRef_32_ty, memRef_64_ty)
             def sequence(inTensor, notUsed, outTensor):
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="out0", bd_id=0, mem=outTensor, sizes=[1, 1, 1, 64]
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="in0", bd_id=1, mem=inTensor, sizes=[1, 1, 1, 64]
                 )
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/basic/vector_scalar_add/run.lit b/programming_examples/basic/vector_scalar_add/run.lit
index 49cd75e360..82fc93e501 100644
--- a/programming_examples/basic/vector_scalar_add/run.lit
+++ b/programming_examples/basic/vector_scalar_add/run.lit
@@ -4,8 +4,8 @@
 // REQUIRES: ryzen_ai
 //
 // RUN: %python %S/aie2.py > ./aie.mlir
-// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
 // CHECK: PASS!
 
diff --git a/programming_examples/basic/vector_scalar_mul/CMakeLists.txt b/programming_examples/basic/vector_scalar_mul/CMakeLists.txt
index c4ca0825d4..20f5d8a4a3 100644
--- a/programming_examples/basic/vector_scalar_mul/CMakeLists.txt
+++ b/programming_examples/basic/vector_scalar_mul/CMakeLists.txt
@@ -27,7 +27,7 @@ if (NOT WSL)
 else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif()
 
 set(TARGET_NAME test CACHE STRING "Target to be built")
diff --git a/programming_examples/basic/vector_scalar_mul/Makefile b/programming_examples/basic/vector_scalar_mul/Makefile
index c6f18a71b3..e93b53da4c 100755
--- a/programming_examples/basic/vector_scalar_mul/Makefile
+++ b/programming_examples/basic/vector_scalar_mul/Makefile
@@ -36,12 +36,12 @@ build/aie_trace.mlir: aie2.py
 build/final.xclbin: build/aie.mlir build/scale.o
 	mkdir -p ${@D}
 	cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \
-				--aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%)
+				--aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%)
 
 build/final_trace.xclbin: build/aie_trace.mlir build/scale.o
 	mkdir -p ${@D}
 	cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \
-				--aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%)
+				--aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%)
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/basic/vector_scalar_mul/aie2.py b/programming_examples/basic/vector_scalar_mul/aie2.py
index bf85beae56..caed881c92 100755
--- a/programming_examples/basic/vector_scalar_mul/aie2.py
+++ b/programming_examples/basic/vector_scalar_mul/aie2.py
@@ -25,7 +25,7 @@ def my_vector_scalar(trace_size):
 
     vectorized = True
 
-    @device(AIEDevice.ipu)
+    @device(AIEDevice.npu)
     def device_body():
         memRef_ty = T.memref(n, T.i32())
         memRef_ty2 = T.memref(1, T.i32())
@@ -92,10 +92,10 @@ def sequence(A, F, C):
                     size=trace_size,
                     offset=N_in_bytes,
                 )
-            ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N])
-            ipu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
-            ipu_dma_memcpy_nd(metadata="infactor", bd_id=2, mem=F, sizes=[1, 1, 1, 1])
-            ipu_sync(column=0, row=0, direction=0, channel=0)
+            npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N])
+            npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
+            npu_dma_memcpy_nd(metadata="infactor", bd_id=2, mem=F, sizes=[1, 1, 1, 1])
+            npu_sync(column=0, row=0, direction=0, channel=0)
 
 
 try:
diff --git a/programming_examples/basic/vector_scalar_mul/run.lit b/programming_examples/basic/vector_scalar_mul/run.lit
index ab713ff7bb..a38f82b550 100644
--- a/programming_examples/basic/vector_scalar_mul/run.lit
+++ b/programming_examples/basic/vector_scalar_mul/run.lit
@@ -5,8 +5,8 @@
 //
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../aie_kernels/aie2/scale.cc -o ./scale.o
 // RUN: %python %S/aie2.py > ./aie.mlir
-// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: g++ %S/test.cpp -o test.exe -std=c++23 -Wall -DPASSTHROUGH_SIZE=4096 -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
-// RUN: %run_on_ipu %python %S/test.py -x aie.xclbin -i insts.txt -k MLIR_AIE -s 4096 | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_npu %python %S/test.py -x aie.xclbin -i insts.txt -k MLIR_AIE -s 4096 | FileCheck %s
 // CHECK: PASS!
diff --git a/programming_examples/basic/vector_sum/CMakeLists.txt b/programming_examples/basic/vector_sum/CMakeLists.txt
index f253b14fb0..5e637b4d7d 100644
--- a/programming_examples/basic/vector_sum/CMakeLists.txt
+++ b/programming_examples/basic/vector_sum/CMakeLists.txt
@@ -27,7 +27,7 @@ if (NOT WSL)
 else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif()
 
 set(TARGET_NAME test CACHE STRING "Target to be built")
diff --git a/programming_examples/basic/vector_sum/Makefile b/programming_examples/basic/vector_sum/Makefile
index 8c0372f191..e9c2016543 100755
--- a/programming_examples/basic/vector_sum/Makefile
+++ b/programming_examples/basic/vector_sum/Makefile
@@ -13,15 +13,15 @@ ACDC_AIE = $(dir $(shell which aie-opt))/..
 SHELL := /bin/bash
 
 targetname = vectorSum
-devicename = ipu
+devicename = npu
 col = 0
 
 all: build/final.xclbin
 
 build/final.xclbin: build/aie.mlir
 	mkdir -p ${@D}
-	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \
-		--xclbin-name=${@F} --ipu-insts-name=insts.txt ${<F}
+	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \
+		--xclbin-name=${@F} --npu-insts-name=insts.txt ${<F}
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/basic/vector_sum/README.md b/programming_examples/basic/vector_sum/README.md
index bb591ea622..60d9715528 100644
--- a/programming_examples/basic/vector_sum/README.md
+++ b/programming_examples/basic/vector_sum/README.md
@@ -10,9 +10,9 @@
 
 # <ins>Vector sum</ins>
 
-Single tile traverses through a vector in memory and returns the sum of each value in the vector. The tile that performs the operation is tile (`col`, 2) and the data is read from and written to external memory through Shim tile (`col`, 0). A buffer in tile (`col`, 2) is used to store the temporary maximum value during processing, which is then pushed through an object FIFO to the Shim tile when processing is complete. This reference design can be run on either a RyzenAI IPU or a VCK5000. The value of `col` is dependent on whether the application is targetting IPU or VCK5000.
+Single tile traverses through a vector in memory and returns the sum of all values in the vector. The tile that performs the operation is tile (`col`, 2) and the data is read from and written to external memory through Shim tile (`col`, 0). A buffer in tile (`col`, 2) is used to store the temporary maximum value during processing, which is then pushed through an object FIFO to the Shim tile when processing is complete. This reference design can be run on either a RyzenAI NPU or a VCK5000. The value of `col` is dependent on whether the application is targeting NPU or VCK5000.
 
-To compile and run the design for IPU:
+To compile and run the design for NPU:
 ```
 make
 make run
diff --git a/programming_examples/basic/vector_sum/aie2.py b/programming_examples/basic/vector_sum/aie2.py
index 4e40b8009c..8073833962 100755
--- a/programming_examples/basic/vector_sum/aie2.py
+++ b/programming_examples/basic/vector_sum/aie2.py
@@ -26,8 +26,8 @@ def my_vector_sum():
         if len(sys.argv) != 3:
             raise ValueError("[ERROR] Need 2 command line arguments (Device name, Col)")
 
-        if sys.argv[1] == "ipu":
-            dev = AIEDevice.ipu
+        if sys.argv[1] == "npu":
+            dev = AIEDevice.npu
         elif sys.argv[1] == "xcvc1902":
             dev = AIEDevice.xcvc1902
         else:
@@ -77,9 +77,9 @@ def core_body():
 
             @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty)
             def sequence(A, B, C):
-                ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 1])
-                ipu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 1])
+                npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N])
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/basic/vector_sum/run.lit b/programming_examples/basic/vector_sum/run.lit
index a429e99221..1922c01828 100644
--- a/programming_examples/basic/vector_sum/run.lit
+++ b/programming_examples/basic/vector_sum/run.lit
@@ -3,9 +3,9 @@
 //
 // REQUIRES: ryzen_ai
 //
-// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir
-// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python %S/aie2.py npu 0 > ./aie.mlir
+// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
 // CHECK: PASS!
 
diff --git a/programming_examples/lit.cfg.py b/programming_examples/lit.cfg.py
index b774bc5280..c28fdec458 100755
--- a/programming_examples/lit.cfg.py
+++ b/programming_examples/lit.cfg.py
@@ -45,7 +45,7 @@
 # for python
 llvm_config.with_environment("PYTHONPATH", os.path.join(config.aie_obj_root, "python"))
 
-run_on_ipu = "echo"
+run_on_npu = "echo"
 xrt_flags = ""
 
 # Not using run_on_board anymore, need more specific per-platform commands
@@ -137,8 +137,8 @@
                     aie_model = m.group(2)
                     print("\tmodel:", aie_model)
                 config.available_features.add("ryzen_ai")
-                run_on_ipu = (
-                    f"flock /tmp/ipu.lock {config.aie_src_root}/utils/run_on_ipu.sh"
+                run_on_npu = (
+                    f"flock /tmp/npu.lock {config.aie_src_root}/utils/run_on_npu.sh"
                 )
     except:
         print("Failed to run xbutil")
@@ -146,7 +146,7 @@
 else:
     print("xrt not found")
 
-config.substitutions.append(("%run_on_ipu", run_on_ipu))
+config.substitutions.append(("%run_on_npu", run_on_npu))
 config.substitutions.append(("%xrt_flags", xrt_flags))
 config.substitutions.append(("%XRT_DIR", config.xrt_dir))
 config.environment["XRT_HACK_UNSECURE_LOADING_XCLBIN"] = "1"
diff --git a/programming_examples/makefile-common b/programming_examples/makefile-common
index 5ab55c2e08..b5007535b8 100644
--- a/programming_examples/makefile-common
+++ b/programming_examples/makefile-common
@@ -1,4 +1,4 @@
-# Contains common definitions used across the Makefiles of ipu-xrt tests.
+# Contains common definitions used across the Makefiles of npu-xrt tests.
 REPO_ROOT ?= $(shell realpath $(dir $(shell which aie-opt))/../../..)
 INSTALL_ROOT ?= $(shell realpath $(dir $(shell which aie-opt))/..)
 
diff --git a/programming_examples/ml/bottleneck/CMakeLists.txt b/programming_examples/ml/bottleneck/CMakeLists.txt
index 4b897cb29c..c7db0e9c5c 100644
--- a/programming_examples/ml/bottleneck/CMakeLists.txt
+++ b/programming_examples/ml/bottleneck/CMakeLists.txt
@@ -25,7 +25,7 @@ else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(OpenCV_DIR C:/Technical/thirdParty/opencv/build CACHE STRING "Path to OpenCV install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif ()
 
 set(EDGEDETECT_WIDTH 1920 CACHE STRING "image width")
diff --git a/programming_examples/ml/bottleneck/Makefile b/programming_examples/ml/bottleneck/Makefile
index f5c6e4561f..43aa8e7e87 100755
--- a/programming_examples/ml/bottleneck/Makefile
+++ b/programming_examples/ml/bottleneck/Makefile
@@ -16,7 +16,7 @@ build/${mlirFileName}.mlir: aie2.py
 	python3 $< > $@
 
 insts.txt: build/${mlirFileName}.mlir
-	aiecc.py -v --aie-only-generate-ipu --ipu-insts-name=$@ $<
+	aiecc.py -v --aie-only-generate-npu --npu-insts-name=$@ $<
 
 build/conv2dk1.o: ../../../aie_kernels/aie2/conv2dk1.cc
 	xchesscc -d ${CHESSCC2_FLAGS} -DINT8_ACT -c $< -o $@
@@ -28,8 +28,8 @@ build/conv2dk1_skip.o: ../../../aie_kernels/aie2/conv2dk1_skip.cc
 	xchesscc -d ${CHESSCC2_FLAGS} -DINT8_ACT -c $< -o $@
 
 build/final.xclbin: build/${mlirFileName}.mlir 
-	cd build && aiecc.py -v --aie-generate-cdo --aie-generate-ipu --no-compile-host \
-		--xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%)
+	cd build && aiecc.py -v --aie-generate-cdo --aie-generate-npu --no-compile-host \
+		--xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%)
 
 clean:
 	rm -rf build *.elf* *.lst *.bif ${mlirFileName}.mlir.prj log .xclbin sim \
diff --git a/programming_examples/ml/bottleneck/aie2.py b/programming_examples/ml/bottleneck/aie2.py
index a488ae8ded..ac349259f4 100644
--- a/programming_examples/ml/bottleneck/aie2.py
+++ b/programming_examples/ml/bottleneck/aie2.py
@@ -38,7 +38,7 @@
 def bottleneck4AIEs():
     with mlir_mod_ctx() as ctx:
 
-        @device(AIEDevice.ipu)
+        @device(AIEDevice.npu)
         def deviceBody():
 
             # define types
@@ -543,9 +543,9 @@ def sequence(inputFromL3, weightsFromL3, outputToL3):
                     # EVENTS_CORE_PORT_RUNNING_0 (0x4B)
 
                     # Trace_Event0  (4 slots)
-                    ipu_write32(0, 4, 0x340E0, 0x4B222125)
+                    npu_write32(0, 4, 0x340E0, 0x4B222125)
                     # Trace_Event1  (4 slots)
-                    ipu_write32(0, 4, 0x340E4, 0x2D2C1A4F)
+                    npu_write32(0, 4, 0x340E4, 0x2D2C1A4F)
 
                     # Event slots as configured above:
                     # 0: Kernel executes vector instruction
@@ -559,13 +559,13 @@ def sequence(inputFromL3, weightsFromL3, outputToL3):
 
                     # Stream_Switch_Event_Port_Selection_0
                     # This is necessary to capture the Port_Running_0 and Port_Running_1 events
-                    ipu_write32(0, 4, 0x3FF00, 0x121)
+                    npu_write32(0, 4, 0x3FF00, 0x121)
 
                     # Trace_Control0: Define trace start and stop triggers. Set start event TRUE.
-                    ipu_write32(0, 4, 0x340D0, 0x10000)
+                    npu_write32(0, 4, 0x340D0, 0x10000)
 
                     # Start trace copy out.
-                    ipu_writebd_shimtile(
+                    npu_writebd_shimtile(
                         bd_id=3,
                         buffer_length=trace_sz_in_i32s,
                         buffer_offset=acitivationsOutSize32b,
@@ -593,45 +593,45 @@ def sequence(inputFromL3, weightsFromL3, outputToL3):
                         use_next_bd=0,
                         valid_bd=1,
                     )
-                    ipu_write32(0, 2, 0x1D20C, 0x3)
+                    npu_write32(0, 2, 0x1D20C, 0x3)
 
                 # write RTP parameters
-                IpuWriteRTPOp(
+                NpuWriteRTPOp(
                     "rtpComputeTile2", col=0, row=2, index=0, value=1
                 )  # scale
-                IpuWriteRTPOp(
+                NpuWriteRTPOp(
                     "rtpComputeTile3", col=0, row=3, index=0, value=1
                 )  # scale
-                IpuWriteRTPOp(
+                NpuWriteRTPOp(
                     "rtpComputeTile5", col=0, row=5, index=0, value=1
                 )  # scale
-                IpuWriteRTPOp(
+                NpuWriteRTPOp(
                     "rtpComputeTile4", col=0, row=4, index=0, value=1
                 )  # scale: conv1x1 with the same scale as the input so we match the scaling factor of output after conv1x1 and the initial input
-                IpuWriteRTPOp(
+                NpuWriteRTPOp(
                     "rtpComputeTile4", col=0, row=4, index=1, value=0
                 )  # skip_scale
 
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inOF_act_L3L2",
                     bd_id=0,
                     mem=inputFromL3,
                     sizes=[1, 1, 1, activationsInSize32b],
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="outOFL2L3",
                     bd_id=2,
                     mem=outputToL3,
                     sizes=[1, 1, 1, acitivationsOutSize32b],
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inOF_wts_0_L3L2",
                     bd_id=1,
                     mem=weightsFromL3,
                     sizes=[1, 1, 1, totalWeightsSize32b],
                 )
 
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/ml/bottleneck/run.lit b/programming_examples/ml/bottleneck/run.lit
index ec30002c97..2a6d25eb25 100644
--- a/programming_examples/ml/bottleneck/run.lit
+++ b/programming_examples/ml/bottleneck/run.lit
@@ -7,6 +7,6 @@
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -DUINT8_ACT -c %S/../../../aie_kernels/aie2/conv2dk3.cc -o conv2dk3.o
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -DINT8_ACT -c %S/../../../aie_kernels/aie2/conv2dk1_skip.cc -o conv2dk1_skip.o
 // RUN: %python %S/aie2.py | aie-opt -cse -canonicalize -o ./aie.mlir
-// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
-// RUN: %run_on_ipu %python %S/test.py | FileCheck %s
+// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
+// RUN: %run_on_npu %python %S/test.py | FileCheck %s
 // CHECK: PASS!
\ No newline at end of file
diff --git a/programming_examples/ml/conv2d/CMakeLists.txt b/programming_examples/ml/conv2d/CMakeLists.txt
index 4b897cb29c..c7db0e9c5c 100644
--- a/programming_examples/ml/conv2d/CMakeLists.txt
+++ b/programming_examples/ml/conv2d/CMakeLists.txt
@@ -25,7 +25,7 @@ else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(OpenCV_DIR C:/Technical/thirdParty/opencv/build CACHE STRING "Path to OpenCV install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif ()
 
 set(EDGEDETECT_WIDTH 1920 CACHE STRING "image width")
diff --git a/programming_examples/ml/conv2d/Makefile b/programming_examples/ml/conv2d/Makefile
index 0274f3fef7..5cb1fab988 100755
--- a/programming_examples/ml/conv2d/Makefile
+++ b/programming_examples/ml/conv2d/Makefile
@@ -18,14 +18,14 @@ build/${mlirFileName}.mlir: aie2.py
 
 
 insts.txt: build/${mlirFileName}.mlir
-	aiecc.py -v --aie-only-generate-ipu --ipu-insts-name=$@ $<
+	aiecc.py -v --aie-only-generate-npu --npu-insts-name=$@ $<
 
 build/conv2dk1_i8.o: ../../../aie_kernels/aie2/conv2dk1_i8.cc
 	xchesscc -d ${CHESSCC2_FLAGS} -DINT8_ACT -c $< -o $@
 
 build/final.xclbin: build/${mlirFileName}.mlir 
-	cd build && aiecc.py -v --aie-generate-cdo --aie-generate-ipu --no-compile-host \
-		--xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%)
+	cd build && aiecc.py -v --aie-generate-cdo --aie-generate-npu --no-compile-host \
+		--xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%)
 
 clean:
 	rm -rf build *.elf* *.lst *.bif ${mlirFileName}.mlir.prj log* *.xclbin sim \
diff --git a/programming_examples/ml/conv2d/aie2.py b/programming_examples/ml/conv2d/aie2.py
index 74a2c38838..82584170cf 100644
--- a/programming_examples/ml/conv2d/aie2.py
+++ b/programming_examples/ml/conv2d/aie2.py
@@ -42,7 +42,7 @@
 def conv2dk1():
     with mlir_mod_ctx() as ctx:
 
-        @device(AIEDevice.ipu)
+        @device(AIEDevice.npu)
         def device_body():
 
             actIn_ty = T.memref(actIn, T.i8())
@@ -162,14 +162,14 @@ def sequence(I, W, O):
                     #              BB      <- Event to start trace capture
                     #                   C  <- Trace mode, 00=event=time, 01=event-PC, 10=execution
                     # Configure so that "Event 1" (always true) causes tracing to start
-                    ipu_write32(
+                    npu_write32(
                         column=compute_tile2_col,
                         row=compute_tile2_row,
                         address=0x340D0,
                         value=0x00010000,
                     )
                     # 0x340D4: Trace Control 1
-                    ipu_write32(
+                    npu_write32(
                         column=compute_tile2_col,
                         row=compute_tile2_row,
                         address=0x340D4,
@@ -177,7 +177,7 @@ def sequence(I, W, O):
                     )
                     # 0x340E0: Trace Event Group 1  (Which events to trace)
                     #          0xAABBCCDD    AA, BB, CC, DD <- four event slots
-                    ipu_write32(
+                    npu_write32(
                         column=compute_tile2_col,
                         row=compute_tile2_row,
                         address=0x340E0,
@@ -185,14 +185,14 @@ def sequence(I, W, O):
                     )
                     # 0x340E4: Trace Event Group 2  (Which events to trace)
                     #          0xAABBCCDD    AA, BB, CC, DD <- four event slots
-                    ipu_write32(
+                    npu_write32(
                         column=compute_tile2_col,
                         row=compute_tile2_row,
                         address=0x340E4,
                         value=0x2D2C1A4F,
                     )
 
-                    ipu_write32(
+                    npu_write32(
                         column=compute_tile2_col,
                         row=compute_tile2_row,
                         address=0x3FF00,
@@ -203,7 +203,7 @@ def sequence(I, W, O):
                     # out to host DDR memory
                     trace_bd_id = 13  # use BD 13 for writing trace output from compute tile to DDR host memory
                     output_size = bufOut
-                    ipu_writebd_shimtile(
+                    npu_writebd_shimtile(
                         bd_id=trace_bd_id,
                         buffer_length=trace_size,
                         buffer_offset=output_size,
@@ -232,29 +232,29 @@ def sequence(I, W, O):
                         valid_bd=1,
                     )
                     # Set start BD to our shim bd_Id (3)
-                    ipu_write32(column=0, row=0, address=0x1D20C, value=trace_bd_id)
+                    npu_write32(column=0, row=0, address=0x1D20C, value=trace_bd_id)
 
-                IpuWriteRTPOp("rtp2", col=0, row=2, index=0, value=10)
+                NpuWriteRTPOp("rtp2", col=0, row=2, index=0, value=10)
 
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inOF_act_L3L2",
                     bd_id=0,
                     mem=I,
                     sizes=[1, 1, 1, tensorSizeInInt32s],
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="outOFL2L3",
                     bd_id=2,
                     mem=O,
                     sizes=[1, 1, 1, tensorSizeInInt32s],
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inOF_wts_0_L3L2",
                     bd_id=2,
                     mem=W,
                     sizes=[1, 1, 1, weightsInInt32s],
                 )
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     #    print(ctx.module.operation.verify())
     print(ctx.module)
diff --git a/programming_examples/ml/conv2d/run.lit b/programming_examples/ml/conv2d/run.lit
index 1eeef90b94..81e08e5444 100644
--- a/programming_examples/ml/conv2d/run.lit
+++ b/programming_examples/ml/conv2d/run.lit
@@ -5,6 +5,6 @@
 //
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8  -DINT8_ACT  -c %S/../../../aie_kernels/aie2/conv2dk1_i8.cc -o conv2dk1_i8.o
 // RUN: %python %S/aie2.py | aie-opt -cse -canonicalize -o ./aie.mlir
-// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
-// RUN: %run_on_ipu %python %S/test.py | FileCheck %s
+// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
+// RUN: %run_on_npu %python %S/test.py | FileCheck %s
 // CHECK: PASS!
\ No newline at end of file
diff --git a/programming_examples/ml/conv2d_fused_relu/CMakeLists.txt b/programming_examples/ml/conv2d_fused_relu/CMakeLists.txt
index 4b897cb29c..c7db0e9c5c 100644
--- a/programming_examples/ml/conv2d_fused_relu/CMakeLists.txt
+++ b/programming_examples/ml/conv2d_fused_relu/CMakeLists.txt
@@ -25,7 +25,7 @@ else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(OpenCV_DIR C:/Technical/thirdParty/opencv/build CACHE STRING "Path to OpenCV install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif ()
 
 set(EDGEDETECT_WIDTH 1920 CACHE STRING "image width")
diff --git a/programming_examples/ml/conv2d_fused_relu/Makefile b/programming_examples/ml/conv2d_fused_relu/Makefile
index 80cb34dc08..6f26a9bf8e 100755
--- a/programming_examples/ml/conv2d_fused_relu/Makefile
+++ b/programming_examples/ml/conv2d_fused_relu/Makefile
@@ -17,14 +17,14 @@ build/${mlirFileName}.mlir: aie2.py
 
 
 insts.txt: build/${mlirFileName}.mlir
-	aiecc.py -v --aie-only-generate-ipu --ipu-insts-name=$@ $<
+	aiecc.py -v --aie-only-generate-npu --npu-insts-name=$@ $<
 
 build/conv2dk1.o: ../../../aie_kernels/aie2/conv2dk1.cc
 	xchesscc -d ${CHESSCC2_FLAGS} -DINT8_ACT -c $< -o $@
 
 build/final.xclbin: build/${mlirFileName}.mlir 
-	cd build && aiecc.py -v --aie-generate-cdo --aie-generate-ipu --no-compile-host \
-		--xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%)
+	cd build && aiecc.py -v --aie-generate-cdo --aie-generate-npu --no-compile-host \
+		--xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%)
 
 clean:
 	rm -rf build *.elf* *.lst *.bif ${mlirFileName}.mlir.prj log* *.xclbin sim \
diff --git a/programming_examples/ml/conv2d_fused_relu/aie2.py b/programming_examples/ml/conv2d_fused_relu/aie2.py
index be0167e3b4..13a59f0934 100644
--- a/programming_examples/ml/conv2d_fused_relu/aie2.py
+++ b/programming_examples/ml/conv2d_fused_relu/aie2.py
@@ -42,7 +42,7 @@
 def conv2dk1():
     with mlir_mod_ctx() as ctx:
 
-        @device(AIEDevice.ipu)
+        @device(AIEDevice.npu)
         def device_body():
 
             actIn_ty = T.memref(actIn, T.i8())
@@ -162,14 +162,14 @@ def sequence(I, W, O):
                     #              BB      <- Event to start trace capture
                     #                   C  <- Trace mode, 00=event=time, 01=event-PC, 10=execution
                     # Configure so that "Event 1" (always true) causes tracing to start
-                    ipu_write32(
+                    npu_write32(
                         column=compute_tile2_col,
                         row=compute_tile2_row,
                         address=0x340D0,
                         value=0x00010000,
                     )
                     # 0x340D4: Trace Control 1
-                    ipu_write32(
+                    npu_write32(
                         column=compute_tile2_col,
                         row=compute_tile2_row,
                         address=0x340D4,
@@ -177,7 +177,7 @@ def sequence(I, W, O):
                     )
                     # 0x340E0: Trace Event Group 1  (Which events to trace)
                     #          0xAABBCCDD    AA, BB, CC, DD <- four event slots
-                    ipu_write32(
+                    npu_write32(
                         column=compute_tile2_col,
                         row=compute_tile2_row,
                         address=0x340E0,
@@ -185,14 +185,14 @@ def sequence(I, W, O):
                     )
                     # 0x340E4: Trace Event Group 2  (Which events to trace)
                     #          0xAABBCCDD    AA, BB, CC, DD <- four event slots
-                    ipu_write32(
+                    npu_write32(
                         column=compute_tile2_col,
                         row=compute_tile2_row,
                         address=0x340E4,
                         value=0x2D2C1A4F,
                     )
 
-                    ipu_write32(
+                    npu_write32(
                         column=compute_tile2_col,
                         row=compute_tile2_row,
                         address=0x3FF00,
@@ -203,7 +203,7 @@ def sequence(I, W, O):
                     # out to host DDR memory
                     trace_bd_id = 13  # use BD 13 for writing trace output from compute tile to DDR host memory
                     output_size = bufOut
-                    ipu_writebd_shimtile(
+                    npu_writebd_shimtile(
                         bd_id=trace_bd_id,
                         buffer_length=trace_size,
                         buffer_offset=output_size,
@@ -232,29 +232,29 @@ def sequence(I, W, O):
                         valid_bd=1,
                     )
                     # Set start BD to our shim bd_Id (3)
-                    ipu_write32(column=0, row=0, address=0x1D20C, value=trace_bd_id)
+                    npu_write32(column=0, row=0, address=0x1D20C, value=trace_bd_id)
 
-                IpuWriteRTPOp("rtp2", col=0, row=2, index=0, value=1)
+                NpuWriteRTPOp("rtp2", col=0, row=2, index=0, value=1)
 
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inOF_act_L3L2",
                     bd_id=0,
                     mem=I,
                     sizes=[1, 1, 1, tensorSizeInInt32s],
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="outOFL2L3",
                     bd_id=2,
                     mem=O,
                     sizes=[1, 1, 1, tensorSizeInInt32s],
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inOF_wts_0_L3L2",
                     bd_id=2,
                     mem=W,
                     sizes=[1, 1, 1, weightsInInt32s],
                 )
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     #    print(ctx.module.operation.verify())
     print(ctx.module)
diff --git a/programming_examples/ml/conv2d_fused_relu/run.lit b/programming_examples/ml/conv2d_fused_relu/run.lit
index 0c122f451e..be7c1c5d08 100644
--- a/programming_examples/ml/conv2d_fused_relu/run.lit
+++ b/programming_examples/ml/conv2d_fused_relu/run.lit
@@ -5,6 +5,6 @@
 //
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -DINT8_ACT -DBIT_WIDTH=8 -c %S/../../../aie_kernels/aie2/conv2dk1.cc -o conv2dk1.o
 // RUN: %python %S/aie2.py | aie-opt -cse -canonicalize -o ./aie.mlir
-// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
-// RUN: %run_on_ipu %python %S/test.py | FileCheck %s
+// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
+// RUN: %run_on_npu %python %S/test.py | FileCheck %s
 // CHECK: PASS!
\ No newline at end of file
diff --git a/programming_examples/ml/eltwise_add/CMakeLists.txt b/programming_examples/ml/eltwise_add/CMakeLists.txt
index c4ca0825d4..20f5d8a4a3 100644
--- a/programming_examples/ml/eltwise_add/CMakeLists.txt
+++ b/programming_examples/ml/eltwise_add/CMakeLists.txt
@@ -27,7 +27,7 @@ if (NOT WSL)
 else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif()
 
 set(TARGET_NAME test CACHE STRING "Target to be built")
diff --git a/programming_examples/ml/eltwise_add/Makefile b/programming_examples/ml/eltwise_add/Makefile
index ebaf16c2f9..f685a607f3 100644
--- a/programming_examples/ml/eltwise_add/Makefile
+++ b/programming_examples/ml/eltwise_add/Makefile
@@ -22,8 +22,8 @@ build/aie.mlir: aie2.py
 
 build/final.xclbin: build/aie.mlir build/add.o
 	mkdir -p ${@D}
-	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \
-		--xclbin-name=${@F} --ipu-insts-name=insts.txt ${<F}
+	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \
+		--xclbin-name=${@F} --npu-insts-name=insts.txt ${<F}
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/ml/eltwise_add/aie2.py b/programming_examples/ml/eltwise_add/aie2.py
index 9a7be696fb..bce1974b34 100644
--- a/programming_examples/ml/eltwise_add/aie2.py
+++ b/programming_examples/ml/eltwise_add/aie2.py
@@ -33,7 +33,7 @@ def my_eltwise_add():
 
     with mlir_mod_ctx() as ctx:
 
-        @device(AIEDevice.ipu)
+        @device(AIEDevice.npu)
         def device_body():
             memRef_ty = T.memref(n, T.bf16())
 
@@ -134,16 +134,16 @@ def core_body():
 
             @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty)
             def sequence(A, B, C):
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="outC", bd_id=0, mem=C, sizes=[1, 1, 1, C_sz_in_i32s]
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inA", bd_id=1, mem=A, sizes=[1, 1, 1, A_sz_in_i32s]
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inB", bd_id=2, mem=B, sizes=[1, 1, 1, B_sz_in_i32s]
                 )
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/ml/eltwise_add/run.lit b/programming_examples/ml/eltwise_add/run.lit
index 8e6562b9e3..863e0d23c4 100644
--- a/programming_examples/ml/eltwise_add/run.lit
+++ b/programming_examples/ml/eltwise_add/run.lit
@@ -5,7 +5,7 @@
 //
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../aie_kernels/aie2/add.cc -o add.o
 // RUN: %python %S/aie2.py 4096 | aie-opt -cse -canonicalize -o ./aie.mlir
-// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall  -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
 // CHECK: PASS!
diff --git a/programming_examples/ml/eltwise_mul/CMakeLists.txt b/programming_examples/ml/eltwise_mul/CMakeLists.txt
index c4ca0825d4..20f5d8a4a3 100644
--- a/programming_examples/ml/eltwise_mul/CMakeLists.txt
+++ b/programming_examples/ml/eltwise_mul/CMakeLists.txt
@@ -27,7 +27,7 @@ if (NOT WSL)
 else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif()
 
 set(TARGET_NAME test CACHE STRING "Target to be built")
diff --git a/programming_examples/ml/eltwise_mul/Makefile b/programming_examples/ml/eltwise_mul/Makefile
index 1ad0072822..e76c26f959 100644
--- a/programming_examples/ml/eltwise_mul/Makefile
+++ b/programming_examples/ml/eltwise_mul/Makefile
@@ -22,8 +22,8 @@ build/aie.mlir: aie2.py
 
 build/final.xclbin: build/aie.mlir build/mul.o
 	mkdir -p ${@D}
-	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \
-		--xclbin-name=${@F} --ipu-insts-name=insts.txt ${<F}
+	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \
+		--xclbin-name=${@F} --npu-insts-name=insts.txt ${<F}
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/ml/eltwise_mul/aie2.py b/programming_examples/ml/eltwise_mul/aie2.py
index c5f15a459d..0b2bea5b51 100644
--- a/programming_examples/ml/eltwise_mul/aie2.py
+++ b/programming_examples/ml/eltwise_mul/aie2.py
@@ -33,7 +33,7 @@ def my_eltwise_mul():
 
     with mlir_mod_ctx() as ctx:
 
-        @device(AIEDevice.ipu)
+        @device(AIEDevice.npu)
         def device_body():
             memRef_ty = T.memref(n, T.bf16())
 
@@ -134,16 +134,16 @@ def core_body():
 
             @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty)
             def sequence(A, B, C):
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="outC", bd_id=0, mem=C, sizes=[1, 1, 1, C_sz_in_i32s]
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inA", bd_id=1, mem=A, sizes=[1, 1, 1, A_sz_in_i32s]
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inB", bd_id=2, mem=B, sizes=[1, 1, 1, B_sz_in_i32s]
                 )
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/ml/eltwise_mul/run.lit b/programming_examples/ml/eltwise_mul/run.lit
index b9a8bd9f2d..c6e794acae 100644
--- a/programming_examples/ml/eltwise_mul/run.lit
+++ b/programming_examples/ml/eltwise_mul/run.lit
@@ -5,7 +5,7 @@
 //
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../aie_kernels/aie2/mul.cc -o mul.o
 // RUN: %python %S/aie2.py 4096 | aie-opt -cse -canonicalize -o ./aie.mlir
-// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall  -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
 // CHECK: PASS!
diff --git a/programming_examples/ml/relu/CMakeLists.txt b/programming_examples/ml/relu/CMakeLists.txt
index c4ca0825d4..20f5d8a4a3 100644
--- a/programming_examples/ml/relu/CMakeLists.txt
+++ b/programming_examples/ml/relu/CMakeLists.txt
@@ -27,7 +27,7 @@ if (NOT WSL)
 else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif()
 
 set(TARGET_NAME test CACHE STRING "Target to be built")
diff --git a/programming_examples/ml/relu/Makefile b/programming_examples/ml/relu/Makefile
index 2869ca2976..f8f5999d8e 100644
--- a/programming_examples/ml/relu/Makefile
+++ b/programming_examples/ml/relu/Makefile
@@ -22,8 +22,8 @@ build/aie.mlir: aie2.py
 
 build/final.xclbin: build/aie.mlir build/relu.o
 	mkdir -p ${@D}
-	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \
-		--xclbin-name=${@F} --ipu-insts-name=insts.txt ${<F}
+	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \
+		--xclbin-name=${@F} --npu-insts-name=insts.txt ${<F}
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/ml/relu/aie2.py b/programming_examples/ml/relu/aie2.py
index 6f3fe40ee0..cf3f278b6d 100644
--- a/programming_examples/ml/relu/aie2.py
+++ b/programming_examples/ml/relu/aie2.py
@@ -35,7 +35,7 @@ def my_relu():
 
     with mlir_mod_ctx() as ctx:
 
-        @device(AIEDevice.ipu)
+        @device(AIEDevice.npu)
         def device_body():
             memRef_ty = T.memref(n, T.bf16())
 
@@ -123,14 +123,14 @@ def sequence(A, C):
                     #              BB      <- Event to start trace capture
                     #                   C  <- Trace mode, 00=event=time, 01=event-PC, 10=execution
                     # Configure so that "Event 1" (always true) causes tracing to start
-                    ipu_write32(
+                    npu_write32(
                         column=0,
                         row=2,
                         address=0x340D0,
                         value=0x00010000,
                     )
                     # 0x340D4: Trace Control 1
-                    ipu_write32(
+                    npu_write32(
                         column=0,
                         row=2,
                         address=0x340D4,
@@ -138,7 +138,7 @@ def sequence(A, C):
                     )
                     # 0x340E0: Trace Event Group 1  (Which events to trace)
                     #          0xAABBCCDD    AA, BB, CC, DD <- four event slots
-                    ipu_write32(
+                    npu_write32(
                         column=0,
                         row=2,
                         address=0x340E0,
@@ -146,14 +146,14 @@ def sequence(A, C):
                     )
                     # 0x340E4: Trace Event Group 2  (Which events to trace)
                     #          0xAABBCCDD    AA, BB, CC, DD <- four event slots
-                    ipu_write32(
+                    npu_write32(
                         column=0,
                         row=2,
                         address=0x340E4,
                         value=0x00000000,
                     )
 
-                    ipu_write32(
+                    npu_write32(
                         column=0,
                         row=2,
                         address=0x3FF00,
@@ -164,7 +164,7 @@ def sequence(A, C):
                     # out to host DDR memory
                     trace_bd_id = 13  # use BD 13 for writing trace output from compute tile to DDR host memory
                     output_size = N_in_bytes
-                    ipu_writebd_shimtile(
+                    npu_writebd_shimtile(
                         bd_id=trace_bd_id,
                         buffer_length=trace_size,
                         buffer_offset=output_size,
@@ -193,15 +193,15 @@ def sequence(A, C):
                         valid_bd=1,
                     )
                     # Set start BD to our shim bd_Id (13)
-                    ipu_write32(column=0, row=0, address=0x1D20C, value=trace_bd_id)
+                    npu_write32(column=0, row=0, address=0x1D20C, value=trace_bd_id)
 
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="outC", bd_id=0, mem=C, sizes=[1, 1, 1, C_sz_in_i32s]
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inA", bd_id=1, mem=A, sizes=[1, 1, 1, A_sz_in_i32s]
                 )
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/ml/relu/run.lit b/programming_examples/ml/relu/run.lit
index 16c48f2aeb..7a13ec7850 100644
--- a/programming_examples/ml/relu/run.lit
+++ b/programming_examples/ml/relu/run.lit
@@ -5,7 +5,7 @@
 //
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../aie_kernels/aie2/relu.cc -o relu.o
 // RUN: %python %S/aie2.py | aie-opt -cse -canonicalize -o ./aie.mlir
-// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
 // CHECK: PASS!
diff --git a/programming_examples/ml/resnet/layers_conv2_x/CMakeLists.txt b/programming_examples/ml/resnet/layers_conv2_x/CMakeLists.txt
index 4b897cb29c..c7db0e9c5c 100755
--- a/programming_examples/ml/resnet/layers_conv2_x/CMakeLists.txt
+++ b/programming_examples/ml/resnet/layers_conv2_x/CMakeLists.txt
@@ -25,7 +25,7 @@ else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(OpenCV_DIR C:/Technical/thirdParty/opencv/build CACHE STRING "Path to OpenCV install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif ()
 
 set(EDGEDETECT_WIDTH 1920 CACHE STRING "image width")
diff --git a/programming_examples/ml/resnet/layers_conv2_x/Makefile b/programming_examples/ml/resnet/layers_conv2_x/Makefile
index 2f978a05ba..9611ab5e6d 100755
--- a/programming_examples/ml/resnet/layers_conv2_x/Makefile
+++ b/programming_examples/ml/resnet/layers_conv2_x/Makefile
@@ -20,7 +20,7 @@ build/${mlirFileName}.mlir: aie.mlir
 	mkdir -p ${@D}
 	cp $< $@
 insts.txt: build/${mlirFileName}.mlir
-	aiecc.py -v --aie-only-generate-ipu --ipu-insts-name=$@ $<
+	aiecc.py -v --aie-only-generate-npu --npu-insts-name=$@ $<
 
 build/conv2dk1_i8.o: ../../../../aie_kernels/aie2/conv2dk1.cc
 	xchesscc -d ${CHESSCC2_FLAGS} -DINT8_ACT -c $< -o $@
@@ -38,8 +38,8 @@ build/conv2dk1_skip.o: ../../../../aie_kernels/aie2/conv2dk1_skip.cc
 	xchesscc -d ${CHESSCC2_FLAGS} -DUINT8_ACT -c $< -o $@
 
 build/final.xclbin: build/${mlirFileName}.mlir 
-	cd build && aiecc.py -v --aie-generate-cdo --aie-generate-ipu --no-compile-host \
-		--xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%)
+	cd build && aiecc.py -v --aie-generate-cdo --aie-generate-npu --no-compile-host \
+		--xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%)
 
 clean:
 	rm -rf build *.elf* *.lst *.bif log* ${mlirFileName}.mlir.prj *.xclbin sim \
diff --git a/programming_examples/ml/resnet/layers_conv2_x/aie.mlir b/programming_examples/ml/resnet/layers_conv2_x/aie.mlir
index ccc04efb9a..103cbbbcbe 100755
--- a/programming_examples/ml/resnet/layers_conv2_x/aie.mlir
+++ b/programming_examples/ml/resnet/layers_conv2_x/aie.mlir
@@ -6,7 +6,7 @@
 //===----------------------------------------------------------------------===//
 
 module {
-aie.device(ipu) {
+aie.device(npu) {
 
   //shim
   %tile00 = aie.tile(0, 0)
@@ -909,9 +909,9 @@ aie.device(ipu) {
 
 
       // Trace_Event0  (4 slots)
-      aiex.ipu.write32 { column = 2 : i32, row = 4 : i32, address = 0x340E0 : ui32, value = 0x4B222125 : ui32 }
+      aiex.npu.write32 { column = 2 : i32, row = 4 : i32, address = 0x340E0 : ui32, value = 0x4B222125 : ui32 }
       // Trace_Event1  (4 slots)
-      aiex.ipu.write32 { column = 2 : i32, row = 4 : i32, address = 0x340E4 : ui32, value = 0x2D2C1A4F : ui32 }
+      aiex.npu.write32 { column = 2 : i32, row = 4 : i32, address = 0x340E4 : ui32, value = 0x2D2C1A4F : ui32 }
 
       // Event slots as configured above:
       // 0: Kernel executes vector instruction
@@ -925,13 +925,13 @@ aie.device(ipu) {
 
       // Stream_Switch_Event_Port_Selection_0
       // This is necessary to capture the Port_Running_0 and Port_Running_1 events
-      aiex.ipu.write32 { column = 2 : i32, row = 4 : i32, address = 0x3FF00 : ui32, value = 0x121 : ui32 }
+      aiex.npu.write32 { column = 2 : i32, row = 4 : i32, address = 0x3FF00 : ui32, value = 0x121 : ui32 }
 
       // Trace_Control0: Define trace start and stop triggers. Set start event TRUE.
-      aiex.ipu.write32 { column = 2 : i32, row = 4 : i32, address = 0x340D0 : ui32, value = 0x10000 : ui32 }
+      aiex.npu.write32 { column = 2 : i32, row = 4 : i32, address = 0x340D0 : ui32, value = 0x10000 : ui32 }
 
       // Start trace copy out.
-      aiex.ipu.writebd_shimtile { bd_id = 3 : i32,
+      aiex.npu.writebd_shimtile { bd_id = 3 : i32,
                                   buffer_length = 16384 : i32,
                                   buffer_offset = 262144 : i32,
                                   enable_packet = 0 : i32,
@@ -965,30 +965,30 @@ aie.device(ipu) {
                                   next_bd = 0 : i32,
                                   use_next_bd = 0 : i32,
                                   valid_bd = 1 : i32}
-      aiex.ipu.write32 { column = 0 : i32, row = 0 : i32, address = 0x1D20C : ui32, value = 0x3 : ui32 }
+      aiex.npu.write32 { column = 0 : i32, row = 0 : i32, address = 0x1D20C : ui32, value = 0x3 : ui32 }
 
     //End trace dump
 
       
 
-      aiex.ipu.rtp_write(0, 2, 0,  1) { buffer_sym_name = "rtp2" }  
-      aiex.ipu.rtp_write(0, 3, 0,  1) { buffer_sym_name = "rtp3" } 
-      aiex.ipu.rtp_write(0, 5, 0,  1) { buffer_sym_name = "rtp4" }  
-      aiex.ipu.rtp_write(0, 4, 0,  1)  { buffer_sym_name = "rtp5" }  
-      aiex.ipu.rtp_write(0, 4, 1,  0)  { buffer_sym_name = "rtp5" }  
-      aiex.ipu.rtp_write(0, 4, 2,  1)  { buffer_sym_name = "rtp5" }  
+      aiex.npu.rtp_write(0, 2, 0,  1) { buffer_sym_name = "rtp2" }  
+      aiex.npu.rtp_write(0, 3, 0,  1) { buffer_sym_name = "rtp3" } 
+      aiex.npu.rtp_write(0, 5, 0,  1) { buffer_sym_name = "rtp4" }  
+      aiex.npu.rtp_write(0, 4, 0,  1)  { buffer_sym_name = "rtp5" }  
+      aiex.npu.rtp_write(0, 4, 1,  0)  { buffer_sym_name = "rtp5" }  
+      aiex.npu.rtp_write(0, 4, 2,  1)  { buffer_sym_name = "rtp5" }  
 
-      aiex.ipu.rtp_write(1, 5, 0,  1) { buffer_sym_name = "rtp15" }  
-      aiex.ipu.rtp_write(1, 4, 0,  1) { buffer_sym_name = "rtp14" }  
-      aiex.ipu.rtp_write(1, 2, 0,  1) { buffer_sym_name = "rtp12" }  
-      aiex.ipu.rtp_write(1, 3, 0,  1)  { buffer_sym_name = "rtp13" }  
-      aiex.ipu.rtp_write(1, 3, 1,  0)  { buffer_sym_name = "rtp13" }  
+      aiex.npu.rtp_write(1, 5, 0,  1) { buffer_sym_name = "rtp15" }  
+      aiex.npu.rtp_write(1, 4, 0,  1) { buffer_sym_name = "rtp14" }  
+      aiex.npu.rtp_write(1, 2, 0,  1) { buffer_sym_name = "rtp12" }  
+      aiex.npu.rtp_write(1, 3, 0,  1)  { buffer_sym_name = "rtp13" }  
+      aiex.npu.rtp_write(1, 3, 1,  0)  { buffer_sym_name = "rtp13" }  
 
-      aiex.ipu.rtp_write(2, 2, 0,  1) { buffer_sym_name = "rtp22" }  
-      aiex.ipu.rtp_write(2, 3, 0,  1) { buffer_sym_name = "rtp23" }  
-      aiex.ipu.rtp_write(2, 5, 0,  1) { buffer_sym_name = "rtp25" }  
-      aiex.ipu.rtp_write(2, 4, 0,  1)  { buffer_sym_name = "rtp24" }  
-      aiex.ipu.rtp_write(2, 4, 1,  0)  { buffer_sym_name = "rtp24" } 
+      aiex.npu.rtp_write(2, 2, 0,  1) { buffer_sym_name = "rtp22" }  
+      aiex.npu.rtp_write(2, 3, 0,  1) { buffer_sym_name = "rtp23" }  
+      aiex.npu.rtp_write(2, 5, 0,  1) { buffer_sym_name = "rtp25" }  
+      aiex.npu.rtp_write(2, 4, 0,  1)  { buffer_sym_name = "rtp24" }  
+      aiex.npu.rtp_write(2, 4, 1,  0)  { buffer_sym_name = "rtp24" } 
 
       %c0 = arith.constant 0 : i32
       %c1 = arith.constant 1 : i32
@@ -1000,13 +1000,13 @@ aie.device(ipu) {
       %total_wts_3_off = arith.constant  35840 : i64 
 
       //dma_memcpy_nd ([offset in 32b words][length in 32b words][stride in 32b words])
-      aiex.ipu.dma_memcpy_nd(0, 0, %in0[0, 0, 0, 0][1, 1, 1, %act_in][0, 0, 0]) {id = 0 : i64, metadata = @inOF_act_L3L2} : memref<16384xi32>
-      aiex.ipu.dma_memcpy_nd(0, 0, %out[0, 0, 0, 0][1, 1, 1, %act_out][0, 0, 0]) {id = 2 : i64, metadata = @outOFL2L3} : memref<65536xi32>
-      aiex.ipu.dma_memcpy_nd(0, 0, %wts0[0, 0, 0, 0][1, 1, 1, %total_wts][0, 0, 0]) {id = 1 : i64, metadata = @inOF_wts_0_L3L2} : memref<53248xi32>
-      aiex.ipu.dma_memcpy_nd(0, 0, %wts0[0, 0, 0, %total_wts][1, 1, 1, %total_wts_2][0, 0, 0]) {id = 1 : i64, metadata = @inOF_wts_1_L3L2} : memref<53248xi32>
-      aiex.ipu.dma_memcpy_nd(0, 0, %wts0[0, 0, 0, %total_wts_3_off][1, 1, 1, %total_wts_3][0, 0, 0]) {id = 1 : i64, metadata = @inOF_wts_2_L3L2} : memref<53248xi32>
+      aiex.npu.dma_memcpy_nd(0, 0, %in0[0, 0, 0, 0][1, 1, 1, %act_in][0, 0, 0]) {id = 0 : i64, metadata = @inOF_act_L3L2} : memref<16384xi32>
+      aiex.npu.dma_memcpy_nd(0, 0, %out[0, 0, 0, 0][1, 1, 1, %act_out][0, 0, 0]) {id = 2 : i64, metadata = @outOFL2L3} : memref<65536xi32>
+      aiex.npu.dma_memcpy_nd(0, 0, %wts0[0, 0, 0, 0][1, 1, 1, %total_wts][0, 0, 0]) {id = 1 : i64, metadata = @inOF_wts_0_L3L2} : memref<53248xi32>
+      aiex.npu.dma_memcpy_nd(0, 0, %wts0[0, 0, 0, %total_wts][1, 1, 1, %total_wts_2][0, 0, 0]) {id = 1 : i64, metadata = @inOF_wts_1_L3L2} : memref<53248xi32>
+      aiex.npu.dma_memcpy_nd(0, 0, %wts0[0, 0, 0, %total_wts_3_off][1, 1, 1, %total_wts_3][0, 0, 0]) {id = 1 : i64, metadata = @inOF_wts_2_L3L2} : memref<53248xi32>
 
-      aiex.ipu.sync {channel = 0 : i32, column = 1 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
+      aiex.npu.sync {channel = 0 : i32, column = 1 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
       return
     }
 
diff --git a/programming_examples/ml/resnet/layers_conv2_x/aie2.py b/programming_examples/ml/resnet/layers_conv2_x/aie2.py
index 385a4fc7a5..e26f16b549 100755
--- a/programming_examples/ml/resnet/layers_conv2_x/aie2.py
+++ b/programming_examples/ml/resnet/layers_conv2_x/aie2.py
@@ -38,7 +38,7 @@
 def bottleneck4AIEs():
     with mlir_mod_ctx() as ctx:
 
-        @device(AIEDevice.ipu)
+        @device(AIEDevice.npu)
         def deviceBody():
 
             # define types
@@ -543,9 +543,9 @@ def sequence(inputFromL3, weightsFromL3, outputToL3):
                     # EVENTS_CORE_PORT_RUNNING_0 (0x4B)
 
                     # Trace_Event0  (4 slots)
-                    ipu_write32(0, 4, 0x340E0, 0x4B222125)
+                    npu_write32(0, 4, 0x340E0, 0x4B222125)
                     # Trace_Event1  (4 slots)
-                    ipu_write32(0, 4, 0x340E4, 0x2D2C1A4F)
+                    npu_write32(0, 4, 0x340E4, 0x2D2C1A4F)
 
                     # Event slots as configured above:
                     # 0: Kernel executes vector instruction
@@ -559,13 +559,13 @@ def sequence(inputFromL3, weightsFromL3, outputToL3):
 
                     # Stream_Switch_Event_Port_Selection_0
                     # This is necessary to capture the Port_Running_0 and Port_Running_1 events
-                    ipu_write32(0, 4, 0x3FF00, 0x121)
+                    npu_write32(0, 4, 0x3FF00, 0x121)
 
                     # Trace_Control0: Define trace start and stop triggers. Set start event TRUE.
-                    ipu_write32(0, 4, 0x340D0, 0x10000)
+                    npu_write32(0, 4, 0x340D0, 0x10000)
 
                     # Start trace copy out.
-                    ipu_writebd_shimtile(
+                    npu_writebd_shimtile(
                         bd_id=3,
                         buffer_length=trace_sz_in_i32s,
                         buffer_offset=acitivationsOutSize32b,
@@ -593,45 +593,45 @@ def sequence(inputFromL3, weightsFromL3, outputToL3):
                         use_next_bd=0,
                         valid_bd=1,
                     )
-                    ipu_write32(0, 2, 0x1D20C, 0x3)
+                    npu_write32(0, 2, 0x1D20C, 0x3)
 
                 # write RTP parameters
-                IpuWriteRTPOp(
+                NpuWriteRTPOp(
                     "rtpComputeTile2", col=0, row=2, index=0, value=1
                 )  # scale
-                IpuWriteRTPOp(
+                NpuWriteRTPOp(
                     "rtpComputeTile3", col=0, row=3, index=0, value=1
                 )  # scale
-                IpuWriteRTPOp(
+                NpuWriteRTPOp(
                     "rtpComputeTile5", col=0, row=5, index=0, value=1
                 )  # scale
-                IpuWriteRTPOp(
+                NpuWriteRTPOp(
                     "rtpComputeTile4", col=0, row=4, index=0, value=1
                 )  # scale: conv1x1 with the same scale as the input so we match the scaling factor of output after conv1x1 and the initial input
-                IpuWriteRTPOp(
+                NpuWriteRTPOp(
                     "rtpComputeTile4", col=0, row=4, index=1, value=0
                 )  # skip_scale
 
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inOF_act_L3L2",
                     bd_id=0,
                     mem=inputFromL3,
                     sizes=[1, 1, 1, activationsInSize32b],
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="outOFL2L3",
                     bd_id=2,
                     mem=outputToL3,
                     sizes=[1, 1, 1, acitivationsOutSize32b],
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inOF_wts_0_L3L2",
                     bd_id=1,
                     mem=weightsFromL3,
                     sizes=[1, 1, 1, totalWeightsSize32b],
                 )
 
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/ml/resnet/layers_conv2_x/run.lit b/programming_examples/ml/resnet/layers_conv2_x/run.lit
index 61f43e45e6..a8e86282a6 100755
--- a/programming_examples/ml/resnet/layers_conv2_x/run.lit
+++ b/programming_examples/ml/resnet/layers_conv2_x/run.lit
@@ -9,6 +9,6 @@
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -DSCALAR -DUINT8_ACT -c %S/../../../../aie_kernels/aie2/conv2dk1.cc -o conv2dk1_ui8.o
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -DSCALAR -DUINT8_ACT -c %S/../../../../aie_kernels/aie2/conv2dk1_skip.cc -o conv2dk1_skip.o
 // RUN: %python %S/aie2.py | aie-opt -cse -canonicalize -o ./aie.mlir
-// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
-// RUN: %run_on_ipu %python %S/test.py | FileCheck %s
+// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
+// RUN: %run_on_npu %python %S/test.py | FileCheck %s
 // CHECK: PASS!
\ No newline at end of file
diff --git a/programming_examples/ml/softmax/CMakeLists.txt b/programming_examples/ml/softmax/CMakeLists.txt
index c4ca0825d4..20f5d8a4a3 100644
--- a/programming_examples/ml/softmax/CMakeLists.txt
+++ b/programming_examples/ml/softmax/CMakeLists.txt
@@ -27,7 +27,7 @@ if (NOT WSL)
 else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif()
 
 set(TARGET_NAME test CACHE STRING "Target to be built")
diff --git a/programming_examples/ml/softmax/Makefile b/programming_examples/ml/softmax/Makefile
index 4f27c07551..9048de8c69 100755
--- a/programming_examples/ml/softmax/Makefile
+++ b/programming_examples/ml/softmax/Makefile
@@ -37,7 +37,7 @@ build/aie.mlir: aie2.py
 build/final.xclbin: build/aie.mlir build/kernels.a
 	mkdir -p ${@D}
 	cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \
-				--aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%)
+				--aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%)
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/ml/softmax/aie2.py b/programming_examples/ml/softmax/aie2.py
index 5672819f7a..29c52bcb60 100755
--- a/programming_examples/ml/softmax/aie2.py
+++ b/programming_examples/ml/softmax/aie2.py
@@ -32,7 +32,7 @@ def my_eltwise_add():
 
     with mlir_mod_ctx() as ctx:
 
-        @device(AIEDevice.ipu)
+        @device(AIEDevice.npu)
         def device_body():
             memRef_ty = T.memref(n, T.bf16())
 
@@ -109,13 +109,13 @@ def core_body():
 
             @FuncOp.from_py_func(tensor_ty, tensor_ty)
             def sequence(A, C):
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="outC", bd_id=0, mem=C, sizes=[1, 1, 1, C_sz_in_i32s]
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inA", bd_id=1, mem=A, sizes=[1, 1, 1, A_sz_in_i32s]
                 )
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/ml/softmax/run.lit b/programming_examples/ml/softmax/run.lit
index 54c7ccff98..42441e898a 100644
--- a/programming_examples/ml/softmax/run.lit
+++ b/programming_examples/ml/softmax/run.lit
@@ -9,7 +9,7 @@
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/softmax.cc -o softmax.o
 // RUN: ar rvs kernels.a dut.o lut_based_ops.o softmax.o
 // RUN: %python %S/aie2.py | aie-opt -cse -canonicalize -o ./aie.mlir
-// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
 // CHECK: PASS!
diff --git a/programming_examples/ml/weight_expand/CMakeLists.txt b/programming_examples/ml/weight_expand/CMakeLists.txt
index c4ca0825d4..20f5d8a4a3 100644
--- a/programming_examples/ml/weight_expand/CMakeLists.txt
+++ b/programming_examples/ml/weight_expand/CMakeLists.txt
@@ -27,7 +27,7 @@ if (NOT WSL)
 else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif()
 
 set(TARGET_NAME test CACHE STRING "Target to be built")
diff --git a/programming_examples/ml/weight_expand/Makefile b/programming_examples/ml/weight_expand/Makefile
index 641b4902b3..b4967596fb 100755
--- a/programming_examples/ml/weight_expand/Makefile
+++ b/programming_examples/ml/weight_expand/Makefile
@@ -23,7 +23,7 @@ build/aie.mlir: aie2.py
 build/final.xclbin: build/aie.mlir build/expand.o
 	mkdir -p ${@D}
 	cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \
-				--aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%)
+				--aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%)
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/ml/weight_expand/aie2.py b/programming_examples/ml/weight_expand/aie2.py
index 3ca1f7aee3..32fe95429f 100755
--- a/programming_examples/ml/weight_expand/aie2.py
+++ b/programming_examples/ml/weight_expand/aie2.py
@@ -45,7 +45,7 @@ def my_expand():
 
     with mlir_mod_ctx() as ctx:
 
-        @device(AIEDevice.ipu)
+        @device(AIEDevice.npu)
         def device_body():
             memRef_i_ty = T.memref(
                 input_buffer_size_bytes, T.i8()
@@ -91,13 +91,13 @@ def core_body():
             @FuncOp.from_py_func(tensor_ty, tensor_ty)
             def sequence(A, C):
 
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="outB", bd_id=0, mem=C, sizes=[1, 1, 1, B_sz_in_i32s]
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inA", bd_id=1, mem=A, sizes=[1, 1, 1, A_sz_in_i32s]
                 )
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/utils/README.md b/programming_examples/utils/README.md
index 9dc2731012..1d59d46e08 100644
--- a/programming_examples/utils/README.md
+++ b/programming_examples/utils/README.md
@@ -54,7 +54,7 @@ The parse script create a temporary directory `tmpTrace` performs the following
 We prepend `0x` before each hex line and save it `prep.<trace file>` since the `hwfrontend` utility expects it.
 
 ### <u>2. Parse MLIR to build event table</u>
-The MLIR parser is pretty rudimentary as it scans the source mlir file looking for `aiex.ipu.write32` calls and does a pattern match for trace unit config address and then grab the hex events, which it looks up from an internal table to provide waveform labels. It would be better to use an MLIR pass that already has the config information and cross reference it with a more official event-to-label lookup table instead. 
+The MLIR parser is pretty rudimentary: it scans the source MLIR file looking for `aiex.npu.write32` calls, pattern-matches the trace unit config address, and then grabs the hex events, which it looks up in an internal table to provide waveform labels. It would be better to use an MLIR pass that already has the config information and cross-reference it with a more official event-to-label lookup table instead.
 
 ### <u>3. Create .target file</u>
 Create a dummy file (`.target`) in the `tmpTrace` with the file content 'hw' since `hwfrontend` utility expects it.
diff --git a/programming_examples/utils/parse_eventIR.py b/programming_examples/utils/parse_eventIR.py
index b7c989ca3c..b41ff9c74a 100755
--- a/programming_examples/utils/parse_eventIR.py
+++ b/programming_examples/utils/parse_eventIR.py
@@ -594,9 +594,9 @@ def parse_mlir_trace_events(lines):
 
     # TODO Need to check if this line is commented out, check for // ? (harder to check of /* */)
     # TODO Need to support value in hex with 0x or decimal
-    # pattern = r"AIEX.ipu.write32\s*\{\s*(\w+)\s*=\s*(\d+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\d+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\w+)\s*:\s*\w+\s*\}"
-    # pattern = r"AIEX.ipu.write32\s*\{\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*\}"
-    pattern = r"aiex.ipu.write32\s*\{\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*\}"
+    # pattern = r"AIEX.npu.write32\s*\{\s*(\w+)\s*=\s*(\d+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\d+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\w+)\s*:\s*\w+\s*\}"
+    # pattern = r"AIEX.npu.write32\s*\{\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*\}"
+    pattern = r"aiex.npu.write32\s*\{\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*\}"
 
     pid_events = list()
     for t in range(NumTraceTypes):
diff --git a/programming_examples/utils/parse_trace.py b/programming_examples/utils/parse_trace.py
index ed45353f31..9d2cd144a6 100755
--- a/programming_examples/utils/parse_trace.py
+++ b/programming_examples/utils/parse_trace.py
@@ -582,9 +582,9 @@ def parse_mlir_trace_events(lines):
 
     # TODO Need to check if this line is commented out, check for // ? (harder to check of /* */)
     # TODO Need to support value in hex with 0x or decimal
-    # pattern = r"AIEX.ipu.write32\s*\{\s*(\w+)\s*=\s*(\d+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\d+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\w+)\s*:\s*\w+\s*\}"
-    # pattern = r"AIEX.ipu.write32\s*\{\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*\}"
-    pattern = r"aiex.ipu.write32\s*\{\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*\}"
+    # pattern = r"AIEX.npu.write32\s*\{\s*(\w+)\s*=\s*(\d+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\d+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(\w+)\s*:\s*\w+\s*\}"
+    # pattern = r"AIEX.npu.write32\s*\{\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*\}"
+    pattern = r"aiex.npu.write32\s*\{\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*,\s*(\w+)\s*=\s*(0x)?(\w+)\s*:\s*\w+\s*\}"
 
     pid_events = list()
     for t in range(NumTraceTypes):
diff --git a/programming_examples/vision/color_detect/CMakeLists.txt b/programming_examples/vision/color_detect/CMakeLists.txt
index d850efcad5..f743789b61 100644
--- a/programming_examples/vision/color_detect/CMakeLists.txt
+++ b/programming_examples/vision/color_detect/CMakeLists.txt
@@ -25,7 +25,7 @@ else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(OpenCV_DIR C:/Technical/thirdParty/opencv/build CACHE STRING "Path to OpenCV install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif ()
 
 set(COLORDETECT_WIDTH 1920 CACHE STRING "image width")
diff --git a/programming_examples/vision/color_detect/Makefile b/programming_examples/vision/color_detect/Makefile
index c8feea4cb6..ffb8ca55d1 100755
--- a/programming_examples/vision/color_detect/Makefile
+++ b/programming_examples/vision/color_detect/Makefile
@@ -39,8 +39,8 @@ build/aie2_lineBased_8b_${COLORDETECT_WIDTH}.mlir: aie2_colorDetect.py
 
 build/final_${COLORDETECT_WIDTH}.xclbin: build/aie2_lineBased_8b_${COLORDETECT_WIDTH}.mlir build/rgba2hue.cc.o build/threshold.cc.o build/combined_bitwiseOR_gray2rgba_bitwiseAND.a
 	mkdir -p ${@D}
-	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \
-		--xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%)
+	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \
+		--xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%)
 
 build/${targetname}.exe: test.cpp
 	mkdir -p ${@D}
diff --git a/programming_examples/vision/color_detect/README.md b/programming_examples/vision/color_detect/README.md
index 33d41a2339..f2f24dbea6 100644
--- a/programming_examples/vision/color_detect/README.md
+++ b/programming_examples/vision/color_detect/README.md
@@ -12,7 +12,7 @@
 
 The Color Detect pipeline design consists of the following blocks arranged in a pipeline fashion for the detecting of 2 colors in a sequence of images : `rgba2hue`, `threshold`, `threshold`, `bitwiseOR`, `gray2rgba`, `bitwiseAND`.
 
-The pipeline is mapped onto a single column of the ipu device, with one Shim tile (0, 0), one Mem tile (0, 1) and four AIE compute tiles (0, 2) through (0, 5). As shown in the image below, the `rgba2hue`, and the two `threshold` kernels are each mapped onto one compute tile, while `bitwiseOR`, `gray2rgba` and `bitwiseAND` are mapped together on AIE tile (0, 5). 
+The pipeline is mapped onto a single column of the npu device, with one Shim tile (0, 0), one Mem tile (0, 1) and four AIE compute tiles (0, 2) through (0, 5). As shown in the image below, the `rgba2hue`, and the two `threshold` kernels are each mapped onto one compute tile, while `bitwiseOR`, `gray2rgba` and `bitwiseAND` are mapped together on AIE tile (0, 5). 
 
 <p align="center">
   <img
diff --git a/programming_examples/vision/color_detect/aie2_colorDetect.py b/programming_examples/vision/color_detect/aie2_colorDetect.py
index f7410a4d12..6675b6fda4 100644
--- a/programming_examples/vision/color_detect/aie2_colorDetect.py
+++ b/programming_examples/vision/color_detect/aie2_colorDetect.py
@@ -32,7 +32,7 @@
 def color_detect():
     with mlir_mod_ctx() as ctx:
 
-        @device(AIEDevice.ipu)
+        @device(AIEDevice.npu)
         def deviceBody():
             line_bytes_ty = MemRefType.get((lineWidthInBytes,), T.ui8())
             line_ty = MemRefType.get((lineWidth,), T.ui8())
@@ -254,19 +254,19 @@ def coreBody():
 
             @FuncOp.from_py_func(tensor_ty, memRef_16x16_ty, tensor_ty)
             def sequence(I, B, O):
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inOF_L3L2",
                     bd_id=1,
                     mem=I,
                     sizes=[1, 1, 1, height * lineWidthInInt32s],
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="outOF_L2L3",
                     bd_id=0,
                     mem=O,
                     sizes=[1, 1, 1, height * lineWidthInInt32s],
                 )
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/vision/color_detect/run.lit b/programming_examples/vision/color_detect/run.lit
index 766ddab92e..20b80f50fc 100644
--- a/programming_examples/vision/color_detect/run.lit
+++ b/programming_examples/vision/color_detect/run.lit
@@ -10,7 +10,7 @@
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -c %S/../../../aie_kernels/aie2/bitwiseAND.cc -o ./bitwiseAND.cc.o
 // RUN: ar rvs combined_bitwiseOR_gray2rgba_bitwiseAND.a bitwiseOR.cc.o gray2rgba.cc.o bitwiseAND.cc.o
 // RUN: %python %S/aie2_colorDetect.py 1920 1080 > ./aie.mlir
-// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: g++ %S/test.cpp -o test.exe -std=c++23 -Wall -DCOLORDETECT_WIDTH=1920 -DCOLORDETECT_HEIGHT=1080 -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp -I%S/../../utils %S/../../utils/OpenCVUtils.cpp %xrt_flags %opencv_flags  -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
 // CHECK: PASS!
diff --git a/programming_examples/vision/color_threshold/CMakeLists.txt b/programming_examples/vision/color_threshold/CMakeLists.txt
index 040bc74533..f630f55106 100644
--- a/programming_examples/vision/color_threshold/CMakeLists.txt
+++ b/programming_examples/vision/color_threshold/CMakeLists.txt
@@ -25,7 +25,7 @@ else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(OpenCV_DIR C:/Technical/thirdParty/opencv/build CACHE STRING "Path to OpenCV install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif ()
 
 set(COLORTHRESHOLD_WIDTH 128 CACHE STRING "image width")
diff --git a/programming_examples/vision/color_threshold/Makefile b/programming_examples/vision/color_threshold/Makefile
index 286f342b08..69958f4c2e 100644
--- a/programming_examples/vision/color_threshold/Makefile
+++ b/programming_examples/vision/color_threshold/Makefile
@@ -36,8 +36,8 @@ build/aie2_${COLORTHRESHOLD_WIDTH}.mlir: aie2_colorThreshold.py
 
 build/final_${COLORTHRESHOLD_WIDTH}.xclbin: build/aie2_${COLORTHRESHOLD_WIDTH}.mlir build/threshold.cc.o
 	mkdir -p ${@D}
-	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \
-		--xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%)
+	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \
+		--xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%)
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/vision/color_threshold/README.md b/programming_examples/vision/color_threshold/README.md
index fbab6235cc..ad8613544a 100644
--- a/programming_examples/vision/color_threshold/README.md
+++ b/programming_examples/vision/color_threshold/README.md
@@ -12,7 +12,7 @@
 
 The Color Threshold pipeline design consists of 4 threshold blocks in separate AIE tiles that process a different region of an input image, as shown in the image below.
 
-The pipeline is mapped onto a single column of the ipu device, with one Shim tile (0, 0), one Mem tile (0, 1) and four AIE compute tiles (0, 2) through (0, 5). 
+The pipeline is mapped onto a single column of the npu device, with one Shim tile (0, 0), one Mem tile (0, 1) and four AIE compute tiles (0, 2) through (0, 5). 
 
 <p align="center">
   <img
diff --git a/programming_examples/vision/color_threshold/aie2_colorThreshold.py b/programming_examples/vision/color_threshold/aie2_colorThreshold.py
index 6a49466b64..b8528b26a9 100644
--- a/programming_examples/vision/color_threshold/aie2_colorThreshold.py
+++ b/programming_examples/vision/color_threshold/aie2_colorThreshold.py
@@ -30,7 +30,7 @@
 def color_threshold():
     with mlir_mod_ctx() as ctx:
 
-        @device(AIEDevice.ipu)
+        @device(AIEDevice.npu)
         def device_body():
             line_channels_ty = T.memref(lineWidthChannels, T.ui8())
             line_ty = T.memref(lineWidth, T.ui8())
@@ -256,35 +256,35 @@ def core_body():
             )
             def sequence(inTensor, notUsed, outTensor):
                 # thresholdValue, maxValue, thresholdType
-                IpuWriteRTPOp("rtpComputeTile2", col=0, row=2, index=0, value=50)
-                IpuWriteRTPOp("rtpComputeTile2", col=0, row=2, index=1, value=255)
-                IpuWriteRTPOp("rtpComputeTile2", col=0, row=2, index=2, value=0)
+                NpuWriteRTPOp("rtpComputeTile2", col=0, row=2, index=0, value=50)
+                NpuWriteRTPOp("rtpComputeTile2", col=0, row=2, index=1, value=255)
+                NpuWriteRTPOp("rtpComputeTile2", col=0, row=2, index=2, value=0)
 
-                IpuWriteRTPOp("rtpComputeTile3", col=0, row=3, index=0, value=50)
-                IpuWriteRTPOp("rtpComputeTile3", col=0, row=3, index=1, value=255)
-                IpuWriteRTPOp("rtpComputeTile3", col=0, row=3, index=2, value=0)
+                NpuWriteRTPOp("rtpComputeTile3", col=0, row=3, index=0, value=50)
+                NpuWriteRTPOp("rtpComputeTile3", col=0, row=3, index=1, value=255)
+                NpuWriteRTPOp("rtpComputeTile3", col=0, row=3, index=2, value=0)
 
-                IpuWriteRTPOp("rtpComputeTile4", col=0, row=4, index=0, value=50)
-                IpuWriteRTPOp("rtpComputeTile4", col=0, row=4, index=1, value=255)
-                IpuWriteRTPOp("rtpComputeTile4", col=0, row=4, index=2, value=0)
+                NpuWriteRTPOp("rtpComputeTile4", col=0, row=4, index=0, value=50)
+                NpuWriteRTPOp("rtpComputeTile4", col=0, row=4, index=1, value=255)
+                NpuWriteRTPOp("rtpComputeTile4", col=0, row=4, index=2, value=0)
 
-                IpuWriteRTPOp("rtpComputeTile5", col=0, row=5, index=0, value=50)
-                IpuWriteRTPOp("rtpComputeTile5", col=0, row=5, index=1, value=255)
-                IpuWriteRTPOp("rtpComputeTile5", col=0, row=5, index=2, value=0)
+                NpuWriteRTPOp("rtpComputeTile5", col=0, row=5, index=0, value=50)
+                NpuWriteRTPOp("rtpComputeTile5", col=0, row=5, index=1, value=255)
+                NpuWriteRTPOp("rtpComputeTile5", col=0, row=5, index=2, value=0)
 
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inOOB_L3L2",
                     bd_id=1,
                     mem=inTensor,
                     sizes=[1, 1, 1, tensorSizeInInt32s],
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="outOOB_L2L3",
                     bd_id=0,
                     mem=outTensor,
                     sizes=[1, 1, 1, tensorSizeInInt32s],
                 )
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     # print(ctx.module.operation.verify())
     print(ctx.module)
diff --git a/programming_examples/vision/color_threshold/run.lit b/programming_examples/vision/color_threshold/run.lit
index 3033daed44..f452502155 100644
--- a/programming_examples/vision/color_threshold/run.lit
+++ b/programming_examples/vision/color_threshold/run.lit
@@ -5,7 +5,7 @@
 //
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -c %S/../../../aie_kernels/aie2/threshold.cc -o ./threshold.cc.o
 // RUN: %python %S/aie2_colorThreshold.py 1920 1080 > ./aie.mlir
-// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: g++ %S/test.cpp -o test.exe -std=c++23 -Wall -DCOLORTHRESHOLD_WIDTH=1920 -DCOLORTHRESHOLD_HEIGHT=1080 -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp -I%S/../../utils %S/../../utils/OpenCVUtils.cpp %xrt_flags %opencv_flags  -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
 // CHECK: PASS!
diff --git a/programming_examples/vision/edge_detect/CMakeLists.txt b/programming_examples/vision/edge_detect/CMakeLists.txt
index 59fe331831..c0ceb81739 100644
--- a/programming_examples/vision/edge_detect/CMakeLists.txt
+++ b/programming_examples/vision/edge_detect/CMakeLists.txt
@@ -25,7 +25,7 @@ else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(OpenCV_DIR C:/Technical/thirdParty/opencv/build CACHE STRING "Path to OpenCV install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif ()
 
 set(EDGEDETECT_WIDTH 1920 CACHE STRING "image width")
diff --git a/programming_examples/vision/edge_detect/Makefile b/programming_examples/vision/edge_detect/Makefile
index d40e606e63..71c2012432 100755
--- a/programming_examples/vision/edge_detect/Makefile
+++ b/programming_examples/vision/edge_detect/Makefile
@@ -39,8 +39,8 @@ build/aie2_lineBased_8b_${EDGEDETECT_WIDTH}.mlir: aie2_edgeDetect.py
 
 build/final_${EDGEDETECT_WIDTH}.xclbin: build/aie2_lineBased_8b_${EDGEDETECT_WIDTH}.mlir build/rgba2gray.cc.o build/gray2rgba.cc.o build/filter2d.cc.o build/threshold.cc.o build/addWeighted.cc.o build/combined_gray2rgba_addWeighted.a
 	mkdir -p ${@D}
-	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \
-		--xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%)
+	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \
+		--xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%)
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/vision/edge_detect/README.md b/programming_examples/vision/edge_detect/README.md
index 2450f019ec..26f2d4aff8 100644
--- a/programming_examples/vision/edge_detect/README.md
+++ b/programming_examples/vision/edge_detect/README.md
@@ -12,7 +12,7 @@
 
 The Edge Detect pipeline design consists of the following blocks arranged in a pipeline fashion for the detection of edges in a sequence of images : `rgba2gray`, `filter2D`, `threshold`, `gray2rgba`, `addWeighted`.
 
-The pipeline is mapped onto a single column of the ipu device, with one Shim tile (0, 0), one Mem tile (0, 1) and four AIE compute tiles (0, 2) through (0, 5). As shown in the image below, the `rgba2gray`, `filter2D` and `threshold` kernels are each mapped onto one compute tile, while `gray2rgba` and `addWeighted` are mapped together on AIE tile (0, 5). 
+The pipeline is mapped onto a single column of the npu device, with one Shim tile (0, 0), one Mem tile (0, 1) and four AIE compute tiles (0, 2) through (0, 5). As shown in the image below, the `rgba2gray`, `filter2D` and `threshold` kernels are each mapped onto one compute tile, while `gray2rgba` and `addWeighted` are mapped together on AIE tile (0, 5). 
 
 <p align="center">
   <img
diff --git a/programming_examples/vision/edge_detect/aie2_edgeDetect.py b/programming_examples/vision/edge_detect/aie2_edgeDetect.py
index dd481cfc65..91ed5f9ed4 100644
--- a/programming_examples/vision/edge_detect/aie2_edgeDetect.py
+++ b/programming_examples/vision/edge_detect/aie2_edgeDetect.py
@@ -32,7 +32,7 @@
 def edge_detect():
     with mlir_mod_ctx() as ctx:
 
-        @device(AIEDevice.ipu)
+        @device(AIEDevice.npu)
         def device_body():
             line_bytes_ty = T.memref(lineWidthInBytes, T.ui8())
             line_ty = T.memref(lineWidth, T.ui8())
@@ -300,19 +300,19 @@ def core_body():
 
             @FuncOp.from_py_func(tensor_ty, memRef_16x16_ty, tensor_ty)
             def sequence(I, B, O):
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="outOF_L2L3",
                     bd_id=0,
                     mem=O,
                     sizes=[1, 1, 1, tensorSizeInInt32s],
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="inOF_L3L2",
                     bd_id=1,
                     mem=I,
                     sizes=[1, 1, 1, tensorSizeInInt32s],
                 )
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     #    print(ctx.module.operation.verify())
     print(ctx.module)
diff --git a/programming_examples/vision/edge_detect/run.lit b/programming_examples/vision/edge_detect/run.lit
index bf5b6eff92..5f7ab37e0e 100644
--- a/programming_examples/vision/edge_detect/run.lit
+++ b/programming_examples/vision/edge_detect/run.lit
@@ -10,8 +10,8 @@
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -c %S/../../../aie_kernels/aie2/filter2d.cc -o ./filter2d.cc.o
 // RUN: ar rvs combined_gray2rgba_addWeighted.a gray2rgba.cc.o addWeighted.cc.o
 // RUN: %python %S/aie2_edgeDetect.py 1920 1080 > ./aie.mlir
-// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: g++ %S/test.cpp -o test.exe -std=c++23 -Wall -DEDGEDETECT_WIDTH=1920 -DEDGEDETECT_HEIGHT=1080 -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp -I%S/../../utils %S/../../utils/OpenCVUtils.cpp %xrt_flags %opencv_flags  -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
 // CHECK: PASS!
 
diff --git a/programming_examples/vision/vision_passthrough/CMakeLists.txt b/programming_examples/vision/vision_passthrough/CMakeLists.txt
index 7ba68b268b..a2bb8ac761 100644
--- a/programming_examples/vision/vision_passthrough/CMakeLists.txt
+++ b/programming_examples/vision/vision_passthrough/CMakeLists.txt
@@ -28,7 +28,7 @@ else()
     set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
     set(OpenCV_DIR C:/Technical/thirdParty/opencv/build CACHE STRING "Path to OpenCV install")
     set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
-    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
+    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
 endif ()
 
 set(PASSTHROUGH_WIDTH 1920 CACHE STRING "image width")
diff --git a/programming_examples/vision/vision_passthrough/Makefile b/programming_examples/vision/vision_passthrough/Makefile
index f07d90fda2..1ae853d942 100644
--- a/programming_examples/vision/vision_passthrough/Makefile
+++ b/programming_examples/vision/vision_passthrough/Makefile
@@ -32,8 +32,8 @@ build/passThrough.cc.o: passThrough.cc
 	
 build/final_${PASSTHROUGH_WIDTH}.xclbin: build/aie2_lineBased_8b_${PASSTHROUGH_WIDTH}.mlir build/passThrough.cc.o
 	mkdir -p ${@D}
-	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \
-		--xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%)
+	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \
+		--xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%)
 
 ${targetname}.exe: test.cpp
 	rm -rf _build
diff --git a/programming_examples/vision/vision_passthrough/aie2.py b/programming_examples/vision/vision_passthrough/aie2.py
index 5422f803d1..920d109cfa 100644
--- a/programming_examples/vision/vision_passthrough/aie2.py
+++ b/programming_examples/vision/vision_passthrough/aie2.py
@@ -29,7 +29,7 @@
 def passThroughAIE2():
     with mlir_mod_ctx() as ctx:
 
-        @device(AIEDevice.ipu)
+        @device(AIEDevice.npu)
         def device_body():
             # define types
             line_ty = T.memref(lineWidthInBytes, T.ui8())
@@ -101,9 +101,9 @@ def sequence(inTensor, notUsed, outTensor):
                     # EVENTS_CORE_PORT_RUNNING_0 (0x4B)
 
                     # Trace_Event0  (4 slots)
-                    IpuWrite32(0, 2, 0x340E0, 0x4B222125)
+                    NpuWrite32(0, 2, 0x340E0, 0x4B222125)
                     # Trace_Event1  (4 slots)
-                    IpuWrite32(0, 2, 0x340E4, 0x2D2C1A4F)
+                    NpuWrite32(0, 2, 0x340E4, 0x2D2C1A4F)
 
                     # Event slots as configured above:
                     # 0: Kernel executes vector instruction
@@ -117,13 +117,13 @@ def sequence(inTensor, notUsed, outTensor):
 
                     # Stream_Switch_Event_Port_Selection_0
                     # This is necessary to capture the Port_Running_0 and Port_Running_1 events
-                    IpuWrite32(0, 2, 0x3FF00, 0x121)
+                    NpuWrite32(0, 2, 0x3FF00, 0x121)
 
                     # Trace_Control0: Define trace start and stop triggers. Set start event TRUE.
-                    IpuWrite32(0, 2, 0x340D0, 0x10000)
+                    NpuWrite32(0, 2, 0x340D0, 0x10000)
 
                     # Start trace copy out.
-                    IpuWriteBdShimTile(
+                    NpuWriteBdShimTile(
                         bd_id=3,
                         buffer_length=traceSizeInBytes,
                         buffer_offset=tensorSize,
@@ -151,21 +151,21 @@ def sequence(inTensor, notUsed, outTensor):
                         use_next_bd=0,
                         valid_bd=1,
                     )
-                    IpuWrite32(0, 0, 0x1D20C, 0x3)
+                    NpuWrite32(0, 0, 0x1D20C, 0x3)
 
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="in",
                     bd_id=1,
                     mem=inTensor,
                     sizes=[1, 1, 1, tensorSizeInInt32s],
                 )
-                ipu_dma_memcpy_nd(
+                npu_dma_memcpy_nd(
                     metadata="out",
                     bd_id=0,
                     mem=outTensor,
                     sizes=[1, 1, 1, tensorSizeInInt32s],
                 )
-                ipu_sync(column=0, row=0, direction=0, channel=0)
+                npu_sync(column=0, row=0, direction=0, channel=0)
 
     print(ctx.module)
 
diff --git a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_1080.mlir b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_1080.mlir
index 3c547e4016..0621e0b622 100644
--- a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_1080.mlir
+++ b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_1080.mlir
@@ -12,7 +12,7 @@
 // AIE tiles, buffers, and communication in an AI Engine design
 module @passThroughLine_aie2 {
 
- 	aie.device(ipu) {
+ 	aie.device(npu) {
         // declare kernel external kernel function 
         func.func private @passThroughLine(%in: memref<1920xui8>, %out: memref<1920xui8>, %tilewidth: i32) -> ()
         
@@ -53,9 +53,9 @@ module @passThroughLine_aie2 {
             %tilewidth  = arith.constant 480 : i64  // in 32b words so tileWidth/4
 
             //dma_memcpy_nd ([offset in 32b words][length in 32b words][stride in 32b words])
-            aiex.ipu.dma_memcpy_nd (0, 0, %in[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @inOF, id = 1 : i64 } : memref<518400xi32>
-            aiex.ipu.dma_memcpy_nd (0, 0, %out[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @outOF, id = 0 : i64 } : memref<518400xi32>
-            aiex.ipu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
+            aiex.npu.dma_memcpy_nd (0, 0, %in[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @inOF, id = 1 : i64 } : memref<518400xi32>
+            aiex.npu.dma_memcpy_nd (0, 0, %out[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @outOF, id = 0 : i64 } : memref<518400xi32>
+            aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
             return
         }
     }
diff --git a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_8k.mlir b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_8k.mlir
index d07ba213c4..c2c31b0d9b 100644
--- a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_8k.mlir
+++ b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_8k.mlir
@@ -12,7 +12,7 @@
 // AIE tiles, buffers, and communication in an AI Engine design
 module @passThroughLine_aie2 {
 
- 	aie.device(ipu) {
+ 	aie.device(npu) {
         // declare kernel external kernel function 
         func.func private @passThroughLine(%in: memref<7680xui8>, %out: memref<7680xui8>, %tilewidth: i32) -> ()
         
@@ -54,9 +54,9 @@ module @passThroughLine_aie2 {
             %totalLenRGBA = arith.constant 2073600 : i64
 
             //dma_memcpy_nd ([offset in 32b words][length in 32b words][stride in 32b words])
-            aiex.ipu.dma_memcpy_nd (0, 0, %in[%c0, %c0, %c0, %c0][%c1, %c1, %c1, %totalLenRGBA][%c0, %c0, %c0]) { metadata = @inOF, id = 1 : i64 } : memref<2073600xi32>
-            aiex.ipu.dma_memcpy_nd (0, 0, %out[%c0, %c0, %c0, %c0][%c1, %c1, %c1, %totalLenRGBA][%c0, %c0, %c0]) { metadata = @outOF, id = 0 : i64 } : memref<2073600xi32>
-            aiex.ipu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
+            aiex.npu.dma_memcpy_nd (0, 0, %in[%c0, %c0, %c0, %c0][%c1, %c1, %c1, %totalLenRGBA][%c0, %c0, %c0]) { metadata = @inOF, id = 1 : i64 } : memref<2073600xi32>
+            aiex.npu.dma_memcpy_nd (0, 0, %out[%c0, %c0, %c0, %c0][%c1, %c1, %c1, %totalLenRGBA][%c0, %c0, %c0]) { metadata = @outOF, id = 0 : i64 } : memref<2073600xi32>
+            aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
             return
         }
     }
diff --git a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_tiny.mlir b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_tiny.mlir
index 13f7417166..dd66475ca5 100644
--- a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_tiny.mlir
+++ b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_tiny.mlir
@@ -12,7 +12,7 @@
 // AIE tiles, buffers, and communication in an AI Engine design
 module @passThroughLine_aie2 {
 
- 	aie.device(ipu) {
+ 	aie.device(npu) {
         // declare kernel external kernel function 
         func.func private @passThroughLine(%in: memref<512xui8>, %out: memref<512xui8>, %tilewidth: i32) -> ()
         
@@ -53,9 +53,9 @@ module @passThroughLine_aie2 {
             %tilewidth  = arith.constant 128 : i64  // in 32b words so tileWidth/4
 
             //dma_memcpy_nd ([offset in 32b words][length in 32b words][stride in 32b words])
-            aiex.ipu.dma_memcpy_nd (0, 0, %in[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @inOF, id = 1 : i64 } : memref<1152xi32>
-            aiex.ipu.dma_memcpy_nd (0, 0, %out[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @outOF, id = 0 : i64 } : memref<1152xi32>
-            aiex.ipu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
+            aiex.npu.dma_memcpy_nd (0, 0, %in[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @inOF, id = 1 : i64 } : memref<1152xi32>
+            aiex.npu.dma_memcpy_nd (0, 0, %out[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @outOF, id = 0 : i64 } : memref<1152xi32>
+            aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
             return
         }
     }
diff --git a/programming_examples/vision/vision_passthrough/run.lit b/programming_examples/vision/vision_passthrough/run.lit
index 5093e3c80c..58f914861c 100644
--- a/programming_examples/vision/vision_passthrough/run.lit
+++ b/programming_examples/vision/vision_passthrough/run.lit
@@ -5,7 +5,7 @@
 //
 // RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -c %S/../../../aie_kernels/generic/passThrough.cc -o passThrough.cc.o
 // RUN: %python %S/aie2.py 1920 1080 | aie-opt -cse -canonicalize -o ./aie.mlir
-// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
+// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir
 // RUN: g++ %S/test.cpp -o test.exe -std=c++23 -Wall -DPASSTHROUGH_WIDTH=1920 -DPASSTHROUGH_HEIGHT=1080 -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp -I%S/../../utils %S/../../utils/OpenCVUtils.cpp %xrt_flags %opencv_flags  -lrt -lstdc++ -lboost_program_options -lboost_filesystem
-// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
+// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
 // CHECK: PASS!