diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp index ec54b67a0..15bcb682d 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp @@ -15,6 +15,7 @@ #include "air/Dialect/AIR/AIRDialect.h" #include "air/Dialect/AIRRt/AIRRtDialect.h" #include "iree-amd-aie/Transforms/Passes.h" +#include "iree-dialects/Dialect/LinalgExt/IR/LinalgExtDialect.h" #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenDialect.h" #include "iree/compiler/Utils/FlatbufferUtils.h" #include "llvm/Bitcode/BitcodeWriter.h" @@ -73,6 +74,7 @@ class AIETargetBackend final : public IREE::HAL::TargetBackend { void getDependentDialects(DialectRegistry &registry) const override { registry.insert(); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAddLoweringStrategy.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAddLoweringStrategy.cpp index 0fc4a5020..07d437ede 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAddLoweringStrategy.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAddLoweringStrategy.cpp @@ -18,9 +18,8 @@ namespace mlir::iree_compiler::AMDAIE { namespace { /// Add the lowering strategy configurations to be used for ops. -class AMDAIEAddLoweringStrategyPass - : public impl::AMDAIEAddLoweringStrategyBase< - AMDAIEAddLoweringStrategyPass> { +class AMDAIELoweringStrategyPass + : public impl::AMDAIELoweringStrategyBase { public: void getDependentDialects(DialectRegistry &registry) const override { registry.insert< @@ -31,16 +30,16 @@ class AMDAIEAddLoweringStrategyPass vector::VectorDialect>(); } - AMDAIEAddLoweringStrategyPass() = default; - AMDAIEAddLoweringStrategyPass(const AMDAIEAddLoweringStrategyOptions &options) - : AMDAIEAddLoweringStrategyBase(options) {} - AMDAIEAddLoweringStrategyPass(const AMDAIEAddLoweringStrategyPass &pass){}; + AMDAIELoweringStrategyPass() = default; + AMDAIELoweringStrategyPass(const AMDAIELoweringStrategyOptions &options) + : AMDAIELoweringStrategyBase(options) {} + AMDAIELoweringStrategyPass(const AMDAIELoweringStrategyPass &pass){}; void runOnOperation() override; }; } // namespace -void AMDAIEAddLoweringStrategyPass::runOnOperation() { +void AMDAIELoweringStrategyPass::runOnOperation() { IREE::HAL::ExecutableVariantOp variantOp = getOperation(); ModuleOp moduleOp = variantOp.getInnerModule(); if (!moduleOp) { @@ -48,14 +47,14 @@ void AMDAIEAddLoweringStrategyPass::runOnOperation() { "Expected a variantOp root with an inner ModuleOp"); return signalPassFailure(); } - if (failed(initAIELaunchConfig(moduleOp, useUKernelStrategy))) { + if (failed(initAIELaunchConfig(moduleOp, usePassPipeline))) { return signalPassFailure(); } } -std::unique_ptr createAMDAIEAddLoweringStrategyPass( - AMDAIEAddLoweringStrategyOptions options) { - return std::make_unique(options); +std::unique_ptr createAMDAIELoweringStrategyPass( + AMDAIELoweringStrategyOptions options) { + return std::make_unique(options); } } // namespace mlir::iree_compiler::AMDAIE diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELowerExecutableTarget.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELowerExecutableTarget.cpp index 1d06a20f7..474a30ea5 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELowerExecutableTarget.cpp +++
b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELowerExecutableTarget.cpp @@ -34,6 +34,7 @@ using mlir::iree_compiler::IREE::Codegen::LoweringConfigAttr; namespace mlir::iree_compiler::AMDAIE { namespace { + /// Lowers an hal.executable.variant operation to scalar/native-vector /// code. Invokes different compilation pipeline to /// - first lower to scalar/native-vector code @@ -56,6 +57,9 @@ class AMDAIELowerExecutableTargetPass AMDAIELowerExecutableTargetPass() = default; AMDAIELowerExecutableTargetPass( const AMDAIELowerExecutableTargetPass &pass){}; + AMDAIELowerExecutableTargetPass( + const AMDAIELowerExecutableTargetOptions &options) + : AMDAIELowerExecutableTargetBase(options) {} void runOnOperation() override; }; @@ -138,12 +142,17 @@ void AMDAIELowerExecutableTargetPass::runOnOperation() { case IREE::Codegen::DispatchLoweringPassPipeline::TransformDialectCodegen: addTransformDialectPasses(executableLoweringPipeline); break; - // TODO(avarma): Currently we are using "CPUDefault" but resorting to use - // the default case. Will soon have corresponding AIE enum. - default: + case IREE::Codegen::DispatchLoweringPassPipeline::None: { TilingConfig tilingConfig = getTilingConfigForPipeline(moduleOp); - addPadBasedPassPipeline(executableLoweringPipeline, tilingConfig); - break; + if (usePassPipeline == AIEPassPipeline::SimplePackPipeline) { + addPackBasedPassPipeline(executableLoweringPipeline, tilingConfig); + } else if (usePassPipeline == AIEPassPipeline::PadPipeline) { + addPadBasedPassPipeline(executableLoweringPipeline, tilingConfig); + } + } break; + default: + variantOp.emitOpError("unhandled pass pipeline value set"); + return signalPassFailure(); } } @@ -152,8 +161,9 @@ void AMDAIELowerExecutableTargetPass::runOnOperation() { } } -std::unique_ptr createAMDAIELowerExecutableTargetPass() { - return std::make_unique(); +std::unique_ptr createAMDAIELowerExecutableTargetPass( + AMDAIELowerExecutableTargetOptions options) { + return std::make_unique(options); } } // namespace mlir::iree_compiler::AMDAIE diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEPackAndTranspose.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEPackAndTranspose.cpp index 5d4d81ded..28c33348e 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEPackAndTranspose.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEPackAndTranspose.cpp @@ -5,6 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "iree-amd-aie/Transforms/Passes.h" +#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h" #include "mlir/Dialect/Linalg/Transforms/Transforms.h" #include "mlir/Pass/Pass.h" @@ -30,11 +31,11 @@ struct PackConfig { static FailureOr getPackConfig(RewriterBase &rewriter, int packLevel) { PackConfig config; - if (packLevel == 1) { + if (packLevel == 0) { // packed size for [M, N, K] - config.packedSizes = {rewriter.getI64IntegerAttr(16), - rewriter.getI64IntegerAttr(64), - rewriter.getI64IntegerAttr(64)}; + config.packedSizes = {rewriter.getI64IntegerAttr(8), + rewriter.getI64IntegerAttr(16), + rewriter.getI64IntegerAttr(16)}; // Transpose B matrix from [K N n k] to [K N k n] config.transposePackIndices = {1}; // There is no corresponding unpack for the specified pack operation @@ -42,7 +43,7 @@ static FailureOr getPackConfig(RewriterBase &rewriter, config.unpackEmpty = {0}; config.innerPerm = {{1, 0}}; config.outerPerm = {{0, 1}}; - } else if (packLevel == 2) { + } else if (packLevel == 1) 
{ // packed size for [M, N, K, m, n, k] config.packedSizes = { rewriter.getI64IntegerAttr(0), rewriter.getI64IntegerAttr(0), @@ -163,6 +164,12 @@ void AMDAIEPackAndTransposePass::runOnOperation() { // Update packed linalg op packedOp = packTransResult->transposedLinalgOp; } + + // Get the lowering config from the previous linalgOp and add it to the + // packedOp + if (auto config = getLoweringConfig(linalgOp)) { + setLoweringConfig(packedOp, config); + } } } // namespace diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIETileAndFuse.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIETileAndFuse.cpp index d06ed89ea..4cfac19f1 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIETileAndFuse.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIETileAndFuse.cpp @@ -63,6 +63,11 @@ static bool isTilingReductionDimension(TilingInterface consumerOp, return false; } +static bool consumerToSkip(TilingInterface op) { + if (isa<linalg::CopyOp>(op) || isa<tensor::UnPackOp>(op)) return true; + return false; +} + LogicalResult applyTileAndFuse(RewriterBase &rewriter, TilingInterface rootOp, DominanceInfo &dominanceInfo, scf::SCFTileAndFuseOptions &tileAndFuseOptions) { @@ -106,9 +111,9 @@ void AMDAIETileAndFusePass::runOnOperation() { TilingInterface consumerOp; funcOp->walk([&](TilingInterface op) { - // Find the next consumer op if it does not have loops OR if it is a - // linalg.copy op. - if (op.getLoopIteratorTypes().empty() || isa<linalg::CopyOp>(op)) + // Find the next consumer op if it does not have loops OR if it is in + // the skip-ops list, which currently contains linalg.copy and tensor.unpack. + if (op.getLoopIteratorTypes().empty() || consumerToSkip(op)) return WalkResult::advance(); consumerOp = op; return WalkResult::interrupt(); @@ -146,7 +151,7 @@ void AMDAIETileAndFusePass::runOnOperation() { getAsIndexOpFoldResult(context, tileSizesVal); auto options = scf::SCFTilingOptions().setTileSizes(tileSizes); // When tiling using scf.for we do not need to set any mapping. - if (tilingLevel != 2) { + if (!useSCFFor) { options.setMapping( {gpu::GPUBlockMappingAttr::get(context, gpu::MappingId::DimY), gpu::GPUBlockMappingAttr::get(context, gpu::MappingId::DimX)}); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp index e3b3035be..8a252cc35 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp @@ -19,7 +19,7 @@ namespace mlir::iree_compiler::AMDAIE { /// implements the contraction operation interface. static LogicalResult setRootConfig(func::FuncOp entryPointFn, linalg::MatmulOp matmulOp, - bool useUKernelStrategy) { + AIEPassPipeline usePassPipeline) { assert(!getLoweringConfig(matmulOp) && "expected lowering_config is not set"); auto linalgOp = cast(matmulOp.getOperation()); unsigned numLoops = linalgOp.getNumLoops(); @@ -35,34 +35,47 @@ static LogicalResult setRootConfig(func::FuncOp entryPointFn, // TODO (nmeshram) : This needs to be moved in a separate more generalized // logic.
Also, need a flag to experiment between pad based and pack based // approach which will have different tile sizes and pass pipelines - TileSizesListType tileSizes; - if (useUKernelStrategy) { - SmallVector TileSizeLevel0 = {16, 64}; - SmallVector TileSizeLevel1 = {0, 0, 64}; - SmallVector TileSizeLevel2 = {1, 1}; - tileSizes = {TileSizeLevel0, TileSizeLevel1, TileSizeLevel2}; - } else { + if (usePassPipeline == AIEPassPipeline::PadPipeline) { SmallVector TileSizeLevel0 = {8, 8}; SmallVector TileSizeLevel1 = {4, 4}; SmallVector TileSizeLevel2 = {0, 0, 4}; - tileSizes = {TileSizeLevel0, TileSizeLevel1, TileSizeLevel2}; + TileSizesListType tileSizes = {TileSizeLevel0, TileSizeLevel1, + TileSizeLevel2}; + return setOpConfigAndEntryPointFnTranslation( + entryPointFn, matmulOp, tileSizes, + IREE::Codegen::DispatchLoweringPassPipeline::None); + } else if (usePassPipeline == AIEPassPipeline::SimplePackPipeline) { + SmallVector TileSizeLevel0 = {8, 16}; + SmallVector TileSizeLevel1 = {1, 1}; + SmallVector TileSizeLevel2 = {0, 0, 1}; + TileSizesListType tileSizes = {TileSizeLevel0, TileSizeLevel1, + TileSizeLevel2}; + return setOpConfigAndEntryPointFnTranslation( + entryPointFn, matmulOp, tileSizes, + IREE::Codegen::DispatchLoweringPassPipeline::None); + } else if (usePassPipeline == AIEPassPipeline::PackPipeline) { + SmallVector TileSizeLevel0 = {16, 64}; + SmallVector TileSizeLevel1 = {0, 0, 64}; + SmallVector TileSizeLevel2 = {1, 1}; + TileSizesListType tileSizes = {TileSizeLevel0, TileSizeLevel1, + TileSizeLevel2}; + return setOpConfigAndEntryPointFnTranslation( + entryPointFn, matmulOp, tileSizes, + IREE::Codegen::DispatchLoweringPassPipeline::None); } - - return setOpConfigAndEntryPointFnTranslation( - entryPointFn, matmulOp, tileSizes, - IREE::Codegen::DispatchLoweringPassPipeline::CPUDefault); + return matmulOp.emitOpError("unhandled pass pipeline"); } /// Redirects to methods that set the configuration based on operation type. static LogicalResult setRootConfigImpl(func::FuncOp entryPointFn, Operation *op, - bool useUKernelStrategy) { + AIEPassPipeline usePassPipeline) { auto setRootConfigFn = [&](Operation *op) -> LogicalResult { return TypeSwitch(op) // TODO (nmeshram): This is very limited for now, plan is to // let it first crash for all the other ops and then consiously // add support for them, this way we can verify our work. .Case([&](auto op) { - return setRootConfig(entryPointFn, op, useUKernelStrategy); + return setRootConfig(entryPointFn, op, usePassPipeline); }) .Default([&](Operation *op) { return success(); }); }; @@ -72,7 +85,7 @@ static LogicalResult setRootConfigImpl(func::FuncOp entryPointFn, Operation *op, /// Sets the translation information to use for a dispatch region. static LogicalResult setTranslationInfoAndRootConfig( func::FuncOp entryPointFn, ArrayRef computeOps, - bool useUKernelStrategy) { + AIEPassPipeline usePassPipeline) { // Make sure that lowering_config is not preset on any compute ops. 
for (auto computeOp : computeOps) { if (getLoweringConfig(computeOp)) return failure(); } @@ -87,8 +100,7 @@ static LogicalResult setTranslationInfoAndRootConfig( return entryPointFn.emitError("Case with no root ops not yet supported."); } - if (failed( - setRootConfigImpl(entryPointFn, rootOperation, useUKernelStrategy))) { + if (failed(setRootConfigImpl(entryPointFn, rootOperation, usePassPipeline))) { return failure(); } @@ -98,7 +110,8 @@ static LogicalResult setTranslationInfoAndRootConfig( return success(); } -LogicalResult initAIELaunchConfig(ModuleOp moduleOp, bool useUKernelStrategy) { +LogicalResult initAIELaunchConfig(ModuleOp moduleOp, + AIEPassPipeline usePassPipeline) { llvm::StringMap exportOps = getAllEntryPoints(moduleOp); for (auto funcOp : moduleOp.getOps()) { @@ -113,7 +126,7 @@ LogicalResult initAIELaunchConfig(ModuleOp moduleOp, bool useUKernelStrategy) { SmallVector computeOps = getComputeOps(funcOp); if (failed(setTranslationInfoAndRootConfig(funcOp, computeOps, - useUKernelStrategy))) { + usePassPipeline))) { return failure(); } } diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.h index 0d40ed0d8..f71faf089 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.h @@ -12,8 +12,18 @@ namespace mlir::iree_compiler::AMDAIE { +/// Enum to pick the lowering pass pipeline to use. The pass-pipeline enums +/// that IREE generates via tablegen are not extensible, so we define our own +/// enum here to select between the different AIE lowering pipelines. +enum class AIEPassPipeline : int32_t { + PadPipeline = 0, + PackPipeline = 1, + SimplePackPipeline = 2, + None = 3 +}; + LogicalResult initAIELaunchConfig(ModuleOp moduleOp, - bool useUKernelStrategy = false); + AIEPassPipeline usePassPipeline); } // namespace mlir::iree_compiler::AMDAIE diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/PassDetail.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/PassDetail.h index 892626d76..ffa88fa2c 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/PassDetail.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/PassDetail.h @@ -1,4 +1,5 @@ // Copyright 2023 The IREE Authors + // // Licensed under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information.
@@ -7,6 +8,7 @@ #ifndef IREE_AMD_AIE_TRANSFORMS_PASSDETAIL_H_ #define IREE_AMD_AIE_TRANSFORMS_PASSDETAIL_H_ +#include "iree-amd-aie/Transforms/KernelDispatch.h" #include "iree/compiler/Dialect/HAL/IR/HALOps.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Func/IR/FuncOps.h" @@ -22,7 +24,7 @@ namespace mlir::iree_compiler::AMDAIE { #define GEN_PASS_DEF_AMDAIECLEANUP #define GEN_PASS_DEF_AMDAIEFUSEFILLINTOFORALL #define GEN_PASS_DEF_AMDAIELOWEREXECUTABLETARGET -#define GEN_PASS_DEF_AMDAIEADDLOWERINGSTRATEGY +#define GEN_PASS_DEF_AMDAIELOWERINGSTRATEGY #define GEN_PASS_DEF_AMDAIELOWERWORKGROUPCOUNT #define GEN_PASS_DEF_AMDAIEPACKANDTRANSPOSE #define GEN_PASS_DEF_AMDAIEPAD diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp index e03aee37c..70c69f599 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp @@ -22,6 +22,22 @@ namespace mlir::iree_compiler::AMDAIE { +/// Command line options used purely for development purposes. Not to be relied +/// on in any way. +static llvm::cl::opt clUsePipeline( + "iree-amdaie-use-pipeline", + llvm::cl::desc("Pick the lowering pipeline to use"), + llvm::cl::values( + clEnumValN(AIEPassPipeline::PadPipeline, "pad", + "Use IREE lowering to AIR dialect through pad operations"), + clEnumValN( + AIEPassPipeline::PackPipeline, "pack", + "Use the IREE lowering to AIR dialect through pack operation"), + clEnumValN(AIEPassPipeline::SimplePackPipeline, "simple-pack", + "Use the simplified IREE lowering to AIR dialect through " + "pack operation")), + llvm::cl::init(AIEPassPipeline::SimplePackPipeline)); + //===---------------------------------------------------------------------===// // Default allocation functions for AIE backend //===---------------------------------------------------------------------===// @@ -103,11 +119,84 @@ void addPadBasedPassPipeline(OpPassManager &pm, TilingConfig &tilingConfig) { pm.addPass(createCSEPass()); } +void addPackBasedPassPipeline(OpPassManager &pm, TilingConfig &tilingConfig) { + auto &modulePassManager = pm.nest(); + modulePassManager.addNestedPass(createAMDAIECleanupPass()); + pm.addPass(createCanonicalizerPass()); + pm.addPass(createCSEPass()); + + AMDAIETileAndFuseOptions tileOptions; + AMDAIEPackAndTransposeOptions packOptions; + AMDAIEBufferizeToAllocationOptions bufferizeOptions; + + // First level tiling using scf.forall + tileOptions.tilingLevel = 0; + tileOptions.useSCFFor = false; + modulePassManager.addNestedPass( + createAMDAIETileAndFusePass(tileOptions)); + modulePassManager.addNestedPass(createAMDAIECleanupPass()); + modulePassManager.addPass(createCanonicalizerPass()); + modulePassManager.addPass(createCSEPass()); + + // First level packing and bufferize to allocation + packOptions.packLevel = 0; + modulePassManager.addNestedPass( + createAMDAIEPackAndTransposePass(packOptions)); + bufferizeOptions.memorySpace = 1; + bufferizeOptions.bufferizeLevel = -1; + modulePassManager.addNestedPass( + createAMDAIEBufferizeToAllocationPass(bufferizeOptions)); + + // Second level tiling using scf.forall + tileOptions.tilingLevel = 1; + tileOptions.useSCFFor = false; + modulePassManager.addNestedPass( + createAMDAIETileAndFusePass(tileOptions)); + modulePassManager.addNestedPass(createAMDAIECleanupPass()); + modulePassManager.addPass(createCanonicalizerPass()); + modulePassManager.addPass(createCSEPass()); + + // 
Fuse fill into forall loop + modulePassManager.addNestedPass( + createAMDAIEFuseFillIntoForallPass()); + modulePassManager.addNestedPass(createAMDAIECleanupPass()); + modulePassManager.addPass(createCanonicalizerPass()); + modulePassManager.addPass(createCSEPass()); + + // Second level packing and bufferize to allocation + packOptions.packLevel = 1; + modulePassManager.addNestedPass( + createAMDAIEPackAndTransposePass(packOptions)); + bufferizeOptions.memorySpace = 2; + bufferizeOptions.bufferizeLevel = -1; + modulePassManager.addNestedPass( + createAMDAIEBufferizeToAllocationPass(bufferizeOptions)); + + // Tile the reduction loops + tileOptions.tilingLevel = 2; + tileOptions.useSCFFor = true; + modulePassManager.addNestedPass( + createAMDAIETileAndFusePass(tileOptions)); + modulePassManager.addNestedPass(createAMDAIECleanupPass()); + modulePassManager.addPass(createCanonicalizerPass()); + modulePassManager.addPass(createCSEPass()); + + // Comprehensive bufferization + addAMDAIEBufferizePasses(modulePassManager); +} + void buildAMDAIETransformPassPipeline(OpPassManager &pm) { addCommonTargetExecutablePreprocessingPasses(pm); - pm.addPass(createEraseHALDescriptorTypeFromMemRefPass()); - pm.addPass(createAMDAIEAddLoweringStrategyPass()); - pm.addPass(createAMDAIELowerExecutableTargetPass()); + { + AMDAIELoweringStrategyOptions options; + options.usePassPipeline = clUsePipeline; + pm.addPass(createAMDAIELoweringStrategyPass(options)); + } + { + AMDAIELowerExecutableTargetOptions options; + options.usePassPipeline = clUsePipeline; + pm.addPass(createAMDAIELowerExecutableTargetPass(options)); + } pm.addPass(createAMDAIELowerWorkgroupCountPass()); auto &modulePassManager = pm.nest(); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h index 59b906dfe..1f9ecdb1f 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h @@ -35,6 +35,10 @@ void buildAMDAIETransformPassPipeline(OpPassManager &pm); void addPadBasedPassPipeline(OpPassManager &passManager, TilingConfig &tilingConfig); +/// Populates passes needed to lower the IR via a Pack based approach. +void addPackBasedPassPipeline(OpPassManager &passManager, + TilingConfig &tilingConfig); + /// Create a pass to do some rewrites that help bridging the path to AIR/AIE /// lowering. std::unique_ptr createAMDAIEBridgeToAIRPass(); @@ -54,12 +58,13 @@ std::unique_ptr createAMDAIEDecomposeLinalgExtPackUnPackToAIRPass(); /// Create a pass to fuse the linalg.fill into the forall loops. std::unique_ptr createAMDAIEFuseFillIntoForallPass(); -/// Create pass for adding lowering strategy configurations. -std::unique_ptr createAMDAIEAddLoweringStrategyPass( - AMDAIEAddLoweringStrategyOptions options = {}); - /// Create pass calling the dynamic pipeline for AMDAIE. -std::unique_ptr createAMDAIELowerExecutableTargetPass(); +std::unique_ptr createAMDAIELowerExecutableTargetPass( + AMDAIELowerExecutableTargetOptions options = {}); + +/// Create pass for adding lowering strategy configurations. +std::unique_ptr createAMDAIELoweringStrategyPass( + AMDAIELoweringStrategyOptions options = {}); /// Create a pass to lower workgroup count region of entry point operations. 
std::unique_ptr createAMDAIELowerWorkgroupCountPass(); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td index bf99a5ecf..9bfe89df6 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td @@ -58,20 +58,28 @@ def AMDAIEFuseFillIntoForall : let constructor = "mlir::iree_compiler::AMDAIE::createAMDAIEFuseFillIntoForallPass()"; } -def AMDAIEAddLoweringStrategy : - Pass<"iree-amdaie-add-lowering-strategy", "mlir::iree_compiler::IREE::HAL::ExecutableVariantOp"> { - let summary = "Add lowering strategy configurations to be used"; - let constructor = "mlir::iree_compiler::AMDAIE::createAMDAIEAddLoweringStrategyPass()"; - let options = [ - Option<"useUKernelStrategy", "use-ukernel-strategy", "bool", /*default=*/"false", - "Whether to use the ukernel tiling strategy"> - ]; -} - def AMDAIELowerExecutableTarget : Pass<"iree-amdaie-lower-executable-target", "mlir::iree_compiler::IREE::HAL::ExecutableVariantOp"> { let summary = "Perform lowering of executable target using one of the IREE::HAL::DispatchLoweringPassPipeline"; let constructor = "mlir::iree_compiler::AMDAIE::createAMDAIELowerExecutableTargetPass()"; + let options = [ + Option<"usePassPipeline", "use-pass-pipeline", + "mlir::iree_compiler::AMDAIE::AIEPassPipeline", + /*default=*/"mlir::iree_compiler::AMDAIE::AIEPassPipeline::PadPipeline", + "Pass pipeline to use while lowering to AIR dialect"> + ]; +} + +def AMDAIELoweringStrategy : + Pass<"iree-amdaie-lowering-strategy", "mlir::iree_compiler::IREE::HAL::ExecutableVariantOp"> { + let summary = "Add lowering strategy configurations to be used"; + let constructor = "mlir::iree_compiler::AMDAIE::createAMDAIELoweringStrategyPass()"; + let options = [ + Option<"usePassPipeline", "use-pass-pipeline", + "mlir::iree_compiler::AMDAIE::AIEPassPipeline", + /*default=*/"mlir::iree_compiler::AMDAIE::AIEPassPipeline::PadPipeline", + "Pass pipeline to use while lowering to AIR dialect"> + ]; } def AMDAIELowerWorkgroupCount : @@ -86,7 +94,7 @@ def AMDAIEPackAndTranspose : let constructor = "mlir::iree_compiler::AMDAIE::createAMDAIEPackAndTransposePass()"; let options = [ - Option<"packLevel", "pack-level", "int64_t", /*default=*/"1", + Option<"packLevel", "pack-level", "int64_t", /*default=*/"-1", "Set the packing level number"> ]; } diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/pack_and_transpose_level1.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/pack_and_transpose_level1.mlir index 42e14ab6a..01888c513 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/pack_and_transpose_level1.mlir +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/pack_and_transpose_level1.mlir @@ -1,13 +1,13 @@ -// RUN: iree-opt --pass-pipeline='builtin.module(func.func(iree-amdaie-pack-and-transpose{pack-level=1}))' --split-input-file %s | FileCheck --check-prefix=CHECK-1 %s +// RUN: iree-opt --pass-pipeline='builtin.module(func.func(iree-amdaie-pack-and-transpose{pack-level=0}))' --split-input-file %s | FileCheck %s func.func @matmul_example_dispatch_0_matmul_16x256x256_i8xi8xi32(%arg0 : tensor<16x256xi8>, %arg1 : tensor<256x256xi8>) -> tensor<16x256xi32> { %c0 = arith.constant 0 : index %c0_i32 = arith.constant 0 : i32 %5 = tensor.empty() : tensor<16x256xi32> %6 = linalg.fill ins(%c0_i32 : i32) outs(%5 : tensor<16x256xi32>) -> tensor<16x256xi32> - // 
CHECK-1: tensor.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [16, 64] into %{{.*}} : tensor<16x256xi8> -> tensor<1x4x16x64xi8> - // CHECK-1: tensor.pack %{{.*}} outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %{{.*}} : tensor<256x256xi8> -> tensor<4x4x64x64xi8> - // CHECK-1: tensor.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [16, 64] into %{{.*}} : tensor<16x256xi32> -> tensor<1x4x16x64xi32> + // CHECK: tensor.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [8, 16] into %{{.*}} : tensor<16x256xi8> -> tensor<2x16x8x16xi8> + // CHECK: tensor.pack %{{.*}} outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %{{.*}} : tensor<256x256xi8> -> tensor<16x16x16x16xi8> + // CHECK: tensor.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [8, 16] into %{{.*}} : tensor<16x256xi32> -> tensor<2x16x8x16xi32> %7 = linalg.matmul ins(%arg0, %arg1 : tensor<16x256xi8>, tensor<256x256xi8>) outs(%6 : tensor<16x256xi32>) -> tensor<16x256xi32> return %7 : tensor<16x256xi32> } diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/pack_and_transpose_level2.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/pack_and_transpose_level2.mlir index 99fa2a64b..7f6fef3dd 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/pack_and_transpose_level2.mlir +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/pack_and_transpose_level2.mlir @@ -1,4 +1,4 @@ -// RUN: iree-opt --pass-pipeline='builtin.module(func.func(iree-amdaie-pack-and-transpose{pack-level=2}))' --split-input-file %s | FileCheck --check-prefix=CHECK-2 %s +// RUN: iree-opt --pass-pipeline='builtin.module(func.func(iree-amdaie-pack-and-transpose{pack-level=1}))' --split-input-file %s | FileCheck %s #map = affine_map<(d0) -> (d0 * 16)> #map1 = affine_map<(d0) -> (d0 * 64)> @@ -25,9 +25,9 @@ func.func @matmul_example_dispatch_0_matmul_16x256x256_i8xi8xi32(%arg0: tensor<1 %extracted_slice_4 = tensor.extract_slice %pack_2[0, %arg4, 0, 0] [4, 1, 64, 64] [1, 1, 1, 1] : tensor<4x1x64x64xi8> to tensor<4x1x64x64xi8> %extracted_slice_5 = tensor.extract_slice %arg5[%arg3, %arg4, 0, 0] [1, 1, 16, 64] [1, 1, 1, 1] : tensor<1x1x16x64xi32> to tensor<1x1x16x64xi32> %13 = linalg.fill ins(%c0_i32 : i32) outs(%extracted_slice_5 : tensor<1x1x16x64xi32>) -> tensor<1x1x16x64xi32> - // CHECK-2: tensor.pack %{{.*}} outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %{{.*}} : tensor<1x4x16x64xi8> -> tensor<1x4x8x4x4x8xi8> - // CHECK-2: tensor.pack %{{.*}} outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [8, 8] into %{{.*}} : tensor<4x1x64x64xi8> -> tensor<4x1x8x8x8x8xi8> - // CHECK-2: tensor.pack %{{.*}} outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %{{.*}} : tensor<1x1x16x64xi32> -> tensor<1x1x8x4x4x8xi32> + // CHECK: tensor.pack %{{.*}} outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %{{.*}} : tensor<1x4x16x64xi8> -> tensor<1x4x8x4x4x8xi8> + // CHECK: tensor.pack %{{.*}} outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [8, 8] into %{{.*}} : tensor<4x1x64x64xi8> -> tensor<4x1x8x8x8x8xi8> + // CHECK: tensor.pack %{{.*}} outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %{{.*}} : tensor<1x1x16x64xi32> -> tensor<1x1x8x4x4x8xi32> %14 = linalg.generic {indexing_maps = [#map2, #map3, #map4], iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]} 
ins(%extracted_slice_3, %extracted_slice_4 : tensor<1x4x16x64xi8>, tensor<4x1x64x64xi8>) outs(%13 : tensor<1x1x16x64xi32>) { ^bb0(%in: i8, %in_6: i8, %out: i32): %15 = arith.extsi %in : i8 to i32 diff --git a/tests/samples/CMakeLists.txt b/tests/samples/CMakeLists.txt index 18268c68c..09d0d7b52 100644 --- a/tests/samples/CMakeLists.txt +++ b/tests/samples/CMakeLists.txt @@ -8,8 +8,8 @@ iree_lit_test_suite( NAME lit SRCS - "matmul_fill_static_i32.mlir" - "matmul_fill_static_i32_config.mlir" + "pack_pipeline_e2e.mlir" + "pad_pipeline_e2e.mlir" TOOLS ${IREE_LLD_TARGET} FileCheck diff --git a/tests/samples/matmul_fill_static_i32_config.mlir b/tests/samples/matmul_fill_static_i32_config.mlir deleted file mode 100644 index 84669436b..000000000 --- a/tests/samples/matmul_fill_static_i32_config.mlir +++ /dev/null @@ -1,41 +0,0 @@ -// RUN: iree-opt %s --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" | FileCheck %s - -// This test case is just to demonstrate that TransformDialectCodegen is not being set -// and the e2e still works. -// CHECK-LABEL: hal.executable.export public @matmul_static_dispatch_0_matmul_8x8x16_i32 -// CHECK: aie.device(ipu) -// CHECK: aie.shim_dma_allocation -// CHECK: aie.shim_dma_allocation -// CHECK: aie.shim_dma_allocation -// CHECK: func.func @matmul_static_dispatch_0_matmul_8x8x16_i32(%arg0: memref<8x16xi32>, %arg1: memref<16x8xi32>, %arg2: memref<8x8xi32>) -// CHECK: aiex.ipu.dma_memcpy_nd -// CHECK: aiex.ipu.dma_memcpy_nd -// CHECK: aiex.ipu.dma_memcpy_nd -// CHECK: aiex.ipu.sync -#config = #iree_codegen.lowering_config -#translation = #iree_codegen.translation_info -hal.executable private @matmul_static { - hal.executable.variant public @amdaie_xclbin_fb target(<"amd-aie", "amdaie-xclbin-fb", {target_arch = "chip-tbd"}>) { - hal.executable.export public @matmul_static_dispatch_0_matmul_8x8x16_i32 ordinal(0) layout(#hal.pipeline.layout, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) attributes {translation_info = #translation} { - ^bb0(%arg0: !hal.device): - %x, %y, %z = flow.dispatch.workgroup_count_from_slice - hal.return %x, %y, %z : index, index, index - } - builtin.module { - func.func @matmul_static_dispatch_0_matmul_8x8x16_i32() { - %c0 = arith.constant 0 : index - %c0_i32 = arith.constant 0 : i32 - %0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> - %2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(64) offset(%c0) : !flow.dispatch.tensor> - %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [8, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<8x16xi32> - %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 8], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x8xi32> - %5 = tensor.empty() : tensor<8x8xi32> - %6 = linalg.fill ins(%c0_i32 : i32) outs(%5 : tensor<8x8xi32>) -> tensor<8x8xi32> - %7 = linalg.matmul {lowering_config = #config} ins(%3, %4 : tensor<8x16xi32>, tensor<16x8xi32>) outs(%6 : tensor<8x8xi32>) -> tensor<8x8xi32> - flow.dispatch.tensor.store %7, %2, offsets = [0, 0], sizes = [8, 8], strides = [1, 1] : tensor<8x8xi32> -> !flow.dispatch.tensor> - return - } - } - } -} diff --git a/tests/samples/pack_pipeline_e2e.mlir b/tests/samples/pack_pipeline_e2e.mlir 
new file mode 100644 index 000000000..6000db124 --- /dev/null +++ b/tests/samples/pack_pipeline_e2e.mlir @@ -0,0 +1,24 @@ +// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" | FileCheck %s --check-prefix=CPP + +// This test demonstrates Pack pipeline based e2e lowering. + +// To check the cpp path equivalent to the transform dialect script. +// CPP-LABEL: hal.executable.export public @matmul_static_dispatch_0_matmul_8x32x16_i32 +// CPP: aie.device(ipu) +// CPP: aie.shim_dma_allocation +// CPP: aie.shim_dma_allocation +// CPP: aie.shim_dma_allocation +// CPP: func.func @matmul_static_dispatch_0_matmul_8x32x16_i32(%arg0: memref<8x16xi32>, %arg1: memref<16x32xi32>, %arg2: memref<8x32xi32>) +// CPP: aiex.ipu.dma_memcpy_nd +// CPP: aiex.ipu.dma_memcpy_nd +// CPP: aiex.ipu.dma_memcpy_nd +// CPP: aiex.ipu.sync +func.func @matmul_static(%lhs : tensor<8x16xi32>, + %rhs : tensor<16x32xi32>) -> tensor<8x32xi32> { + %empty = tensor.empty() : tensor<8x32xi32> + %cst = arith.constant 0 : i32 + %fill = linalg.fill ins(%cst : i32) outs(%empty : tensor<8x32xi32>) -> tensor<8x32xi32> + %2 = linalg.matmul ins(%lhs, %rhs : tensor<8x16xi32>, tensor<16x32xi32>) + outs(%fill : tensor<8x32xi32>) -> tensor<8x32xi32> + return %2 : tensor<8x32xi32> +} diff --git a/tests/samples/matmul_fill_static_i32.mlir b/tests/samples/pad_pipeline_e2e.mlir similarity index 92% rename from tests/samples/matmul_fill_static_i32.mlir rename to tests/samples/pad_pipeline_e2e.mlir index bf669807e..99c5de6a4 100644 --- a/tests/samples/matmul_fill_static_i32.mlir +++ b/tests/samples/pad_pipeline_e2e.mlir @@ -1,5 +1,7 @@ // RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-codegen-transform-dialect-library=%S/matmul_fill_spec_pad.mlir | FileCheck %s --check-prefix=TRANSFORM -// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" | FileCheck %s --check-prefix=CPP +// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-use-pipeline=pad | FileCheck %s --check-prefix=CPP + +// This test demonstrates Pad pipeline based e2e lowering. // To check the transform dialect script path. // TRANSFORM-LABEL: hal.executable.export public @matmul_static_dispatch_0_matmul_8x8x16_i32
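
For reference, here is a minimal sketch (not part of the patch) of how the new option plumbing fits together. Pass, option, and enum names are taken from the diff above; the helper function name and the surrounding pass-manager setup are illustrative assumptions, and this mirrors what buildAMDAIETransformPassPipeline in Passes.cpp now does.

// Sketch only: thread one AIEPassPipeline choice through both new passes,
// replacing the old boolean `useUKernelStrategy` flag.
#include "iree-amd-aie/Transforms/KernelDispatch.h"
#include "iree-amd-aie/Transforms/Passes.h"
#include "mlir/Pass/PassManager.h"

using namespace mlir;
using namespace mlir::iree_compiler::AMDAIE;

// Hypothetical helper; `pm` is assumed to be an already-configured pass
// manager rooted at the hal.executable.variant op.
static void addStrategyAndLoweringPasses(OpPassManager &pm,
                                         AIEPassPipeline pipeline) {
  // Select the tile-size strategy for the chosen pipeline.
  AMDAIELoweringStrategyOptions strategyOptions;
  strategyOptions.usePassPipeline = pipeline;
  pm.addPass(createAMDAIELoweringStrategyPass(strategyOptions));

  // Lower the executable target with the matching pad- or pack-based pipeline.
  AMDAIELowerExecutableTargetOptions lowerOptions;
  lowerOptions.usePassPipeline = pipeline;
  pm.addPass(createAMDAIELowerExecutableTargetPass(lowerOptions));
}

From the command line the same choice is exposed through the development-only flag added in Passes.cpp, e.g. --iree-amdaie-use-pipeline=pad (choices: pad, pack, simple-pack; default: simple-pack), as exercised by the RUN lines in the tests/samples files above.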