Skip to content

Commit

Permalink
[BACKEND] Dead-code-eliminate tmem_alloc ops that are not mutable (#6047)
Browse files Browse the repository at this point in the history
This sets the correct side effects on tmem_alloc so that dead code
elimination and CSE can kick in.
  • Loading branch information
ThomasRaoux authored Feb 27, 2025
1 parent 9f4c7fc commit 3fe035c
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ def TTNG_TMEMStoreOp : TTNG_Op<"tmem_store", [MemoryEffects<[MemWrite]>]> {
let hasVerifier = 1;
}

def TTNG_TMEMAllocOp : TTNG_Op<"tmem_alloc", [MemoryEffects<[MemWrite]>]> {
def TTNG_TMEMAllocOp : TTNG_Op<"tmem_alloc", [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
let summary = "allocate tensor memory";
let description = [{
This operation allocates buffer in tensor memory and return a descriptor
Expand Down
19 changes: 19 additions & 0 deletions lib/Dialect/TritonNvidiaGPU/IR/Ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,25 @@ LogicalResult TMEMAllocOp::verify() {
return success();
}

// TMEMAllocOp
//
// Populates `effects` with the memory effects of this allocation.
//
// An allocation whose type is not mutable and that does not yet carry the
// "tensor_memory_col_offset" attribute reports no effects at all, which lets
// early passes such as CSE and DCE treat it as side-effect free. Once either
// condition stops holding (the comment in the original code associates the
// attribute with the memory offset having been computed), the op reports an
// Allocate effect on the TensorMemory resource, plus a Write effect on its
// result when a source operand is present.
void TMEMAllocOp::getEffects(
    SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
        &effects) {
  Operation *allocOp = getOperation();

  // Same test as "!mutable && !hasOffset", expressed positively; evaluation
  // order and short-circuiting are unchanged.
  const bool reportsEffects = getType().getMutableMemory() ||
                              allocOp->hasAttr("tensor_memory_col_offset");
  if (!reportsEffects)
    return;

  effects.emplace_back(MemoryEffects::Allocate::get(),
                       mlir::triton::nvidia_gpu::TensorMemory::get());

  // Without a source operand nothing is written into the fresh buffer.
  if (!getSrc())
    return;

  effects.emplace_back(MemoryEffects::Write::get(), allocOp->getOpResult(0),
                       mlir::triton::nvidia_gpu::TensorMemory::get());
}

bool isDescendingOrder(triton::gpu::MemDescType type) {
auto order = triton::gpu::getOrder(type);
auto rank = type.getRank();
Expand Down
2 changes: 1 addition & 1 deletion lib/Dialect/TritonNvidiaGPU/Transforms/MMALowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ struct TCGen5MMAScaleSharedToTmemConversion
cast<SwizzledSharedEncodingAttr>(oldType.getEncoding());
CTALayoutAttr CTALayout = getCTALayout(oldEncoding);
ArrayRef<unsigned> CTASplitNum = CTALayout.getCTASplitNum();
ArrayRef<int64_t> shape = oldType.getAllocShape();
ArrayRef<int64_t> shape = oldType.getShape();
Attribute scaleEncoding = TensorMemoryScalesEncodingAttr::get(
context, CTASplitNum[0], CTASplitNum[1]);
Type scaleAType =
Expand Down

0 comments on commit 3fe035c

Please sign in to comment.