[vecz] Clear getelementptr flags.
If a getelementptr has inbounds/nusw/nuw flags, it is valid for the
result to be out of bounds or to wrap, so long as the result is not used
to access any object. However, when we vectorize, multiple elements are
all loaded based on the first element's address, which may introduce
accesses that the original program never performed, so we need to clear
the inbounds/nusw/nuw flags unless we can prove they are still valid.
hvdijk committed Sep 11, 2024
1 parent 4acf063 commit 00ed327
Showing 22 changed files with 88 additions and 80 deletions.
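To see why the flags are unsafe to keep, here is a minimal sketch (not taken from this commit; the function @sketch and the names %live, %i0 and %mask are illustrative). A scalar kernel may compute an address it only conditionally uses:

define void @sketch(ptr %p, i64 %i, i1 %live) {
entry:
  ; An out-of-bounds %addr is fine here: under inbounds it is merely a
  ; poison value, and it is only dereferenced when %live is true.
  %addr = getelementptr inbounds i32, ptr %p, i64 %i
  br i1 %live, label %do, label %done
do:
  store i32 0, ptr %addr, align 4
  br label %done
done:
  ret void
}

Assuming %i is consecutive across lanes (e.g. a global id), vectorizing by 4 turns the guarded store into a masked vector store whose base is lane 0's address, so that address now feeds a real memory access even when lane 0 itself is masked off. The shape is roughly the following (hypothetical, not vecz's exact output):

  %addr = getelementptr i32, ptr %p, i64 %i0
  call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr %addr, i32 4, <4 x i1> %mask)

With inbounds retained, an out-of-bounds lane-0 address would be poison and the live lanes' addresses would be derived from poison; with the flag cleared, lane 0's address is simply a valid-but-unused computation.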
@@ -385,6 +385,14 @@ void UniformValueResult::markVaryingValues(Value *V, Value *From) {
        markVaryingValues(Alloca);
      }
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(VIns)) {
+     // We need to clear the flags because the initial address may be out of
+     // bounds but masked out.
+#if LLVM_VERSION_GREATER_EQUAL(19, 0)
+     GEP->setNoWrapFlags(GEPNoWrapFlags::none());
+#else
+     GEP->setIsInBounds(false);
+#endif
+
      // Same as with the stores
      AllocaInst *Alloca = findAllocaFromPointer(GEP->getPointerOperand());
      if (Alloca) {
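The effect on a varying GEP is pure flag removal; as a sketch (the nuw spelling assumes LLVM 19's GEP no-wrap syntax, and %base/%tid are illustrative):

  ; before: the flags promise in-bounds, non-wrapping address arithmetic
  %a = getelementptr inbounds nuw i32, ptr %base, i64 %tid
  ; after markVaryingValues: same address computation, no promises
  %a = getelementptr i32, ptr %base, i64 %tid

On LLVM 19+ a single setNoWrapFlags call clears all of inbounds/nusw/nuw; before LLVM 19 only inbounds exists, hence the #if.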
@@ -99,7 +99,7 @@ entry:
; CHECK-LABEL: @__vecz_nxv4_vector_broadcast_const(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL:%.*]] = tail call i64 @__mux_get_global_id(i32 0)
-; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]]
; CHECK-NEXT: store <vscale x 16 x float> shufflevector (<vscale x 16 x float> insertelement (<vscale x 16 x float> {{(undef|poison)}}, float 0x7FF8000020000000, {{(i32|i64)}} 0), <vscale x 16 x float> {{(undef|poison)}}, <vscale x 16 x i32> zeroinitializer), ptr addrspace(1) [[ARRAYIDX3]], align 16
; CHECK-NEXT: ret void

@@ -113,17 +113,17 @@ entry:
; CHECK-NEXT: [[VEC_ALLOC:%.*]] = getelementptr inbounds float, ptr [[FIXLEN_ALLOC]], <vscale x 16 x i64> [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> [[VEC_ALLOC]], i32 4, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, {{(i32|i64)}} 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x float> undef)
; CHECK-NEXT: [[CALL:%.*]] = tail call i64 @__mux_get_global_id(i32 0)
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[IN:%.*]], i64 [[CALL]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[IN:%.*]], i64 [[CALL]]
; CHECK-NEXT: [[TMP3:%.*]] = load <vscale x 16 x float>, ptr addrspace(1) [[ARRAYIDX]], align 16
; CHECK-NEXT: [[TMP4:%.*]] = fadd <vscale x 16 x float> [[TMP3]], [[TMP1]]
-; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]]
; CHECK-NEXT: store <vscale x 16 x float> [[TMP4]], ptr addrspace(1) [[ARRAYIDX3]], align 16
; CHECK-NEXT: ret void

; CHECK-LABEL: @__vecz_nxv4_vector_broadcast_regression(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL:%.*]] = tail call i64 @__mux_get_global_id(i32 0)
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[IN:%.*]], i64 [[CALL]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[IN:%.*]], i64 [[CALL]]
; CHECK-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i32>, ptr addrspace(1) [[ARRAYIDX]], align 16
; CHECK-NEXT: [[AND1_I_I_I1_I1:%.*]] = and <vscale x 16 x i32> [[TMP1]], shufflevector (<vscale x 16 x i32> insertelement (<vscale x 16 x i32> {{(undef|poison)}}, i32 2139095040, {{i32|i64}} 0), <vscale x 16 x i32> {{(undef|poison)}}, <vscale x 16 x i32> zeroinitializer)
; CHECK-NEXT: [[CMP_I_I_I2_I2:%.*]] = icmp ne <vscale x 16 x i32> [[AND1_I_I_I1_I1]], shufflevector (<vscale x 16 x i32> insertelement (<vscale x 16 x i32> {{(undef|poison)}}, i32 2139095040, {{i32|i64}} 0), <vscale x 16 x i32> {{(undef|poison)}}, <vscale x 16 x i32> zeroinitializer)
@@ -132,7 +132,7 @@ entry:
; CHECK-NEXT: [[TMP2:%.*]] = or <vscale x 16 x i1> [[CMP_I_I_I2_I2]], [[CMP3_I_I_I4_I4]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <vscale x 16 x i32> [[TMP1]] to <vscale x 16 x float>
; CHECK-NEXT: [[TMP4:%.*]] = select <vscale x 16 x i1> [[TMP2]], <vscale x 16 x float> [[TMP3]], <vscale x 16 x float> shufflevector (<vscale x 16 x float> insertelement (<vscale x 16 x float> {{(undef|poison)}}, float 0x7FF0000020000000, {{i32|i64}} 0), <vscale x 16 x float> {{(undef|poison)}}, <vscale x 16 x i32> zeroinitializer)
-; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]]
; CHECK-NEXT: store <vscale x 16 x float> [[TMP4]], ptr addrspace(1) [[ARRAYIDX3]], align 16
; CHECK-NEXT: ret void
;
@@ -156,12 +156,12 @@ entry:
; CHECK-NEXT: [[TMP2:%.*]] = {{s|z}}ext{{( nneg)?}} <vscale x 16 x i32> [[IDX14]] to <vscale x 16 x i64>
; CHECK-NEXT: [[VEC_ALLOC:%.*]] = getelementptr inbounds i32, ptr [[FIXLEN_ALLOC]], <vscale x 16 x i64> [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> [[VEC_ALLOC]], i32 4, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, {{i32|i64}} 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i32> {{(undef|poison)}})
-; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[OUT2:%.*]], i64 [[CALL]]
+; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr <4 x i32>, ptr addrspace(1) [[OUT2:%.*]], i64 [[CALL]]
; CHECK-NEXT: store <vscale x 16 x i32> [[TMP3]], ptr addrspace(1) [[ARRAYIDX4]], align 16
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[IN:%.*]], i64 [[CALL]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[IN:%.*]], i64 [[CALL]]
; CHECK-NEXT: [[TMP6:%.*]] = load <vscale x 16 x float>, ptr addrspace(1) [[ARRAYIDX]], align 16
; CHECK-NEXT: [[V46:%.*]] = fadd <vscale x 16 x float> [[TMP6]], [[TMP1]]
-; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]]
; CHECK-NEXT: store <vscale x 16 x float> [[V46]], ptr addrspace(1) [[ARRAYIDX3]], align 16
; CHECK-NEXT: ret void
;
@@ -95,8 +95,8 @@ entry:
; EE: [[ALLOC:%.*]] = alloca <vscale x 16 x float>, align 64
; EE: store <vscale x 16 x float> {{.*}}, ptr [[ALLOC]], align 64
; EE: [[IDX:%.*]] = sext i32 %idx to i64
-; EE: [[ADDR:%.*]] = getelementptr inbounds float, ptr [[ALLOC]], i64 [[IDX]]
-; EE: [[GATHER:%.*]] = call <vscale x 4 x float> @__vecz_b_interleaved_load4_4_u5nxv4fu3ptr(ptr nonnull [[ADDR]])
+; EE: [[ADDR:%.*]] = getelementptr float, ptr [[ALLOC]], i64 [[IDX]]
+; EE: [[GATHER:%.*]] = call <vscale x 4 x float> @__vecz_b_interleaved_load4_4_u5nxv4fu3ptr(ptr [[ADDR]])

; Both the vector and index are uniform, so check we're not unnecessarily packetizing

@@ -120,21 +120,21 @@ entry:
; LLVM 16 deduces add/or equivalence and uses `or` instead.
; EE-UNI-VEC: [[T7:%.*]] = {{add|or}} {{(disjoint )?}}<vscale x 4 x i64> [[T6]], [[MOD]]

-; EE-UNI-VEC: [[T8:%.*]] = getelementptr inbounds float, ptr {{%.*}}, <vscale x 4 x i64> [[T7]]
+; EE-UNI-VEC: [[T8:%.*]] = getelementptr float, ptr {{%.*}}, <vscale x 4 x i64> [[T7]]
; EE-UNI-VEC: [[T9:%.*]] = call <vscale x 4 x float> @__vecz_b_gather_load4_u5nxv4fu9nxv4u3ptr(<vscale x 4 x ptr> [[T8]])
; EE-UNI-VEC: store <vscale x 4 x float> [[T9]], ptr addrspace(1) {{%.*}}, align 4

; EE-INDICES-LABEL: @__vecz_nxv4_extract_element_varying_indices(
; EE-INDICES: [[ALLOC:%.*]] = alloca <vscale x 16 x float>, align 64
-; EE-INDICES: [[T0:%.*]] = getelementptr inbounds i32, ptr addrspace(1) %idxs, i64 %call
+; EE-INDICES: [[T0:%.*]] = getelementptr i32, ptr addrspace(1) %idxs, i64 %call
; EE-INDICES: [[T2:%.*]] = load <vscale x 4 x i32>, ptr addrspace(1) [[T0]], align 4
; EE-INDICES: [[T3:%.*]] = and <vscale x 4 x i32> [[T2]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> {{(undef|poison)}}, i32 3, {{i32|i64}} 0), <vscale x 4 x i32> {{(undef|poison)}}, <vscale x 4 x i32> zeroinitializer)
; EE-INDICES: store <vscale x 16 x float> {{.*}}, ptr [[ALLOC]], align 64
; EE-INDICES: [[STEP:%.*]] = call <vscale x 4 x i32> @llvm.{{(experimental\.)?}}stepvector.nxv4i32()
; EE-INDICES: [[T4:%.*]] = shl <vscale x 4 x i32> [[STEP]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> {{(undef|poison)}}, i32 2, {{i32|i64}} 0), <vscale x 4 x i32> {{(undef|poison)}}, <vscale x 4 x i32> zeroinitializer)
; EE-INDICES: [[T5:%.*]] = {{add|or}} {{(disjoint )?}}<vscale x 4 x i32> [[T4]], [[T3]]
; EE-INDICES: [[IDX:%.*]] = sext <vscale x 4 x i32> [[T5]] to <vscale x 4 x i64>
-; EE-INDICES: [[ADDR:%.*]] = getelementptr inbounds float, ptr [[ALLOC]], <vscale x 4 x i64> [[IDX]]
+; EE-INDICES: [[ADDR:%.*]] = getelementptr float, ptr [[ALLOC]], <vscale x 4 x i64> [[IDX]]
; EE-INDICES: [[GATHER:%.*]] = call <vscale x 4 x float> @__vecz_b_gather_load4_u5nxv4fu9nxv4u3ptr(<vscale x 4 x ptr> [[ADDR]])

; Check we promote from i1 to i8 before doing our memops
@@ -85,8 +85,8 @@ entry:
; IE: [[VAL1:%.*]] = shufflevector <vscale x 4 x float> [[VAL0]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
; IE: store <vscale x 16 x float> {{.*}}, ptr [[ALLOC]], align 64
; IE: [[IDX:%.*]] = sext i32 %idx to i64
-; IE: [[ADDR:%.*]] = getelementptr inbounds float, ptr [[ALLOC]], i64 [[IDX]]
-; IE: call void @__vecz_b_interleaved_store4_4_u5nxv4fu3ptr(<vscale x 4 x float> [[VAL1]], ptr nonnull [[ADDR]])
+; IE: [[ADDR:%.*]] = getelementptr float, ptr [[ALLOC]], i64 [[IDX]]
+; IE: call void @__vecz_b_interleaved_store4_4_u5nxv4fu3ptr(<vscale x 4 x float> [[VAL1]], ptr [[ADDR]])
; IE: = load <vscale x 16 x float>, ptr [[ALLOC]], align 64

; Both the vector and index are uniform, so check we're not unnecessarily packetizing
@@ -105,7 +105,7 @@ entry:
; IE-INDICES: [[T3:%.*]] = {{add|or}} {{(disjoint )?}}<vscale x 4 x i32> [[T2]], {{%.*}}

; IE-INDICES: [[T4:%.*]] = sext <vscale x 4 x i32> [[T3]] to <vscale x 4 x i64>
-; IE-INDICES: [[ADDR:%.*]] = getelementptr inbounds float, ptr %0, <vscale x 4 x i64> [[T4]]
+; IE-INDICES: [[ADDR:%.*]] = getelementptr float, ptr %0, <vscale x 4 x i64> [[T4]]
; IE-INDICES: call void @__vecz_b_scatter_store4_u5nxv4fu9nxv4u3ptr(<vscale x 4 x float> [[VAL]], <vscale x 4 x ptr> [[ADDR]])
; IE-INDICES: = load <vscale x 16 x float>, ptr [[ALLOC]], align 64

@@ -45,7 +45,7 @@ if.end:
; make any difference whether it's a zext or sext, but LLVM 16 prefers zext.
; CHECK: [[idx2:%.*]] = {{s|z}}ext{{( nneg)?}} <vscale x 16 x i32> [[idx1]] to <vscale x 16 x i64>

-; CHECK: [[t1:%.*]] = getelementptr inbounds i8, ptr {{.*}}, <vscale x 16 x i64> [[idx2]]
+; CHECK: [[t1:%.*]] = getelementptr i8, ptr {{.*}}, <vscale x 16 x i64> [[idx2]]
; CHECK: [[t2:%.*]] = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> [[t1]],
; CHECK: [[splat:%.*]] = trunc <vscale x 16 x i8> [[t2]] to <vscale x 16 x i1>
; CHECK: call void @__vecz_b_masked_store16_u6nxv16ju3ptru6nxv16b(<vscale x 16 x i32> {{.*}}, ptr %arrayidxz, <vscale x 16 x i1> [[splat]])
@@ -50,7 +50,7 @@ entry:
; make any difference whether it's a zext or sext, but LLVM 16 prefers zext.
; CHECK: [[sext2:%.*]] = {{s|z}}ext{{( nneg)?}} <vscale x 8 x i32> [[idx1]] to <vscale x 8 x i64>

-; CHECK: [[addrs:%.*]] = getelementptr inbounds i8, ptr [[alloc]], <vscale x 8 x i64> [[sext2]]
+; CHECK: [[addrs:%.*]] = getelementptr i8, ptr [[alloc]], <vscale x 8 x i64> [[sext2]]
; CHECK: [[gather:%.*]] = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> [[addrs]],
; CHECK: [[cmp:%.*]] = trunc <vscale x 8 x i8> [[gather]] to <vscale x 8 x i1>
; CHECK: [[sel:%.*]] = select <vscale x 8 x i1> [[cmp]], <vscale x 8 x i32> [[rhs]], <vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> {{(undef|poison)}}, i32 4, {{(i32|i64)}} 0), <vscale x 8 x i32> {{(undef|poison)}}, <vscale x 8 x i32> zeroinitializer)
@@ -39,7 +39,7 @@ define spir_kernel void @do_shuffle_splat(i32* %aptr, <4 x i32>* %bptr, <4 x i32
; make any difference whether it's a zext or sext, but LLVM 16 prefers zext.
; CHECK: [[idx2:%.*]] = {{s|z}}ext{{( nneg)?}} <vscale x 16 x i32> [[idx1]] to <vscale x 16 x i64>

-; CHECK: [[alloc:%.*]] = getelementptr inbounds i32, ptr %{{.*}}, <vscale x 16 x i64> [[idx2]]
+; CHECK: [[alloc:%.*]] = getelementptr i32, ptr %{{.*}}, <vscale x 16 x i64> [[idx2]]
; CHECK: [[splat:%.*]] = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> [[alloc]],
; CHECK: store <vscale x 16 x i32> [[splat]], ptr
}
@@ -54,7 +54,7 @@ define spir_kernel void @get_sub_group_local_id(i32 addrspace(1)* %in, i32 addrs
; CHECK: [[STEPVEC:%.*]] = call <vscale x 4 x i32> @llvm.{{(experimental\.)?}}stepvector.nxv4i32()
; CHECK: [[LID:%.*]] = add <vscale x 4 x i32> [[SPLAT]], [[STEPVEC]]
; CHECK: [[EXT:%.*]] = sext i32 %call to i64
-; CHECK: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %out, i64 [[EXT]]
+; CHECK: %arrayidx = getelementptr i32, ptr addrspace(1) %out, i64 [[EXT]]
; CHECK: store <vscale x 4 x i32> [[LID]], ptr addrspace(1) %arrayidx
}

@@ -167,7 +167,7 @@ declare i64 @__mux_get_global_id(i32)
; CHECK: for.body:
; CHECK: %add = add nsw i32 %storemerge, %a
; CHECK: %idxprom = sext i32 %add2 to i64
-; CHECK: %arrayidx = getelementptr inbounds i32, ptr %b, i64 %idxprom
+; CHECK: %arrayidx = getelementptr i32, ptr %b, i64 %idxprom
; CHECK: store i32 %add, ptr %arrayidx, align 4
; CHECK: %inc = add nsw i32 %storemerge, 1
; CHECK: br label %for.cond
@@ -35,6 +35,6 @@ declare i64 @__mux_get_global_id(i32) #1

; CHECK: define spir_kernel void @__vecz_v4_extract_constant_index
; CHECK: call <4 x float> @__vecz_b_interleaved_load4_4_Dv4
-; CHECK: getelementptr inbounds float
+; CHECK: getelementptr float
; CHECK: store <4 x float>
; CHECK: ret void
6 changes: 3 additions & 3 deletions modules/compiler/vecz/test/lit/llvm/load_add_store.ll
@@ -34,9 +34,9 @@ entry:
ret void
; CHECK-LABEL: @__vecz_v4_load_add_store(ptr %aptr, ptr %bptr, ptr %zptr)
; CHECK: %idx = call i64 @__mux_get_global_id(i32 0)
-; CHECK: %arrayidxa = getelementptr inbounds i32, ptr %aptr, i64 %idx
-; CHECK: %arrayidxb = getelementptr inbounds i32, ptr %bptr, i64 %idx
-; CHECK: %arrayidxz = getelementptr inbounds i32, ptr %zptr, i64 %idx
+; CHECK: %arrayidxa = getelementptr i32, ptr %aptr, i64 %idx
+; CHECK: %arrayidxb = getelementptr i32, ptr %bptr, i64 %idx
+; CHECK: %arrayidxz = getelementptr i32, ptr %zptr, i64 %idx
; CHECK: %[[TMP0:.*]] = load <4 x i32>, ptr %arrayidxa, align 4
; CHECK: %[[TMP1:.*]] = load <4 x i32>, ptr %arrayidxb, align 4
; CHECK: %sum1 = add <4 x i32> %[[TMP0]], %[[TMP1]]
@@ -91,12 +91,12 @@ declare i64 @__mux_get_global_id(i32)
; CHECK: br i1 %cmp, label %if.then, label %if.else

; CHECK: if.then:
-; CHECK: %[[GEP1:.+]] = getelementptr inbounds i32, ptr %b, <4 x i64>
+; CHECK: %[[GEP1:.+]] = getelementptr i32, ptr %b, <4 x i64>
; CHECK: store <4 x i32> <i32 11, i32 11, i32 11, i32 11>, ptr %{{.+}}, align 4
; CHECK: br label %if.end

; CHECK: if.else:
-; CHECK: %[[GEP2:.+]] = getelementptr inbounds i32, ptr %b, <4 x i64>
+; CHECK: %[[GEP2:.+]] = getelementptr i32, ptr %b, <4 x i64>
; CHECK: store <4 x i32> <i32 13, i32 13, i32 13, i32 13>, ptr %{{.+}}, align 4
; CHECK: br label %if.end

4 changes: 2 additions & 2 deletions modules/compiler/vecz/test/lit/llvm/packetize_struct_gep.ll
@@ -42,5 +42,5 @@ declare i64 @__mux_get_global_id(i32)
; Check if we can packetize GEPs on structs
; Note that we only need to packetize the non-uniform operands.
; CHECK: define spir_kernel void @__vecz_v4_test
-; CHECK: getelementptr inbounds %struct.T, ptr addrspace(1) %{{.+}}, <4 x i64> %{{.+}}, i32 2
-; CHECK: getelementptr inbounds %struct.T, ptr addrspace(1) %{{.+}}, <4 x i64> %{{.+}}, i32 2
+; CHECK: getelementptr %struct.T, ptr addrspace(1) %{{.+}}, <4 x i64> %{{.+}}, i32 2
+; CHECK: getelementptr %struct.T, ptr addrspace(1) %{{.+}}, <4 x i64> %{{.+}}, i32 2
@@ -25,14 +25,14 @@ declare i64 @__mux_get_global_id(i32)
; CHECK: IR Dump After Simplify masked memory operations{{( on __vecz_v2_foo)?}}
; CHECK-NEXT: define spir_kernel void @__vecz_v2_foo(ptr addrspace(1) %out) #0 {
; CHECK-NEXT: %idx = call i64 @__mux_get_global_id(i32 0)
-; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %idx
+; CHECK-NEXT: %arrayidx = getelementptr i32, ptr addrspace(1) %out, i64 %idx
; CHECK-NEXT: store i32 0, ptr addrspace(1) %arrayidx, align 4
; CHECK-NEXT: ret void
; CHECK-NEXT: }

; CHECK: define spir_kernel void @__vecz_v2_foo(ptr addrspace(1) %out) {{.*}} {
; CHECK-NEXT: %idx = call i64 @__mux_get_global_id(i32 0)
-; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %idx
+; CHECK-NEXT: %arrayidx = getelementptr i32, ptr addrspace(1) %out, i64 %idx
; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(1) %arrayidx, align 4
; CHECK-NEXT: ret void
; CHECK-NEXT: }