[vecz] Clear getelementptr flags.
If a getelementptr has inbounds/nusw/nuw flags, it is valid for the
result to be out of bounds or to wrap, so long as the result is not used
to access any object. However, when we vectorize, multiple elements are
all loaded based on the first element's address, which may introduce
accesses that the original program never performed, so we need to clear
the inbounds/nusw/nuw flags unless we can prove they are still valid.
hvdijk committed Sep 11, 2024
1 parent 4acf063 commit 00ed327
Showing 22 changed files with 88 additions and 80 deletions.
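To see why the flags are unsafe to keep, here is a minimal sketch (not taken from this commit; the function @sketch and the names %live, %i0 and %mask are illustrative). A scalar kernel may compute an address it only conditionally uses:

define void @sketch(ptr %p, i64 %i, i1 %live) {
entry:
  ; An out-of-bounds %addr is fine here: under inbounds it is merely a
  ; poison value, and it is only dereferenced when %live is true.
  %addr = getelementptr inbounds i32, ptr %p, i64 %i
  br i1 %live, label %do, label %done
do:
  store i32 0, ptr %addr, align 4
  br label %done
done:
  ret void
}

Assuming %i is consecutive across lanes (e.g. a global id), vectorizing by 4 turns the guarded store into a masked vector store whose base is lane 0's address, so that address now feeds a real memory access even when lane 0 itself is masked off. The shape is roughly the following (hypothetical, not vecz's exact output):

  %addr = getelementptr i32, ptr %p, i64 %i0
  call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr %addr, i32 4, <4 x i1> %mask)

With inbounds retained, an out-of-bounds lane-0 address would be poison and the live lanes' addresses would be derived from poison; with the flag cleared, lane 0's address is simply a valid-but-unused computation.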
@@ -385,6 +385,14 @@ void UniformValueResult::markVaryingValues(Value *V, Value *From) {
        markVaryingValues(Alloca);
      }
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(VIns)) {
+     // We need to clear the flags because the initial address may be out of
+     // bounds but masked out.
+#if LLVM_VERSION_GREATER_EQUAL(19, 0)
+     GEP->setNoWrapFlags(GEPNoWrapFlags::none());
+#else
+     GEP->setIsInBounds(false);
+#endif
+
      // Same as with the stores
      AllocaInst *Alloca = findAllocaFromPointer(GEP->getPointerOperand());
      if (Alloca) {
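The effect on a varying GEP is pure flag removal; as a sketch (the nuw spelling assumes LLVM 19's GEP no-wrap syntax, and %base/%tid are illustrative):

  ; before: the flags promise in-bounds, non-wrapping address arithmetic
  %a = getelementptr inbounds nuw i32, ptr %base, i64 %tid
  ; after markVaryingValues: same address computation, no promises
  %a = getelementptr i32, ptr %base, i64 %tid

On LLVM 19+ a single setNoWrapFlags call clears all of inbounds/nusw/nuw; before LLVM 19 only inbounds exists, hence the #if.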
@@ -99,7 +99,7 @@ entry:
; CHECK-LABEL: @__vecz_nxv4_vector_broadcast_const(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL:%.*]] = tail call i64 @__mux_get_global_id(i32 0)
-; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]]
; CHECK-NEXT: store <vscale x 16 x float> shufflevector (<vscale x 16 x float> insertelement (<vscale x 16 x float> {{(undef|poison)}}, float 0x7FF8000020000000, {{(i32|i64)}} 0), <vscale x 16 x float> {{(undef|poison)}}, <vscale x 16 x i32> zeroinitializer), ptr addrspace(1) [[ARRAYIDX3]], align 16
; CHECK-NEXT: ret void

@@ -113,17 +113,17 @@ entry:
; CHECK-NEXT: [[VEC_ALLOC:%.*]] = getelementptr inbounds float, ptr [[FIXLEN_ALLOC]], <vscale x 16 x i64> [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> [[VEC_ALLOC]], i32 4, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, {{(i32|i64)}} 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x float> undef)
; CHECK-NEXT: [[CALL:%.*]] = tail call i64 @__mux_get_global_id(i32 0)
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[IN:%.*]], i64 [[CALL]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[IN:%.*]], i64 [[CALL]]
; CHECK-NEXT: [[TMP3:%.*]] = load <vscale x 16 x float>, ptr addrspace(1) [[ARRAYIDX]], align 16
; CHECK-NEXT: [[TMP4:%.*]] = fadd <vscale x 16 x float> [[TMP3]], [[TMP1]]
-; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]]
; CHECK-NEXT: store <vscale x 16 x float> [[TMP4]], ptr addrspace(1) [[ARRAYIDX3]], align 16
; CHECK-NEXT: ret void

; CHECK-LABEL: @__vecz_nxv4_vector_broadcast_regression(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL:%.*]] = tail call i64 @__mux_get_global_id(i32 0)
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[IN:%.*]], i64 [[CALL]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[IN:%.*]], i64 [[CALL]]
; CHECK-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i32>, ptr addrspace(1) [[ARRAYIDX]], align 16
; CHECK-NEXT: [[AND1_I_I_I1_I1:%.*]] = and <vscale x 16 x i32> [[TMP1]], shufflevector (<vscale x 16 x i32> insertelement (<vscale x 16 x i32> {{(undef|poison)}}, i32 2139095040, {{i32|i64}} 0), <vscale x 16 x i32> {{(undef|poison)}}, <vscale x 16 x i32> zeroinitializer)
; CHECK-NEXT: [[CMP_I_I_I2_I2:%.*]] = icmp ne <vscale x 16 x i32> [[AND1_I_I_I1_I1]], shufflevector (<vscale x 16 x i32> insertelement (<vscale x 16 x i32> {{(undef|poison)}}, i32 2139095040, {{i32|i64}} 0), <vscale x 16 x i32> {{(undef|poison)}}, <vscale x 16 x i32> zeroinitializer)
@@ -132,7 +132,7 @@ entry:
; CHECK-NEXT: [[TMP2:%.*]] = or <vscale x 16 x i1> [[CMP_I_I_I2_I2]], [[CMP3_I_I_I4_I4]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <vscale x 16 x i32> [[TMP1]] to <vscale x 16 x float>
; CHECK-NEXT: [[TMP4:%.*]] = select <vscale x 16 x i1> [[TMP2]], <vscale x 16 x float> [[TMP3]], <vscale x 16 x float> shufflevector (<vscale x 16 x float> insertelement (<vscale x 16 x float> {{(undef|poison)}}, float 0x7FF0000020000000, {{i32|i64}} 0), <vscale x 16 x float> {{(undef|poison)}}, <vscale x 16 x i32> zeroinitializer)
-; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]]
; CHECK-NEXT: store <vscale x 16 x float> [[TMP4]], ptr addrspace(1) [[ARRAYIDX3]], align 16
; CHECK-NEXT: ret void
;
@@ -156,12 +156,12 @@ entry:
; CHECK-NEXT: [[TMP2:%.*]] = {{s|z}}ext{{( nneg)?}} <vscale x 16 x i32> [[IDX14]] to <vscale x 16 x i64>
; CHECK-NEXT: [[VEC_ALLOC:%.*]] = getelementptr inbounds i32, ptr [[FIXLEN_ALLOC]], <vscale x 16 x i64> [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> [[VEC_ALLOC]], i32 4, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, {{i32|i64}} 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i32> {{(undef|poison)}})
-; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[OUT2:%.*]], i64 [[CALL]]
+; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr <4 x i32>, ptr addrspace(1) [[OUT2:%.*]], i64 [[CALL]]
; CHECK-NEXT: store <vscale x 16 x i32> [[TMP3]], ptr addrspace(1) [[ARRAYIDX4]], align 16
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[IN:%.*]], i64 [[CALL]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[IN:%.*]], i64 [[CALL]]
; CHECK-NEXT: [[TMP6:%.*]] = load <vscale x 16 x float>, ptr addrspace(1) [[ARRAYIDX]], align 16
; CHECK-NEXT: [[V46:%.*]] = fadd <vscale x 16 x float> [[TMP6]], [[TMP1]]
-; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]]
; CHECK-NEXT: store <vscale x 16 x float> [[V46]], ptr addrspace(1) [[ARRAYIDX3]], align 16
; CHECK-NEXT: ret void
;
@@ -95,8 +95,8 @@ entry:
; EE: [[ALLOC:%.*]] = alloca <vscale x 16 x float>, align 64
; EE: store <vscale x 16 x float> {{.*}}, ptr [[ALLOC]], align 64
; EE: [[IDX:%.*]] = sext i32 %idx to i64
-; EE: [[ADDR:%.*]] = getelementptr inbounds float, ptr [[ALLOC]], i64 [[IDX]]
-; EE: [[GATHER:%.*]] = call <vscale x 4 x float> @__vecz_b_interleaved_load4_4_u5nxv4fu3ptr(ptr nonnull [[ADDR]])
+; EE: [[ADDR:%.*]] = getelementptr float, ptr [[ALLOC]], i64 [[IDX]]
+; EE: [[GATHER:%.*]] = call <vscale x 4 x float> @__vecz_b_interleaved_load4_4_u5nxv4fu3ptr(ptr [[ADDR]])

; Both the vector and index are uniform, so check we're not unnecessarily packetizing

@@ -120,21 +120,21 @@ entry:
; LLVM 16 deduces add/or equivalence and uses `or` instead.
; EE-UNI-VEC: [[T7:%.*]] = {{add|or}} {{(disjoint )?}}<vscale x 4 x i64> [[T6]], [[MOD]]

-; EE-UNI-VEC: [[T8:%.*]] = getelementptr inbounds float, ptr {{%.*}}, <vscale x 4 x i64> [[T7]]
+; EE-UNI-VEC: [[T8:%.*]] = getelementptr float, ptr {{%.*}}, <vscale x 4 x i64> [[T7]]
; EE-UNI-VEC: [[T9:%.*]] = call <vscale x 4 x float> @__vecz_b_gather_load4_u5nxv4fu9nxv4u3ptr(<vscale x 4 x ptr> [[T8]])
; EE-UNI-VEC: store <vscale x 4 x float> [[T9]], ptr addrspace(1) {{%.*}}, align 4

; EE-INDICES-LABEL: @__vecz_nxv4_extract_element_varying_indices(
; EE-INDICES: [[ALLOC:%.*]] = alloca <vscale x 16 x float>, align 64
-; EE-INDICES: [[T0:%.*]] = getelementptr inbounds i32, ptr addrspace(1) %idxs, i64 %call
+; EE-INDICES: [[T0:%.*]] = getelementptr i32, ptr addrspace(1) %idxs, i64 %call
; EE-INDICES: [[T2:%.*]] = load <vscale x 4 x i32>, ptr addrspace(1) [[T0]], align 4
; EE-INDICES: [[T3:%.*]] = and <vscale x 4 x i32> [[T2]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> {{(undef|poison)}}, i32 3, {{i32|i64}} 0), <vscale x 4 x i32> {{(undef|poison)}}, <vscale x 4 x i32> zeroinitializer)
; EE-INDICES: store <vscale x 16 x float> {{.*}}, ptr [[ALLOC]], align 64
; EE-INDICES: [[STEP:%.*]] = call <vscale x 4 x i32> @llvm.{{(experimental\.)?}}stepvector.nxv4i32()
; EE-INDICES: [[T4:%.*]] = shl <vscale x 4 x i32> [[STEP]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> {{(undef|poison)}}, i32 2, {{i32|i64}} 0), <vscale x 4 x i32> {{(undef|poison)}}, <vscale x 4 x i32> zeroinitializer)
; EE-INDICES: [[T5:%.*]] = {{add|or}} {{(disjoint )?}}<vscale x 4 x i32> [[T4]], [[T3]]
; EE-INDICES: [[IDX:%.*]] = sext <vscale x 4 x i32> [[T5]] to <vscale x 4 x i64>
-; EE-INDICES: [[ADDR:%.*]] = getelementptr inbounds float, ptr [[ALLOC]], <vscale x 4 x i64> [[IDX]]
+; EE-INDICES: [[ADDR:%.*]] = getelementptr float, ptr [[ALLOC]], <vscale x 4 x i64> [[IDX]]
; EE-INDICES: [[GATHER:%.*]] = call <vscale x 4 x float> @__vecz_b_gather_load4_u5nxv4fu9nxv4u3ptr(<vscale x 4 x ptr> [[ADDR]])

; Check we promote from i1 to i8 before doing our memops
@@ -85,8 +85,8 @@ entry:
; IE: [[VAL1:%.*]] = shufflevector <vscale x 4 x float> [[VAL0]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
; IE: store <vscale x 16 x float> {{.*}}, ptr [[ALLOC]], align 64
; IE: [[IDX:%.*]] = sext i32 %idx to i64
-; IE: [[ADDR:%.*]] = getelementptr inbounds float, ptr [[ALLOC]], i64 [[IDX]]
-; IE: call void @__vecz_b_interleaved_store4_4_u5nxv4fu3ptr(<vscale x 4 x float> [[VAL1]], ptr nonnull [[ADDR]])
+; IE: [[ADDR:%.*]] = getelementptr float, ptr [[ALLOC]], i64 [[IDX]]
+; IE: call void @__vecz_b_interleaved_store4_4_u5nxv4fu3ptr(<vscale x 4 x float> [[VAL1]], ptr [[ADDR]])
; IE: = load <vscale x 16 x float>, ptr [[ALLOC]], align 64

; Both the vector and index are uniform, so check we're not unnecessarily packetizing
@@ -105,7 +105,7 @@ entry:
; IE-INDICES: [[T3:%.*]] = {{add|or}} {{(disjoint )?}}<vscale x 4 x i32> [[T2]], {{%.*}}

; IE-INDICES: [[T4:%.*]] = sext <vscale x 4 x i32> [[T3]] to <vscale x 4 x i64>
-; IE-INDICES: [[ADDR:%.*]] = getelementptr inbounds float, ptr %0, <vscale x 4 x i64> [[T4]]
+; IE-INDICES: [[ADDR:%.*]] = getelementptr float, ptr %0, <vscale x 4 x i64> [[T4]]
; IE-INDICES: call void @__vecz_b_scatter_store4_u5nxv4fu9nxv4u3ptr(<vscale x 4 x float> [[VAL]], <vscale x 4 x ptr> [[ADDR]])
; IE-INDICES: = load <vscale x 16 x float>, ptr [[ALLOC]], align 64

@@ -45,7 +45,7 @@ if.end:
; make any difference whether it's a zext or sext, but LLVM 16 prefers zext.
; CHECK: [[idx2:%.*]] = {{s|z}}ext{{( nneg)?}} <vscale x 16 x i32> [[idx1]] to <vscale x 16 x i64>

-; CHECK: [[t1:%.*]] = getelementptr inbounds i8, ptr {{.*}}, <vscale x 16 x i64> [[idx2]]
+; CHECK: [[t1:%.*]] = getelementptr i8, ptr {{.*}}, <vscale x 16 x i64> [[idx2]]
; CHECK: [[t2:%.*]] = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> [[t1]],
; CHECK: [[splat:%.*]] = trunc <vscale x 16 x i8> [[t2]] to <vscale x 16 x i1>
; CHECK: call void @__vecz_b_masked_store16_u6nxv16ju3ptru6nxv16b(<vscale x 16 x i32> {{.*}}, ptr %arrayidxz, <vscale x 16 x i1> [[splat]])
@@ -50,7 +50,7 @@ entry:
; make any difference whether it's a zext or sext, but LLVM 16 prefers zext.
; CHECK: [[sext2:%.*]] = {{s|z}}ext{{( nneg)?}} <vscale x 8 x i32> [[idx1]] to <vscale x 8 x i64>

-; CHECK: [[addrs:%.*]] = getelementptr inbounds i8, ptr [[alloc]], <vscale x 8 x i64> [[sext2]]
+; CHECK: [[addrs:%.*]] = getelementptr i8, ptr [[alloc]], <vscale x 8 x i64> [[sext2]]
; CHECK: [[gather:%.*]] = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> [[addrs]],
; CHECK: [[cmp:%.*]] = trunc <vscale x 8 x i8> [[gather]] to <vscale x 8 x i1>
; CHECK: [[sel:%.*]] = select <vscale x 8 x i1> [[cmp]], <vscale x 8 x i32> [[rhs]], <vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> {{(undef|poison)}}, i32 4, {{(i32|i64)}} 0), <vscale x 8 x i32> {{(undef|poison)}}, <vscale x 8 x i32> zeroinitializer)
@@ -39,7 +39,7 @@ define spir_kernel void @do_shuffle_splat(i32* %aptr, <4 x i32>* %bptr, <4 x i32
; make any difference whether it's a zext or sext, but LLVM 16 prefers zext.
; CHECK: [[idx2:%.*]] = {{s|z}}ext{{( nneg)?}} <vscale x 16 x i32> [[idx1]] to <vscale x 16 x i64>

-; CHECK: [[alloc:%.*]] = getelementptr inbounds i32, ptr %{{.*}}, <vscale x 16 x i64> [[idx2]]
+; CHECK: [[alloc:%.*]] = getelementptr i32, ptr %{{.*}}, <vscale x 16 x i64> [[idx2]]
; CHECK: [[splat:%.*]] = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> [[alloc]],
; CHECK: store <vscale x 16 x i32> [[splat]], ptr
}
@@ -54,7 +54,7 @@ define spir_kernel void @get_sub_group_local_id(i32 addrspace(1)* %in, i32 addrs
; CHECK: [[STEPVEC:%.*]] = call <vscale x 4 x i32> @llvm.{{(experimental\.)?}}stepvector.nxv4i32()
; CHECK: [[LID:%.*]] = add <vscale x 4 x i32> [[SPLAT]], [[STEPVEC]]
; CHECK: [[EXT:%.*]] = sext i32 %call to i64
-; CHECK: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %out, i64 [[EXT]]
+; CHECK: %arrayidx = getelementptr i32, ptr addrspace(1) %out, i64 [[EXT]]
; CHECK: store <vscale x 4 x i32> [[LID]], ptr addrspace(1) %arrayidx
}

@@ -167,7 +167,7 @@ declare i64 @__mux_get_global_id(i32)
; CHECK: for.body:
; CHECK: %add = add nsw i32 %storemerge, %a
; CHECK: %idxprom = sext i32 %add2 to i64
-; CHECK: %arrayidx = getelementptr inbounds i32, ptr %b, i64 %idxprom
+; CHECK: %arrayidx = getelementptr i32, ptr %b, i64 %idxprom
; CHECK: store i32 %add, ptr %arrayidx, align 4
; CHECK: %inc = add nsw i32 %storemerge, 1
; CHECK: br label %for.cond
@@ -35,6 +35,6 @@ declare i64 @__mux_get_global_id(i32) #1

; CHECK: define spir_kernel void @__vecz_v4_extract_constant_index
; CHECK: call <4 x float> @__vecz_b_interleaved_load4_4_Dv4
-; CHECK: getelementptr inbounds float
+; CHECK: getelementptr float
; CHECK: store <4 x float>
; CHECK: ret void
6 changes: 3 additions & 3 deletions modules/compiler/vecz/test/lit/llvm/load_add_store.ll
@@ -34,9 +34,9 @@ entry:
ret void
; CHECK-LABEL: @__vecz_v4_load_add_store(ptr %aptr, ptr %bptr, ptr %zptr)
; CHECK: %idx = call i64 @__mux_get_global_id(i32 0)
-; CHECK: %arrayidxa = getelementptr inbounds i32, ptr %aptr, i64 %idx
-; CHECK: %arrayidxb = getelementptr inbounds i32, ptr %bptr, i64 %idx
-; CHECK: %arrayidxz = getelementptr inbounds i32, ptr %zptr, i64 %idx
+; CHECK: %arrayidxa = getelementptr i32, ptr %aptr, i64 %idx
+; CHECK: %arrayidxb = getelementptr i32, ptr %bptr, i64 %idx
+; CHECK: %arrayidxz = getelementptr i32, ptr %zptr, i64 %idx
; CHECK: %[[TMP0:.*]] = load <4 x i32>, ptr %arrayidxa, align 4
; CHECK: %[[TMP1:.*]] = load <4 x i32>, ptr %arrayidxb, align 4
; CHECK: %sum1 = add <4 x i32> %[[TMP0]], %[[TMP1]]
@@ -91,12 +91,12 @@ declare i64 @__mux_get_global_id(i32)
; CHECK: br i1 %cmp, label %if.then, label %if.else

; CHECK: if.then:
-; CHECK: %[[GEP1:.+]] = getelementptr inbounds i32, ptr %b, <4 x i64>
+; CHECK: %[[GEP1:.+]] = getelementptr i32, ptr %b, <4 x i64>
; CHECK: store <4 x i32> <i32 11, i32 11, i32 11, i32 11>, ptr %{{.+}}, align 4
; CHECK: br label %if.end

; CHECK: if.else:
-; CHECK: %[[GEP2:.+]] = getelementptr inbounds i32, ptr %b, <4 x i64>
+; CHECK: %[[GEP2:.+]] = getelementptr i32, ptr %b, <4 x i64>
; CHECK: store <4 x i32> <i32 13, i32 13, i32 13, i32 13>, ptr %{{.+}}, align 4
; CHECK: br label %if.end

4 changes: 2 additions & 2 deletions modules/compiler/vecz/test/lit/llvm/packetize_struct_gep.ll
@@ -42,5 +42,5 @@ declare i64 @__mux_get_global_id(i32)
; Check if we can packetize GEPs on structs
; Note that we only need to packetize the non-uniform operands.
; CHECK: define spir_kernel void @__vecz_v4_test
-; CHECK: getelementptr inbounds %struct.T, ptr addrspace(1) %{{.+}}, <4 x i64> %{{.+}}, i32 2
-; CHECK: getelementptr inbounds %struct.T, ptr addrspace(1) %{{.+}}, <4 x i64> %{{.+}}, i32 2
+; CHECK: getelementptr %struct.T, ptr addrspace(1) %{{.+}}, <4 x i64> %{{.+}}, i32 2
+; CHECK: getelementptr %struct.T, ptr addrspace(1) %{{.+}}, <4 x i64> %{{.+}}, i32 2
@@ -25,14 +25,14 @@ declare i64 @__mux_get_global_id(i32)
; CHECK: IR Dump After Simplify masked memory operations{{( on __vecz_v2_foo)?}}
; CHECK-NEXT: define spir_kernel void @__vecz_v2_foo(ptr addrspace(1) %out) #0 {
; CHECK-NEXT: %idx = call i64 @__mux_get_global_id(i32 0)
-; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %idx
+; CHECK-NEXT: %arrayidx = getelementptr i32, ptr addrspace(1) %out, i64 %idx
; CHECK-NEXT: store i32 0, ptr addrspace(1) %arrayidx, align 4
; CHECK-NEXT: ret void
; CHECK-NEXT: }

; CHECK: define spir_kernel void @__vecz_v2_foo(ptr addrspace(1) %out) {{.*}} {
; CHECK-NEXT: %idx = call i64 @__mux_get_global_id(i32 0)
-; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %idx
+; CHECK-NEXT: %arrayidx = getelementptr i32, ptr addrspace(1) %out, i64 %idx
; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(1) %arrayidx, align 4
; CHECK-NEXT: ret void
; CHECK-NEXT: }