From 00ed327740b24be8877c2b259e5205713ea5db99 Mon Sep 17 00:00:00 2001
From: Harald van Dijk
Date: Wed, 11 Sep 2024 15:20:56 +0100
Subject: [PATCH] [vecz] Clear getelementptr flags.

If a getelementptr has the inbounds/nusw/nuw flags, it is valid for the
result to be out of bounds or to wrap, so long as the result is not used
to access any object. However, when we vectorize, multiple elements are
all loaded relative to the first element's address, which may introduce
accesses the scalar code never performed, so we need to clear the
inbounds/nusw/nuw flags unless we can prove they still hold.
---
 .../analysis/uniform_value_analysis.cpp       |  8 +++
 .../llvm/ScalableVectors/broadcast_vector.ll  | 16 +++---
 .../llvm/ScalableVectors/extract_element.ll   | 10 ++--
 .../llvm/ScalableVectors/insert_element.ll    |  6 +--
 .../ScalableVectors/packetize_mask_varying.ll |  2 +-
 .../ScalableVectors/select_scalar_vector.ll   |  2 +-
 .../test/lit/llvm/ScalableVectors/shuffle.ll  |  2 +-
 .../llvm/ScalableVectors/subgroup_builtins.ll |  2 +-
 .../control_flow_conversion_uniform_loop.ll   |  2 +-
 .../lit/llvm/extractelement_constant_index.ll |  2 +-
 .../vecz/test/lit/llvm/load_add_store.ll      |  6 +--
 .../lit/llvm/packetization_uniform_branch.ll  |  4 +-
 .../test/lit/llvm/packetize_struct_gep.ll     |  4 +-
 .../test/lit/llvm/pass_pipeline_printafter.ll |  4 +-
 .../lit/llvm/scalarization_instructions.ll    | 50 +++++++++----------
 .../vecz/test/lit/llvm/scalarize_mixed_gep.ll |  2 +-
 .../test/lit/llvm/squash_float2_gather.ll     |  6 +--
 .../vecz/test/lit/llvm/stride_analysis.ll     | 28 +++++------
 .../vecz/test/lit/llvm/subgroup_broadcast.ll  |  2 +-
 .../vecz/test/lit/llvm/subgroup_builtins.ll   |  2 +-
 .../test/lit/llvm/uniform_address_base.ll     |  4 +-
 .../test/lit/llvm/uniform_address_index.ll    |  4 +-
 22 files changed, 88 insertions(+), 80 deletions(-)

diff --git a/modules/compiler/vecz/source/analysis/uniform_value_analysis.cpp b/modules/compiler/vecz/source/analysis/uniform_value_analysis.cpp
index ff64eccda..558d22525 100644
--- a/modules/compiler/vecz/source/analysis/uniform_value_analysis.cpp
+++ b/modules/compiler/vecz/source/analysis/uniform_value_analysis.cpp
@@ -385,6 +385,14 @@ void UniformValueResult::markVaryingValues(Value *V, Value *From) {
       markVaryingValues(Alloca);
     }
   } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(VIns)) {
+    // We need to clear the flags because the initial address may be out of
+    // bounds but masked out.
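+    // Illustrative example (not taken from the tests below): given
+    //   %p = getelementptr inbounds i32, ptr %base, i64 %idx
+    // the first lane's %p may be out of bounds yet masked out in the scalar
+    // code, but a widened load bases every lane's access on that %p, so the
+    // inbounds/nusw/nuw guarantees can no longer be assumed to hold.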
+#if LLVM_VERSION_GREATER_EQUAL(19, 0) + GEP->setNoWrapFlags(GEPNoWrapFlags::none()); +#else + GEP->setIsInBounds(false); +#endif + // Same as with the stores AllocaInst *Alloca = findAllocaFromPointer(GEP->getPointerOperand()); if (Alloca) { diff --git a/modules/compiler/vecz/test/lit/llvm/ScalableVectors/broadcast_vector.ll b/modules/compiler/vecz/test/lit/llvm/ScalableVectors/broadcast_vector.ll index 3fdb36fd0..519bb696c 100644 --- a/modules/compiler/vecz/test/lit/llvm/ScalableVectors/broadcast_vector.ll +++ b/modules/compiler/vecz/test/lit/llvm/ScalableVectors/broadcast_vector.ll @@ -99,7 +99,7 @@ entry: ; CHECK-LABEL: @__vecz_nxv4_vector_broadcast_const( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call i64 @__mux_get_global_id(i32 0) -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]] ; CHECK-NEXT: store shufflevector ( insertelement ( {{(undef|poison)}}, float 0x7FF8000020000000, {{(i32|i64)}} 0), {{(undef|poison)}}, zeroinitializer), ptr addrspace(1) [[ARRAYIDX3]], align 16 ; CHECK-NEXT: ret void @@ -113,17 +113,17 @@ entry: ; CHECK-NEXT: [[VEC_ALLOC:%.*]] = getelementptr inbounds float, ptr [[FIXLEN_ALLOC]], [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.masked.gather.nxv16f32.nxv16p0( [[VEC_ALLOC]], i32 4, shufflevector ( insertelement ( poison, i1 true, {{(i32|i64)}} 0), poison, zeroinitializer), undef) ; CHECK-NEXT: [[CALL:%.*]] = tail call i64 @__mux_get_global_id(i32 0) -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[IN:%.*]], i64 [[CALL]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[IN:%.*]], i64 [[CALL]] ; CHECK-NEXT: [[TMP3:%.*]] = load , ptr addrspace(1) [[ARRAYIDX]], align 16 ; CHECK-NEXT: [[TMP4:%.*]] = fadd [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]] ; CHECK-NEXT: store [[TMP4]], ptr addrspace(1) [[ARRAYIDX3]], align 16 ; CHECK-NEXT: ret void ; CHECK-LABEL: @__vecz_nxv4_vector_broadcast_regression( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call i64 @__mux_get_global_id(i32 0) -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[IN:%.*]], i64 [[CALL]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[IN:%.*]], i64 [[CALL]] ; CHECK-NEXT: [[TMP1:%.*]] = load , ptr addrspace(1) [[ARRAYIDX]], align 16 ; CHECK-NEXT: [[AND1_I_I_I1_I1:%.*]] = and [[TMP1]], shufflevector ( insertelement ( {{(undef|poison)}}, i32 2139095040, {{i32|i64}} 0), {{(undef|poison)}}, zeroinitializer) ; CHECK-NEXT: [[CMP_I_I_I2_I2:%.*]] = icmp ne [[AND1_I_I_I1_I1]], shufflevector ( insertelement ( {{(undef|poison)}}, i32 2139095040, {{i32|i64}} 0), {{(undef|poison)}}, zeroinitializer) @@ -132,7 +132,7 @@ entry: ; CHECK-NEXT: [[TMP2:%.*]] = or [[CMP_I_I_I2_I2]], [[CMP3_I_I_I4_I4]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast [[TMP1]] to ; CHECK-NEXT: [[TMP4:%.*]] = select [[TMP2]], [[TMP3]], shufflevector ( insertelement ( {{(undef|poison)}}, float 0x7FF0000020000000, {{i32|i64}} 0), {{(undef|poison)}}, zeroinitializer) -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr <4 x float>, 
ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]] ; CHECK-NEXT: store [[TMP4]], ptr addrspace(1) [[ARRAYIDX3]], align 16 ; CHECK-NEXT: ret void ; @@ -156,12 +156,12 @@ entry: ; CHECK-NEXT: [[TMP2:%.*]] = {{s|z}}ext{{( nneg)?}} [[IDX14]] to ; CHECK-NEXT: [[VEC_ALLOC:%.*]] = getelementptr inbounds i32, ptr [[FIXLEN_ALLOC]], [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.masked.gather.nxv16i32.nxv16p0( [[VEC_ALLOC]], i32 4, shufflevector ( insertelement ( poison, i1 true, {{i32|i64}} 0), poison, zeroinitializer), {{(undef|poison)}}) -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[OUT2:%.*]], i64 [[CALL]] +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr <4 x i32>, ptr addrspace(1) [[OUT2:%.*]], i64 [[CALL]] ; CHECK-NEXT: store [[TMP3]], ptr addrspace(1) [[ARRAYIDX4]], align 16 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[IN:%.*]], i64 [[CALL]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[IN:%.*]], i64 [[CALL]] ; CHECK-NEXT: [[TMP6:%.*]] = load , ptr addrspace(1) [[ARRAYIDX]], align 16 ; CHECK-NEXT: [[V46:%.*]] = fadd [[TMP6]], [[TMP1]] -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[OUT:%.*]], i64 [[CALL]] ; CHECK-NEXT: store [[V46]], ptr addrspace(1) [[ARRAYIDX3]], align 16 ; CHECK-NEXT: ret void ; diff --git a/modules/compiler/vecz/test/lit/llvm/ScalableVectors/extract_element.ll b/modules/compiler/vecz/test/lit/llvm/ScalableVectors/extract_element.ll index d5a92d13b..91c989df3 100644 --- a/modules/compiler/vecz/test/lit/llvm/ScalableVectors/extract_element.ll +++ b/modules/compiler/vecz/test/lit/llvm/ScalableVectors/extract_element.ll @@ -95,8 +95,8 @@ entry: ; EE: [[ALLOC:%.*]] = alloca , align 64 ; EE: store {{.*}}, ptr [[ALLOC]], align 64 ; EE: [[IDX:%.*]] = sext i32 %idx to i64 -; EE: [[ADDR:%.*]] = getelementptr inbounds float, ptr [[ALLOC]], i64 [[IDX]] -; EE: [[GATHER:%.*]] = call @__vecz_b_interleaved_load4_4_u5nxv4fu3ptr(ptr nonnull [[ADDR]]) +; EE: [[ADDR:%.*]] = getelementptr float, ptr [[ALLOC]], i64 [[IDX]] +; EE: [[GATHER:%.*]] = call @__vecz_b_interleaved_load4_4_u5nxv4fu3ptr(ptr [[ADDR]]) ; Both the vector and index are uniform, so check we're not unnecessarily packetizing @@ -120,13 +120,13 @@ entry: ; LLVM 16 deduces add/or equivalence and uses `or` instead. 
; EE-UNI-VEC: [[T7:%.*]] = {{add|or}} {{(disjoint )?}} [[T6]], [[MOD]] -; EE-UNI-VEC: [[T8:%.*]] = getelementptr inbounds float, ptr {{%.*}}, [[T7]] +; EE-UNI-VEC: [[T8:%.*]] = getelementptr float, ptr {{%.*}}, [[T7]] ; EE-UNI-VEC: [[T9:%.*]] = call @__vecz_b_gather_load4_u5nxv4fu9nxv4u3ptr( [[T8]]) ; EE-UNI-VEC: store [[T9]], ptr addrspace(1) {{%.*}}, align 4 ; EE-INDICES-LABEL: @__vecz_nxv4_extract_element_varying_indices( ; EE-INDICES: [[ALLOC:%.*]] = alloca , align 64 -; EE-INDICES: [[T0:%.*]] = getelementptr inbounds i32, ptr addrspace(1) %idxs, i64 %call +; EE-INDICES: [[T0:%.*]] = getelementptr i32, ptr addrspace(1) %idxs, i64 %call ; EE-INDICES: [[T2:%.*]] = load , ptr addrspace(1) [[T0]], align 4 ; EE-INDICES: [[T3:%.*]] = and [[T2]], shufflevector ( insertelement ( {{(undef|poison)}}, i32 3, {{i32|i64}} 0), {{(undef|poison)}}, zeroinitializer) ; EE-INDICES: store {{.*}}, ptr [[ALLOC]], align 64 @@ -134,7 +134,7 @@ entry: ; EE-INDICES: [[T4:%.*]] = shl [[STEP]], shufflevector ( insertelement ( {{(undef|poison)}}, i32 2, {{i32|i64}} 0), {{(undef|poison)}}, zeroinitializer) ; EE-INDICES: [[T5:%.*]] = {{add|or}} {{(disjoint )?}} [[T4]], [[T3]] ; EE-INDICES: [[IDX:%.*]] = sext [[T5]] to -; EE-INDICES: [[ADDR:%.*]] = getelementptr inbounds float, ptr [[ALLOC]], [[IDX]] +; EE-INDICES: [[ADDR:%.*]] = getelementptr float, ptr [[ALLOC]], [[IDX]] ; EE-INDICES: [[GATHER:%.*]] = call @__vecz_b_gather_load4_u5nxv4fu9nxv4u3ptr( [[ADDR]]) ; Check we promote from i1 to i8 before doing our memops diff --git a/modules/compiler/vecz/test/lit/llvm/ScalableVectors/insert_element.ll b/modules/compiler/vecz/test/lit/llvm/ScalableVectors/insert_element.ll index 8b4bbb459..b2dcb47b5 100644 --- a/modules/compiler/vecz/test/lit/llvm/ScalableVectors/insert_element.ll +++ b/modules/compiler/vecz/test/lit/llvm/ScalableVectors/insert_element.ll @@ -85,8 +85,8 @@ entry: ; IE: [[VAL1:%.*]] = shufflevector [[VAL0]], poison, zeroinitializer ; IE: store {{.*}}, ptr [[ALLOC]], align 64 ; IE: [[IDX:%.*]] = sext i32 %idx to i64 -; IE: [[ADDR:%.*]] = getelementptr inbounds float, ptr [[ALLOC]], i64 [[IDX]] -; IE: call void @__vecz_b_interleaved_store4_4_u5nxv4fu3ptr( [[VAL1]], ptr nonnull [[ADDR]]) +; IE: [[ADDR:%.*]] = getelementptr float, ptr [[ALLOC]], i64 [[IDX]] +; IE: call void @__vecz_b_interleaved_store4_4_u5nxv4fu3ptr( [[VAL1]], ptr [[ADDR]]) ; IE: = load , ptr [[ALLOC]], align 64 ; Both the vector and index are uniform, so check we're not unnecessarily packetizing @@ -105,7 +105,7 @@ entry: ; IE-INDICES: [[T3:%.*]] = {{add|or}} {{(disjoint )?}} [[T2]], {{%.*}} ; IE-INDICES: [[T4:%.*]] = sext [[T3]] to -; IE-INDICES: [[ADDR:%.*]] = getelementptr inbounds float, ptr %0, [[T4]] +; IE-INDICES: [[ADDR:%.*]] = getelementptr float, ptr %0, [[T4]] ; IE-INDICES: call void @__vecz_b_scatter_store4_u5nxv4fu9nxv4u3ptr( [[VAL]], [[ADDR]]) ; IE-INDICES: = load , ptr [[ALLOC]], align 64 diff --git a/modules/compiler/vecz/test/lit/llvm/ScalableVectors/packetize_mask_varying.ll b/modules/compiler/vecz/test/lit/llvm/ScalableVectors/packetize_mask_varying.ll index da232d896..61682c1ba 100644 --- a/modules/compiler/vecz/test/lit/llvm/ScalableVectors/packetize_mask_varying.ll +++ b/modules/compiler/vecz/test/lit/llvm/ScalableVectors/packetize_mask_varying.ll @@ -45,7 +45,7 @@ if.end: ; make any difference whether it's a zext or sext, but LLVM 16 prefers zext. 
; CHECK: [[idx2:%.*]] = {{s|z}}ext{{( nneg)?}} [[idx1]] to -; CHECK: [[t1:%.*]] = getelementptr inbounds i8, ptr {{.*}}, [[idx2]] +; CHECK: [[t1:%.*]] = getelementptr i8, ptr {{.*}}, [[idx2]] ; CHECK: [[t2:%.*]] = call @llvm.masked.gather.nxv16i8.nxv16p0( [[t1]], ; CHECK: [[splat:%.*]] = trunc [[t2]] to ; CHECK: call void @__vecz_b_masked_store16_u6nxv16ju3ptru6nxv16b( {{.*}}, ptr %arrayidxz, [[splat]]) diff --git a/modules/compiler/vecz/test/lit/llvm/ScalableVectors/select_scalar_vector.ll b/modules/compiler/vecz/test/lit/llvm/ScalableVectors/select_scalar_vector.ll index 0d2fb8b16..f4fa88cb1 100644 --- a/modules/compiler/vecz/test/lit/llvm/ScalableVectors/select_scalar_vector.ll +++ b/modules/compiler/vecz/test/lit/llvm/ScalableVectors/select_scalar_vector.ll @@ -50,7 +50,7 @@ entry: ; make any difference whether it's a zext or sext, but LLVM 16 prefers zext. ; CHECK: [[sext2:%.*]] = {{s|z}}ext{{( nneg)?}} [[idx1]] to -; CHECK: [[addrs:%.*]] = getelementptr inbounds i8, ptr [[alloc]], [[sext2]] +; CHECK: [[addrs:%.*]] = getelementptr i8, ptr [[alloc]], [[sext2]] ; CHECK: [[gather:%.*]] = call @llvm.masked.gather.nxv8i8.nxv8p0( [[addrs]], ; CHECK: [[cmp:%.*]] = trunc [[gather]] to ; CHECK: [[sel:%.*]] = select [[cmp]], [[rhs]], shufflevector ( insertelement ( {{(undef|poison)}}, i32 4, {{(i32|i64)}} 0), {{(undef|poison)}}, zeroinitializer) diff --git a/modules/compiler/vecz/test/lit/llvm/ScalableVectors/shuffle.ll b/modules/compiler/vecz/test/lit/llvm/ScalableVectors/shuffle.ll index 332769b3a..074502779 100644 --- a/modules/compiler/vecz/test/lit/llvm/ScalableVectors/shuffle.ll +++ b/modules/compiler/vecz/test/lit/llvm/ScalableVectors/shuffle.ll @@ -39,7 +39,7 @@ define spir_kernel void @do_shuffle_splat(i32* %aptr, <4 x i32>* %bptr, <4 x i32 ; make any difference whether it's a zext or sext, but LLVM 16 prefers zext. 
; CHECK: [[idx2:%.*]] = {{s|z}}ext{{( nneg)?}} [[idx1]] to -; CHECK: [[alloc:%.*]] = getelementptr inbounds i32, ptr %{{.*}}, [[idx2]] +; CHECK: [[alloc:%.*]] = getelementptr i32, ptr %{{.*}}, [[idx2]] ; CHECK: [[splat:%.*]] = call @llvm.masked.gather.nxv16i32.nxv16p0( [[alloc]], ; CHECK: store [[splat]], ptr } diff --git a/modules/compiler/vecz/test/lit/llvm/ScalableVectors/subgroup_builtins.ll b/modules/compiler/vecz/test/lit/llvm/ScalableVectors/subgroup_builtins.ll index 6100a08e8..19093a2f1 100644 --- a/modules/compiler/vecz/test/lit/llvm/ScalableVectors/subgroup_builtins.ll +++ b/modules/compiler/vecz/test/lit/llvm/ScalableVectors/subgroup_builtins.ll @@ -54,7 +54,7 @@ define spir_kernel void @get_sub_group_local_id(i32 addrspace(1)* %in, i32 addrs ; CHECK: [[STEPVEC:%.*]] = call @llvm.{{(experimental\.)?}}stepvector.nxv4i32() ; CHECK: [[LID:%.*]] = add [[SPLAT]], [[STEPVEC]] ; CHECK: [[EXT:%.*]] = sext i32 %call to i64 -; CHECK: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %out, i64 [[EXT]] +; CHECK: %arrayidx = getelementptr i32, ptr addrspace(1) %out, i64 [[EXT]] ; CHECK: store [[LID]], ptr addrspace(1) %arrayidx } diff --git a/modules/compiler/vecz/test/lit/llvm/control_flow_conversion_uniform_loop.ll b/modules/compiler/vecz/test/lit/llvm/control_flow_conversion_uniform_loop.ll index 2e9d56243..f3081873d 100644 --- a/modules/compiler/vecz/test/lit/llvm/control_flow_conversion_uniform_loop.ll +++ b/modules/compiler/vecz/test/lit/llvm/control_flow_conversion_uniform_loop.ll @@ -167,7 +167,7 @@ declare i64 @__mux_get_global_id(i32) ; CHECK: for.body: ; CHECK: %add = add nsw i32 %storemerge, %a ; CHECK: %idxprom = sext i32 %add2 to i64 -; CHECK: %arrayidx = getelementptr inbounds i32, ptr %b, i64 %idxprom +; CHECK: %arrayidx = getelementptr i32, ptr %b, i64 %idxprom ; CHECK: store i32 %add, ptr %arrayidx, align 4 ; CHECK: %inc = add nsw i32 %storemerge, 1 ; CHECK: br label %for.cond diff --git a/modules/compiler/vecz/test/lit/llvm/extractelement_constant_index.ll b/modules/compiler/vecz/test/lit/llvm/extractelement_constant_index.ll index 60efefe3b..f01721884 100644 --- a/modules/compiler/vecz/test/lit/llvm/extractelement_constant_index.ll +++ b/modules/compiler/vecz/test/lit/llvm/extractelement_constant_index.ll @@ -35,6 +35,6 @@ declare i64 @__mux_get_global_id(i32) #1 ; CHECK: define spir_kernel void @__vecz_v4_extract_constant_index ; CHECK: call <4 x float> @__vecz_b_interleaved_load4_4_Dv4 -; CHECK: getelementptr inbounds float +; CHECK: getelementptr float ; CHECK: store <4 x float> ; CHECK: ret void diff --git a/modules/compiler/vecz/test/lit/llvm/load_add_store.ll b/modules/compiler/vecz/test/lit/llvm/load_add_store.ll index abcbc465a..436540a51 100644 --- a/modules/compiler/vecz/test/lit/llvm/load_add_store.ll +++ b/modules/compiler/vecz/test/lit/llvm/load_add_store.ll @@ -34,9 +34,9 @@ entry: ret void ; CHECK-LABEL: @__vecz_v4_load_add_store(ptr %aptr, ptr %bptr, ptr %zptr) ; CHECK: %idx = call i64 @__mux_get_global_id(i32 0) -; CHECK: %arrayidxa = getelementptr inbounds i32, ptr %aptr, i64 %idx -; CHECK: %arrayidxb = getelementptr inbounds i32, ptr %bptr, i64 %idx -; CHECK: %arrayidxz = getelementptr inbounds i32, ptr %zptr, i64 %idx +; CHECK: %arrayidxa = getelementptr i32, ptr %aptr, i64 %idx +; CHECK: %arrayidxb = getelementptr i32, ptr %bptr, i64 %idx +; CHECK: %arrayidxz = getelementptr i32, ptr %zptr, i64 %idx ; CHECK: %[[TMP0:.*]] = load <4 x i32>, ptr %arrayidxa, align 4 ; CHECK: %[[TMP1:.*]] = load <4 x i32>, ptr %arrayidxb, align 4 ; CHECK: %sum1 = 
add <4 x i32> %[[TMP0]], %[[TMP1]] diff --git a/modules/compiler/vecz/test/lit/llvm/packetization_uniform_branch.ll b/modules/compiler/vecz/test/lit/llvm/packetization_uniform_branch.ll index 0b448e8f8..7218a621c 100644 --- a/modules/compiler/vecz/test/lit/llvm/packetization_uniform_branch.ll +++ b/modules/compiler/vecz/test/lit/llvm/packetization_uniform_branch.ll @@ -91,12 +91,12 @@ declare i64 @__mux_get_global_id(i32) ; CHECK: br i1 %cmp, label %if.then, label %if.else ; CHECK: if.then: -; CHECK: %[[GEP1:.+]] = getelementptr inbounds i32, ptr %b, <4 x i64> +; CHECK: %[[GEP1:.+]] = getelementptr i32, ptr %b, <4 x i64> ; CHECK: store <4 x i32> , ptr %{{.+}}, align 4 ; CHECK: br label %if.end ; CHECK: if.else: -; CHECK: %[[GEP2:.+]] = getelementptr inbounds i32, ptr %b, <4 x i64> +; CHECK: %[[GEP2:.+]] = getelementptr i32, ptr %b, <4 x i64> ; CHECK: store <4 x i32> , ptr %{{.+}}, align 4 ; CHECK: br label %if.end diff --git a/modules/compiler/vecz/test/lit/llvm/packetize_struct_gep.ll b/modules/compiler/vecz/test/lit/llvm/packetize_struct_gep.ll index 49b67c205..5046107c4 100644 --- a/modules/compiler/vecz/test/lit/llvm/packetize_struct_gep.ll +++ b/modules/compiler/vecz/test/lit/llvm/packetize_struct_gep.ll @@ -42,5 +42,5 @@ declare i64 @__mux_get_global_id(i32) ; Check if we can packetize GEPs on structs ; Note that we only need to packetize the non-uniform operands.. ; CHECK: define spir_kernel void @__vecz_v4_test -; CHECK: getelementptr inbounds %struct.T, ptr addrspace(1) %{{.+}}, <4 x i64> %{{.+}}, i32 2 -; CHECK: getelementptr inbounds %struct.T, ptr addrspace(1) %{{.+}}, <4 x i64> %{{.+}}, i32 2 +; CHECK: getelementptr %struct.T, ptr addrspace(1) %{{.+}}, <4 x i64> %{{.+}}, i32 2 +; CHECK: getelementptr %struct.T, ptr addrspace(1) %{{.+}}, <4 x i64> %{{.+}}, i32 2 diff --git a/modules/compiler/vecz/test/lit/llvm/pass_pipeline_printafter.ll b/modules/compiler/vecz/test/lit/llvm/pass_pipeline_printafter.ll index 43079dfa9..d861f5f93 100644 --- a/modules/compiler/vecz/test/lit/llvm/pass_pipeline_printafter.ll +++ b/modules/compiler/vecz/test/lit/llvm/pass_pipeline_printafter.ll @@ -25,14 +25,14 @@ declare i64 @__mux_get_global_id(i32) ; CHECK: IR Dump After Simplify masked memory operations{{( on __vecz_v2_foo)?}} ; CHECK-NEXT: define spir_kernel void @__vecz_v2_foo(ptr addrspace(1) %out) #0 { ; CHECK-NEXT: %idx = call i64 @__mux_get_global_id(i32 0) -; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %idx +; CHECK-NEXT: %arrayidx = getelementptr i32, ptr addrspace(1) %out, i64 %idx ; CHECK-NEXT: store i32 0, ptr addrspace(1) %arrayidx, align 4 ; CHECK-NEXT: ret void ; CHECK-NEXT: } ; CHECK: define spir_kernel void @__vecz_v2_foo(ptr addrspace(1) %out) {{.*}} { ; CHECK-NEXT: %idx = call i64 @__mux_get_global_id(i32 0) -; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %idx +; CHECK-NEXT: %arrayidx = getelementptr i32, ptr addrspace(1) %out, i64 %idx ; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(1) %arrayidx, align 4 ; CHECK-NEXT: ret void ; CHECK-NEXT: } diff --git a/modules/compiler/vecz/test/lit/llvm/scalarization_instructions.ll b/modules/compiler/vecz/test/lit/llvm/scalarization_instructions.ll index 1940065a6..f286d5c81 100644 --- a/modules/compiler/vecz/test/lit/llvm/scalarization_instructions.ll +++ b/modules/compiler/vecz/test/lit/llvm/scalarization_instructions.ll @@ -78,20 +78,20 @@ entry: ; CHECK: store i32 %[[ADD2]], ptr %[[C_1]] ; CHECK: store i32 %[[ADD3]], ptr %[[C_2]] ; CHECK: store i32 
%[[ADD4]], ptr %[[C_3]] -; CHECK: %arrayidx3 = getelementptr inbounds <4 x i32>, ptr %a, i64 1 -; CHECK: %[[A1_0:.+]] = getelementptr inbounds i32, ptr %arrayidx3, i32 0 -; CHECK: %[[A1_1:.+]] = getelementptr inbounds i32, ptr %arrayidx3, i32 1 -; CHECK: %[[A1_2:.+]] = getelementptr inbounds i32, ptr %arrayidx3, i32 2 -; CHECK: %[[A1_3:.+]] = getelementptr inbounds i32, ptr %arrayidx3, i32 3 +; CHECK: %arrayidx3 = getelementptr <4 x i32>, ptr %a, i64 1 +; CHECK: %[[A1_0:.+]] = getelementptr i32, ptr %arrayidx3, i32 0 +; CHECK: %[[A1_1:.+]] = getelementptr i32, ptr %arrayidx3, i32 1 +; CHECK: %[[A1_2:.+]] = getelementptr i32, ptr %arrayidx3, i32 2 +; CHECK: %[[A1_3:.+]] = getelementptr i32, ptr %arrayidx3, i32 3 ; CHECK: %[[LA1_0:.+]] = load i32, ptr %[[A1_0]] ; CHECK: %[[LA1_1:.+]] = load i32, ptr %[[A1_1]] ; CHECK: %[[LA1_2:.+]] = load i32, ptr %[[A1_2]] ; CHECK: %[[LA1_3:.+]] = load i32, ptr %[[A1_3]] -; CHECK: %arrayidx4 = getelementptr inbounds <4 x i32>, ptr %b, i64 1 -; CHECK: %[[B1_0:.+]] = getelementptr inbounds i32, ptr %arrayidx4, i32 0 -; CHECK: %[[B1_1:.+]] = getelementptr inbounds i32, ptr %arrayidx4, i32 1 -; CHECK: %[[B1_2:.+]] = getelementptr inbounds i32, ptr %arrayidx4, i32 2 -; CHECK: %[[B1_3:.+]] = getelementptr inbounds i32, ptr %arrayidx4, i32 3 +; CHECK: %arrayidx4 = getelementptr <4 x i32>, ptr %b, i64 1 +; CHECK: %[[B1_0:.+]] = getelementptr i32, ptr %arrayidx4, i32 0 +; CHECK: %[[B1_1:.+]] = getelementptr i32, ptr %arrayidx4, i32 1 +; CHECK: %[[B1_2:.+]] = getelementptr i32, ptr %arrayidx4, i32 2 +; CHECK: %[[B1_3:.+]] = getelementptr i32, ptr %arrayidx4, i32 3 ; CHECK: %[[LB1_0:.+]] = load i32, ptr %[[B1_0]] ; CHECK: %[[LB1_1:.+]] = load i32, ptr %[[B1_1]] ; CHECK: %[[LB1_2:.+]] = load i32, ptr %[[B1_2]] @@ -104,20 +104,20 @@ entry: ; CHECK: %[[SEXT11:.+]] = sext i1 %[[CMP6]] to i32 ; CHECK: %[[SEXT12:.+]] = sext i1 %[[CMP8]] to i32 ; CHECK: %[[SEXT13:.+]] = sext i1 %[[CMP9]] to i32 -; CHECK: %arrayidx5 = getelementptr inbounds <4 x i32>, ptr %c, i64 1 -; CHECK: %[[C1_0:.+]] = getelementptr inbounds i32, ptr %arrayidx5, i32 0 -; CHECK: %[[C1_1:.+]] = getelementptr inbounds i32, ptr %arrayidx5, i32 1 -; CHECK: %[[C1_2:.+]] = getelementptr inbounds i32, ptr %arrayidx5, i32 2 -; CHECK: %[[C1_3:.+]] = getelementptr inbounds i32, ptr %arrayidx5, i32 3 +; CHECK: %arrayidx5 = getelementptr <4 x i32>, ptr %c, i64 1 +; CHECK: %[[C1_0:.+]] = getelementptr i32, ptr %arrayidx5, i32 0 +; CHECK: %[[C1_1:.+]] = getelementptr i32, ptr %arrayidx5, i32 1 +; CHECK: %[[C1_2:.+]] = getelementptr i32, ptr %arrayidx5, i32 2 +; CHECK: %[[C1_3:.+]] = getelementptr i32, ptr %arrayidx5, i32 3 ; CHECK: store i32 %[[SEXT10]], ptr %[[C1_0]] ; CHECK: store i32 %[[SEXT11]], ptr %[[C1_1]] ; CHECK: store i32 %[[SEXT12]], ptr %[[C1_2]] ; CHECK: store i32 %[[SEXT13]], ptr %[[C1_3]] -; CHECK: %arrayidx6 = getelementptr inbounds <4 x i32>, ptr %a, i64 2 -; CHECK: %[[A2_0:.+]] = getelementptr inbounds i32, ptr %arrayidx6, i32 0 -; CHECK: %[[A2_1:.+]] = getelementptr inbounds i32, ptr %arrayidx6, i32 1 -; CHECK: %[[A2_2:.+]] = getelementptr inbounds i32, ptr %arrayidx6, i32 2 -; CHECK: %[[A2_3:.+]] = getelementptr inbounds i32, ptr %arrayidx6, i32 3 +; CHECK: %arrayidx6 = getelementptr <4 x i32>, ptr %a, i64 2 +; CHECK: %[[A2_0:.+]] = getelementptr i32, ptr %arrayidx6, i32 0 +; CHECK: %[[A2_1:.+]] = getelementptr i32, ptr %arrayidx6, i32 1 +; CHECK: %[[A2_2:.+]] = getelementptr i32, ptr %arrayidx6, i32 2 +; CHECK: %[[A2_3:.+]] = getelementptr i32, ptr %arrayidx6, i32 3 ; CHECK: %[[LA2_0:.+]] = load 
i32, ptr %[[A2_0]] ; CHECK: %[[LA2_1:.+]] = load i32, ptr %[[A2_1]] ; CHECK: %[[LA2_2:.+]] = load i32, ptr %[[A2_2]] @@ -130,11 +130,11 @@ entry: ; CHECK: %[[SEXT819:.+]] = sext i1 %[[CMP715]] to i32 ; CHECK: %[[SEXT820:.+]] = sext i1 %[[CMP716]] to i32 ; CHECK: %[[SEXT821:.+]] = sext i1 %[[CMP717]] to i32 -; CHECK: %arrayidx9 = getelementptr inbounds <4 x i32>, ptr %c, i64 2 -; CHECK: %[[C2_0:.+]] = getelementptr inbounds i32, ptr %arrayidx9, i32 0 -; CHECK: %[[C2_1:.+]] = getelementptr inbounds i32, ptr %arrayidx9, i32 1 -; CHECK: %[[C2_2:.+]] = getelementptr inbounds i32, ptr %arrayidx9, i32 2 -; CHECK: %[[C2_3:.+]] = getelementptr inbounds i32, ptr %arrayidx9, i32 3 +; CHECK: %arrayidx9 = getelementptr <4 x i32>, ptr %c, i64 2 +; CHECK: %[[C2_0:.+]] = getelementptr i32, ptr %arrayidx9, i32 0 +; CHECK: %[[C2_1:.+]] = getelementptr i32, ptr %arrayidx9, i32 1 +; CHECK: %[[C2_2:.+]] = getelementptr i32, ptr %arrayidx9, i32 2 +; CHECK: %[[C2_3:.+]] = getelementptr i32, ptr %arrayidx9, i32 3 ; CHECK: store i32 %[[SEXT818]], ptr %[[C2_0]] ; CHECK: store i32 %[[SEXT819]], ptr %[[C2_1]] ; CHECK: store i32 %[[SEXT820]], ptr %[[C2_2]] diff --git a/modules/compiler/vecz/test/lit/llvm/scalarize_mixed_gep.ll b/modules/compiler/vecz/test/lit/llvm/scalarize_mixed_gep.ll index 8abeed7bc..d995fd652 100644 --- a/modules/compiler/vecz/test/lit/llvm/scalarize_mixed_gep.ll +++ b/modules/compiler/vecz/test/lit/llvm/scalarize_mixed_gep.ll @@ -42,5 +42,5 @@ define void @bar(i64** %ptrptrs, i64 %val) { ; gets scalarized/re-packetized correctly ; CHECK: define void @__vecz_v4_bar -; CHECK: %[[ADDR:.+]] = getelementptr inbounds {{i64|i8}}, <4 x ptr> %{{.+}}, {{i64 2|i64 16}} +; CHECK: %[[ADDR:.+]] = getelementptr {{i64|i8}}, <4 x ptr> %{{.+}}, {{i64 2|i64 16}} ; CHECK: call void @__vecz_b_scatter_store8_Dv4_mDv4_u3ptr(<4 x i64> %.splat{{.*}}, <4 x ptr> %[[ADDR]]) diff --git a/modules/compiler/vecz/test/lit/llvm/squash_float2_gather.ll b/modules/compiler/vecz/test/lit/llvm/squash_float2_gather.ll index 8c2b80f72..01c0d0d79 100644 --- a/modules/compiler/vecz/test/lit/llvm/squash_float2_gather.ll +++ b/modules/compiler/vecz/test/lit/llvm/squash_float2_gather.ll @@ -44,12 +44,12 @@ attributes #2 = { nobuiltin nounwind } ; ; CHECK: void @__vecz_v4_squash ; CHECK: %[[GID:.+]] = call i64 @__mux_get_global_id(i64 0) #[[ATTRS:[0-9]+]] -; CHECK: %[[IDX_PTR:.+]] = getelementptr inbounds i64, ptr addrspace(1) %idx, i64 %[[GID]] +; CHECK: %[[IDX_PTR:.+]] = getelementptr i64, ptr addrspace(1) %idx, i64 %[[GID]] ; CHECK: %[[WIDE_LOAD:.+]] = load <4 x i64>, ptr addrspace(1) %[[IDX_PTR]], align 8 -; CHECK: %[[DATA_PTR:.+]] = getelementptr inbounds <2 x float>, ptr addrspace(1) %data, <4 x i64> %[[WIDE_LOAD]] +; CHECK: %[[DATA_PTR:.+]] = getelementptr <2 x float>, ptr addrspace(1) %data, <4 x i64> %[[WIDE_LOAD]] ; CHECK: %[[GATHER:.+]] = call <4 x i64> @__vecz_b_gather_load8_Dv4_mDv4_u3ptrU3AS1(<4 x ptr addrspace(1)> %[[DATA_PTR]]) ; CHECK: %[[UNSQUASH:.+]] = bitcast <4 x i64> %[[GATHER]] to <8 x float> -; CHECK: %[[OUTPUT_PTR:.+]] = getelementptr inbounds <2 x float>, ptr addrspace(1) %output, i64 %[[GID]] +; CHECK: %[[OUTPUT_PTR:.+]] = getelementptr <2 x float>, ptr addrspace(1) %output, i64 %[[GID]] ; CHECK: store <8 x float> %[[UNSQUASH]], ptr addrspace(1) %[[OUTPUT_PTR]], align 8 ; CHECK: ret void diff --git a/modules/compiler/vecz/test/lit/llvm/stride_analysis.ll b/modules/compiler/vecz/test/lit/llvm/stride_analysis.ll index 5e1cf09ef..3bf5a299a 100644 --- a/modules/compiler/vecz/test/lit/llvm/stride_analysis.ll +++ 
b/modules/compiler/vecz/test/lit/llvm/stride_analysis.ll @@ -30,48 +30,48 @@ entry: ; CHECK-NEXT: uniform %lduniform = load i8, ptr addrspace(1) %input, align 1 -; CHECK: Stride for %arrayidx0 = getelementptr inbounds i8, ptr addrspace(1) %input, i64 %globalid0 +; CHECK: Stride for %arrayidx0 = getelementptr i8, ptr addrspace(1) %input, i64 %globalid0 ; CHECK-NEXT: linear stride of 1 %arrayidx0 = getelementptr inbounds i8, ptr addrspace(1) %input, i64 %globalid0 %ld0 = load i8, ptr addrspace(1) %arrayidx0, align 1 %truncglobalid0 = trunc i64 %globalid0 to i32 -; CHECK: Stride for %arrayidx1 = getelementptr inbounds i8, ptr addrspace(1) %input, i64 %sexttruncglobalid0 +; CHECK: Stride for %arrayidx1 = getelementptr i8, ptr addrspace(1) %input, i64 %sexttruncglobalid0 ; CHECK-NEXT: linear stride of 1 %sexttruncglobalid0 = sext i32 %truncglobalid0 to i64 %arrayidx1 = getelementptr inbounds i8, ptr addrspace(1) %input, i64 %sexttruncglobalid0 %ld1 = load i8, ptr addrspace(1) %arrayidx1, align 1 -; CHECK: Stride for %arrayidx2 = getelementptr inbounds i8, ptr addrspace(1) %input, i64 %zexttruncglobalid0 +; CHECK: Stride for %arrayidx2 = getelementptr i8, ptr addrspace(1) %input, i64 %zexttruncglobalid0 ; CHECK-NEXT: divergent %zexttruncglobalid0 = zext i32 %truncglobalid0 to i64 %arrayidx2 = getelementptr inbounds i8, ptr addrspace(1) %input, i64 %zexttruncglobalid0 %ld2 = load i8, ptr addrspace(1) %arrayidx2, align 1 -; CHECK: Stride for %arrayidx3 = getelementptr inbounds i32, ptr addrspace(1) %input, i64 %globalid0 +; CHECK: Stride for %arrayidx3 = getelementptr i32, ptr addrspace(1) %input, i64 %globalid0 ; CHECK-NEXT: linear stride of 4 %arrayidx3 = getelementptr inbounds i32, ptr addrspace(1) %input, i64 %globalid0 %ld3 = load i8, ptr addrspace(1) %arrayidx3, align 1 -; CHECK: Stride for %arrayidx4 = getelementptr inbounds i8, ptr addrspace(1) %input, i64 %globalid0mul8 +; CHECK: Stride for %arrayidx4 = getelementptr i8, ptr addrspace(1) %input, i64 %globalid0mul8 ; CHECK-NEXT: linear stride of 8 %globalid0mul8 = mul i64 %globalid0, 8 %arrayidx4 = getelementptr inbounds i8, ptr addrspace(1) %input, i64 %globalid0mul8 %ld4 = load i8, ptr addrspace(1) %arrayidx4, align 1 -; CHECK: Stride for %arrayidx5 = getelementptr inbounds i8, ptr addrspace(1) %input, i64 %globalid0mul16 +; CHECK: Stride for %arrayidx5 = getelementptr i8, ptr addrspace(1) %input, i64 %globalid0mul16 ; CHECK-NEXT: linear stride of 16 %globalid0mul16 = mul i64 %globalid0mul8, 2 %arrayidx5 = getelementptr inbounds i8, ptr addrspace(1) %input, i64 %globalid0mul16 %ld5 = load i8, ptr addrspace(1) %arrayidx5, align 1 -; CHECK: Stride for %arrayidx6 = getelementptr inbounds i32, ptr addrspace(1) %input, i64 %globalid0mul8 +; CHECK: Stride for %arrayidx6 = getelementptr i32, ptr addrspace(1) %input, i64 %globalid0mul8 ; CHECK-NEXT: linear stride of 32 %arrayidx6 = getelementptr inbounds i32, ptr addrspace(1) %input, i64 %globalid0mul8 %ld6 = load i32, ptr addrspace(1) %arrayidx6, align 1 -; CHECK: Stride for %arrayidx7 = getelementptr inbounds i16, ptr addrspace(1) %input, i64 %idxprom7 +; CHECK: Stride for %arrayidx7 = getelementptr i16, ptr addrspace(1) %input, i64 %idxprom7 ; CHECK-NEXT: linear stride of 2 %mul7 = mul i64 %localsize0, %groupid0 %add7 = add i64 %mul7, %localid0 @@ -81,7 +81,7 @@ entry: %arrayidx7 = getelementptr inbounds i16, ptr addrspace(1) %input, i64 %idxprom7 %ld7 = load i16, ptr addrspace(1) %arrayidx7, align 1 -; CHECK: Stride for %arrayidx8 = getelementptr inbounds i8, ptr addrspace(1) %input, i64 
%idxprom8 +; CHECK: Stride for %arrayidx8 = getelementptr i8, ptr addrspace(1) %input, i64 %idxprom8 ; CHECK-NEXT: divergent %mul8 = mul i64 %localsize0, %groupid0 %add8 = add i64 %mul8, %localid0 @@ -91,7 +91,7 @@ entry: %arrayidx8 = getelementptr inbounds i8, ptr addrspace(1) %input, i64 %idxprom8 %ld8 = load i8, ptr addrspace(1) %arrayidx8, align 1 -; CHECK: Stride for %arrayidx9 = getelementptr inbounds i8, ptr addrspace(1) %input, i64 %idxprom9 +; CHECK: Stride for %arrayidx9 = getelementptr i8, ptr addrspace(1) %input, i64 %idxprom9 ; CHECK-NEXT: divergent %mul9 = mul i64 %groupid0, %localsize0 %add9 = add nuw nsw i64 %localid0, 4294967295 @@ -115,7 +115,7 @@ entry: %conv = add i32 %0, -1 %trunclocalsize0 = trunc i64 %localsize0 to i32 -; CHECK: Stride for %arrayidx_pre = getelementptr inbounds i8, ptr addrspace(1) %input, i64 %idxprom_pre +; CHECK: Stride for %arrayidx_pre = getelementptr i8, ptr addrspace(1) %input, i64 %idxprom_pre ; CHECK-NEXT: divergent %idxprom_pre = zext i32 %conv to i64 %arrayidx_pre = getelementptr inbounds i8, ptr addrspace(1) %input, i64 %idxprom_pre @@ -126,7 +126,7 @@ entry: for.body: ; The below is fundamentally the same stride calculation as %arrayidx_pre - ; make sure the loop and the PHI don't throw off the analysis. -; CHECK: Stride for %arrayidx_loop = getelementptr inbounds i8, ptr addrspace(1) %input, i64 %idxprom_loop +; CHECK: Stride for %arrayidx_loop = getelementptr i8, ptr addrspace(1) %input, i64 %idxprom_loop ; CHECK-NEXT: divergent %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %gx2.050.us = phi i32 [ %conv, %entry ], [ %conv26.us, %for.body ] @@ -154,7 +154,7 @@ entry: %add = add i64 %mul, %localid0 %addtrunc = trunc i64 %add to i32 -; CHECK: Stride for %arrayidx0 = getelementptr inbounds i8, ptr addrspace(1) %input, i64 %idxprom0 +; CHECK: Stride for %arrayidx0 = getelementptr i8, ptr addrspace(1) %input, i64 %idxprom0 ; CHECK-NEXT: divergent %idxprom0 = zext i32 %addtrunc to i64 %arrayidx0 = getelementptr inbounds i8, ptr addrspace(1) %input, i64 %idxprom0 @@ -162,7 +162,7 @@ entry: ; The below is fundamentally the same stride calculation as %arrayidx0 - make ; sure the select doesn't throw off the analysis. 
-; CHECK: Stride for %arrayidx1 = getelementptr inbounds i8, ptr addrspace(1) %input, i64 %idxprom1 +; CHECK: Stride for %arrayidx1 = getelementptr i8, ptr addrspace(1) %input, i64 %idxprom1 ; CHECK-NEXT: divergent %sel1 = select i1 %cmp, i32 %addtrunc, i32 %addtrunc %idxprom1 = zext i32 %sel1 to i64 diff --git a/modules/compiler/vecz/test/lit/llvm/subgroup_broadcast.ll b/modules/compiler/vecz/test/lit/llvm/subgroup_broadcast.ll index 2344f6869..d2fc09ce1 100644 --- a/modules/compiler/vecz/test/lit/llvm/subgroup_broadcast.ll +++ b/modules/compiler/vecz/test/lit/llvm/subgroup_broadcast.ll @@ -41,5 +41,5 @@ define spir_kernel void @sub_group_broadcast(i32 addrspace(1)* %in, i32 addrspac ; CHECK: [[BCAST:%.+]] = shufflevector <4 x i32> [[INS]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK: %idx = tail call i32 @__mux_get_sub_group_local_id() ; CHECK: [[EXT:%.*]] = sext i32 %idx to i64 -; CHECK: %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 [[EXT]] +; CHECK: %arrayidx2 = getelementptr i32, ptr addrspace(1) %out, i64 [[EXT]] ; CHECK: store <4 x i32> [[BCAST]], ptr addrspace(1) %arrayidx2, align 4 diff --git a/modules/compiler/vecz/test/lit/llvm/subgroup_builtins.ll b/modules/compiler/vecz/test/lit/llvm/subgroup_builtins.ll index a07f2b7dd..6460d40e1 100644 --- a/modules/compiler/vecz/test/lit/llvm/subgroup_builtins.ll +++ b/modules/compiler/vecz/test/lit/llvm/subgroup_builtins.ll @@ -50,7 +50,7 @@ define spir_kernel void @get_sub_group_local_id(i32 addrspace(1)* %in, i32 addrs ; CHECK: [[SPLAT:%.*]] = shufflevector <4 x i32> [[SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK: [[ID:%.*]] = or {{(disjoint )?}}<4 x i32> [[SPLAT]], ; CHECK: [[EXT:%.*]] = sext i32 %call to i64 -; CHECK: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %out, i64 [[EXT]] +; CHECK: %arrayidx = getelementptr i32, ptr addrspace(1) %out, i64 [[EXT]] ; CHECK: store <4 x i32> [[ID]], ptr addrspace(1) %arrayidx } diff --git a/modules/compiler/vecz/test/lit/llvm/uniform_address_base.ll b/modules/compiler/vecz/test/lit/llvm/uniform_address_base.ll index 8f5e9a83e..a5d9c7b81 100644 --- a/modules/compiler/vecz/test/lit/llvm/uniform_address_base.ll +++ b/modules/compiler/vecz/test/lit/llvm/uniform_address_base.ll @@ -49,8 +49,8 @@ declare i64 @__mux_get_global_id(i32) local_unnamed_addr #1 ; CHECK: define spir_kernel void @__vecz_v4_uniform_address_index ; CHECK: entry: ; CHECK: call i64 @__mux_get_global_id(i32 0) -; CHECK-DAG: %[[INA:.+]] = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %[[X:.+]] +; CHECK-DAG: %[[INA:.+]] = getelementptr i32, ptr addrspace(1) %in, i32 %[[X:.+]] ; CHECK-DAG: %[[LOAD:.+]] = load <4 x i32>, ptr addrspace(1) %[[INA]] -; CHECK-DAG: %[[OUTA:.+]] = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %[[X:.+]] +; CHECK-DAG: %[[OUTA:.+]] = getelementptr i32, ptr addrspace(1) %out, i32 %[[X:.+]] ; CHECK-DAG: store <4 x i32> %[[LOAD]], ptr addrspace(1) %[[OUTA]] ; CHECK-NOT: call <4 x i32> diff --git a/modules/compiler/vecz/test/lit/llvm/uniform_address_index.ll b/modules/compiler/vecz/test/lit/llvm/uniform_address_index.ll index 8f5e9a83e..a5d9c7b81 100644 --- a/modules/compiler/vecz/test/lit/llvm/uniform_address_index.ll +++ b/modules/compiler/vecz/test/lit/llvm/uniform_address_index.ll @@ -49,8 +49,8 @@ declare i64 @__mux_get_global_id(i32) local_unnamed_addr #1 ; CHECK: define spir_kernel void @__vecz_v4_uniform_address_index ; CHECK: entry: ; CHECK: call i64 @__mux_get_global_id(i32 0) -; CHECK-DAG: %[[INA:.+]] = getelementptr inbounds 
i32, ptr addrspace(1) %in, i32 %[[X:.+]] +; CHECK-DAG: %[[INA:.+]] = getelementptr i32, ptr addrspace(1) %in, i32 %[[X:.+]] ; CHECK-DAG: %[[LOAD:.+]] = load <4 x i32>, ptr addrspace(1) %[[INA]] -; CHECK-DAG: %[[OUTA:.+]] = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %[[X:.+]] +; CHECK-DAG: %[[OUTA:.+]] = getelementptr i32, ptr addrspace(1) %out, i32 %[[X:.+]] ; CHECK-DAG: store <4 x i32> %[[LOAD]], ptr addrspace(1) %[[OUTA]] ; CHECK-NOT: call <4 x i32>