diff --git a/test/llvm/slp_aarch64.ll b/test/llvm/slp_aarch64.ll index e8745f9..a49157b 100644 --- a/test/llvm/slp_aarch64.ll +++ b/test/llvm/slp_aarch64.ll @@ -1,15 +1,17 @@ -; RUN: opt -passes=slp-vectorizer,dce -S -mtriple=aarch64-unknown-linuxgnu %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=slp-vectorizer,dce -slp-threshold=-100 -S -mtriple=aarch64-unknown-linux-gnu %s | FileCheck %s ; The input function can be expressed in C as: ; int hadd(int *a) { ; return a[0] + a[1] + a[2] + a[3] ; } define i32 @hadd(i32* %a) { -; CHECK-LABEL: hadd -; CHECK-NEXT: %0 = bitcast i32* %a to <4 x i32>* -; CHECK-NEXT: %1 = load <4 x i32>, <4 x i32>* %0, align 4 -; CHECK-NEXT: %2 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %1) -; CHECK-NEXT: ret i32 %2 +; CHECK-LABEL: @hadd( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP0]]) +; CHECK-NEXT: ret i32 [[TMP1]] +; entry: %0 = load i32, i32* %a, align 4 diff --git a/test/llvm/slp_x86.ll b/test/llvm/slp_x86.ll index 16627be..4d0a00a 100644 --- a/test/llvm/slp_x86.ll +++ b/test/llvm/slp_x86.ll @@ -1,4 +1,5 @@ -; RUN: opt -passes=slp-vectorizer,dce -mtriple=x86_64-unknown-linuxgnu -S %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=slp-vectorizer,dce -slp-threshold=-100 -mtriple=x86_64-unknown-linux-gnu -S %s | FileCheck %s ; The input function can be expressed in C as: ; void foo(int a1, int a2, int b1, int b2, int *A) { @@ -8,22 +9,20 @@ ; A[3] = a2*(a2 + b2); ; } define void @foo(i32, i32, i32, i32, i32* nocapture) local_unnamed_addr #0 { -; CHECK-LABEL: foo -; CHECK-NEXT: %6 = insertelement <2 x i32> undef, i32 %2, i32 0 -; CHECK-NEXT: %7 = insertelement <2 x i32> %6, i32 %3, i32 1 -; CHECK-NEXT: %8 = insertelement <2 x i32> undef, i32 %0, i32 0 -; CHECK-NEXT: %9 = insertelement <2 x i32> 
%8, i32 %1, i32 1 +; CHECK-LABEL: @foo( +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2:%.*]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP3:%.*]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0:%.*]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP1:%.*]], i32 1 ; %10 = [a1, a2] + [b1, b2] = [a1 + b1, a2 + b2] -; CHECK-NEXT: %10 = add nsw <2 x i32> %7, %9 +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i32> [[TMP7]], [[TMP9]] ; %11 = [a1 * (a1 + b1), a2 * (a2 + b2)] -; CHECK-NEXT: %11 = mul nsw <2 x i32> %10, %9 -; CHECK-NEXT: %shuffle = shufflevector <2 x i32> %11, <2 x i32> undef, <4 x i32> - ; Not used -; CHECK-NEXT: %15 = bitcast i32* %4 to <4 x i32>* +; CHECK-NEXT: [[TMP11:%.*]] = mul nsw <2 x i32> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> poison, <4 x i32> ; A = [shuffle[0], shuffle[1], shuffle[2], shuffle[3]] -; CHECK-NEXT: store <4 x i32> %shuffle, <4 x i32>* %15, align 4 -; CHECK-NEXT: ret void - +; CHECK-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP4:%.*]], align 4 +; CHECK-NEXT: ret void +; %6 = add nsw i32 %2, %0 %7 = mul nsw i32 %6, %0 store i32 %7, i32* %4, align 4