From 2615805adfd7fc8db8eeafff6f4549b653d93cdf Mon Sep 17 00:00:00 2001 From: Paul Date: Fri, 1 Nov 2024 11:58:25 -0400 Subject: [PATCH] Fix a typo in SIMD port which the tests didn't catch (#12) --- include/sst/waveshapers/Effects.h | 6 +++--- include/sst/waveshapers/WaveshaperLUT.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/sst/waveshapers/Effects.h b/include/sst/waveshapers/Effects.h index cdd9250..7e18daf 100644 --- a/include/sst/waveshapers/Effects.h +++ b/include/sst/waveshapers/Effects.h @@ -62,9 +62,9 @@ inline SIMD_M128 SINUS_SSE2(QuadWaveshaperState *__restrict s, SIMD_M128 in, SIM // GCC seems to optimize around the XMM -> int transfers so this is needed here int e4 alignas(16)[4]; e4[0] = SIMD_MM(cvtsi128_si32)(e); - e4[1] = SIMD_MM(cvtsi128_si32)(SIMD_MM(shufflelo_epi16)(e, SIMDSIMD_MM_SHUFFLE(1, 1, 1, 1))); - e4[2] = SIMD_MM(cvtsi128_si32)(SIMD_MM(shufflelo_epi16)(e, SIMDSIMD_MM_SHUFFLE(2, 2, 2, 2))); - e4[3] = SIMD_MM(cvtsi128_si32)(SIMD_MM(shufflelo_epi16)(e, SIMDSIMD_MM_SHUFFLE(3, 3, 3, 3))); + e4[1] = SIMD_MM(cvtsi128_si32)(SIMD_MM(shufflelo_epi16)(e, SIMD_MM_SHUFFLE(1, 1, 1, 1))); + e4[2] = SIMD_MM(cvtsi128_si32)(SIMD_MM(shufflelo_epi16)(e, SIMD_MM_SHUFFLE(2, 2, 2, 2))); + e4[3] = SIMD_MM(cvtsi128_si32)(SIMD_MM(shufflelo_epi16)(e, SIMD_MM_SHUFFLE(3, 3, 3, 3))); #else // on PC write to memory & back as XMM -> GPR is slow on K8 short e4 alignas(16)[8]; diff --git a/include/sst/waveshapers/WaveshaperLUT.h b/include/sst/waveshapers/WaveshaperLUT.h index fe298eb..0480693 100644 --- a/include/sst/waveshapers/WaveshaperLUT.h +++ b/include/sst/waveshapers/WaveshaperLUT.h @@ -27,9 +27,9 @@ SIMD_M128 WS_LUT(QuadWaveshaperState *__restrict, const float *table, SIMD_M128 // this should be very fast on C2D/C1D (and there are no macs with K8's) int e4 alignas(16)[4]; e4[0] = SIMD_MM(cvtsi128_si32)(e); - e4[1] = SIMD_MM(cvtsi128_si32)(SIMD_MM(shufflelo_epi16)(e, SIMDSIMD_MM_SHUFFLE(1, 1, 1, 1))); - e4[2] = 
SIMD_MM(cvtsi128_si32)(SIMD_MM(shufflelo_epi16)(e, SIMDSIMD_MM_SHUFFLE(2, 2, 2, 2))); - e4[3] = SIMD_MM(cvtsi128_si32)(SIMD_MM(shufflelo_epi16)(e, SIMDSIMD_MM_SHUFFLE(3, 3, 3, 3))); + e4[1] = SIMD_MM(cvtsi128_si32)(SIMD_MM(shufflelo_epi16)(e, SIMD_MM_SHUFFLE(1, 1, 1, 1))); + e4[2] = SIMD_MM(cvtsi128_si32)(SIMD_MM(shufflelo_epi16)(e, SIMD_MM_SHUFFLE(2, 2, 2, 2))); + e4[3] = SIMD_MM(cvtsi128_si32)(SIMD_MM(shufflelo_epi16)(e, SIMD_MM_SHUFFLE(3, 3, 3, 3))); #else // on PC write to memory & back as XMM -> GPR is slow on K8