surge-synthesizer · baconpaul · Sep 8, 2024 · Aug 19, 2024 · Aug 21, 2024 · Sep 8, 2024
diff --git a/include/sst/waveshapers/Effects.h b/include/sst/waveshapers/Effects.h
@@ -19,20 +19,37 @@ inline __m128 DIGI_SSE2(QuadWaveshaperState *__restrict, __m128 in, __m128 drive
     return _mm_mul_ps(drive, _mm_mul_ps(m16inv, _mm_sub_ps(_mm_cvtepi32_ps(a), mofs)));
 }
 
+template<bool DO_FOLD = false>
 inline __m128 SINUS_SSE2(QuadWaveshaperState *__restrict s, __m128 in, __m128 drive)
 {
     const __m128 one = _mm_set1_ps(1.f);
     const __m128 m256 = _mm_set1_ps(256.f);
     const __m128 m512 = _mm_set1_ps(512.f);
 
+    // Scale so that -1.0 - 1.0 goes to 256 - 768
+    // +6 dB gets you the full sine wave
     __m128 x = _mm_mul_ps(in, drive);
     x = _mm_add_ps(_mm_mul_ps(x, m256), m512);
 
+    // Convert to 32 bit ints
     __m128i e = _mm_cvtps_epi32(x);
+    // Fractional remainder after the float->int conversion (cvtps rounds per MXCSR, nearest by default, so it may be negative). This is used for later interpolation
     __m128 a = _mm_sub_ps(x, _mm_cvtepi32_ps(e));
-    e = _mm_packs_epi32(e, e);
-    const __m128i UB = _mm_set1_epi16(0x3fe);
-    e = _mm_max_epi16(_mm_min_epi16(e, UB), _mm_setzero_si128());
+
+    // Template arg -- Compiler will optimize this out
+    if (DO_FOLD) {
+        // Now, make sure the fold pattern repeats
+        // Fortunately, we're dealing with a power-of-two LUT so we can do a modulus by bitwise and like so:
+        e = _mm_and_si128(e, _mm_set1_epi32(0x3ff));
+        // Now pack into 16 bit ints. Should already be truncated
+        // If not, whoops, segfault
+        e = _mm_packs_epi32(e, e);
+    } else {
+        // Don't repeat; Instead, clip to zero at the boundaries
+        e = _mm_packs_epi32(e, e);
+        const __m128i UB = _mm_set1_epi16(0x3fe);
+        e = _mm_max_epi16(_mm_min_epi16(e, UB), _mm_setzero_si128());
+    }
 
 #if MAC
     // this should be very fast on C2D/C1D (and there are no macs with K8's)

diff --git a/include/sst/waveshapers/QuadWaveshaper_Impl.h b/include/sst/waveshapers/QuadWaveshaper_Impl.h
@@ -32,7 +32,7 @@ inline QuadWaveshaperPtr GetQuadWaveshaper(WaveshaperType type)
 
     // effects
     case WaveshaperType::wst_sine:
-        return SINUS_SSE2;
+        return SINUS_SSE2<false>;
     case WaveshaperType::wst_digital:
         return DIGI_SSE2;
 
@@ -79,6 +79,10 @@ inline QuadWaveshaperPtr GetQuadWaveshaper(WaveshaperType type)
         return WAVEFOLDER<dualFoldADAA>;
     case WaveshaperType::wst_westfold:
         return WAVEFOLDER<westCoastFoldADAA>;
+    case WaveshaperType::wst_linearfold:
+        return LINFOLD_SSE2;
+    case WaveshaperType::wst_sinefold:
+        return SINUS_SSE2<true>;
 
     // fuzzes
     case WaveshaperType::wst_fuzz:

diff --git a/include/sst/waveshapers/Wavefolders.h b/include/sst/waveshapers/Wavefolders.h
@@ -125,6 +125,45 @@ inline __m128 SoftOneFold(QuadWaveshaperState *__restrict, __m128 x, __m128 driv
 
     return _mm_mul_ps(y, _mm_rcp_ps(num));
 }
+
+inline __m128 LINFOLD_SSE2(QuadWaveshaperState *__restrict, __m128 in, __m128 drive)
+{
+    // Linear (triangle) wavefolder: out = |2 - 4 * frac(in * drive / 4 + 0.75)| - 1.
+    // Heavily derived from Vital's linear fold (GPLv3, compatible with SST).
+    // The waveshaper state is not used here, so that parameter is left unnamed.
+    const __m128 mfour = _mm_set1_ps(-4.f);
+    const __m128 two = _mm_set1_ps(2.f);
+    const __m128 one = _mm_set1_ps(1.f);
+    const __m128 p75 = _mm_set1_ps(0.75f);
+    const __m128 p25 = _mm_set1_ps(0.25f);
+    const __m128 zero = _mm_set1_ps(0.f);
+
+    __m128 x = _mm_mul_ps(in, drive);
+    // Prescale so one fold period spans four input units; the 0.75 offset maps 0 to 0
+    x = _mm_mul_ps(x, p25);
+    x = _mm_add_ps(x, p75);
+
+    // Modulus by 1: subtract the converted integer, then wrap negative remainders into [0, 1)
+    __m128i e = _mm_cvtps_epi32(x);
+    __m128 a = _mm_sub_ps(x, _mm_cvtepi32_ps(e));
+    a = _mm_add_ps(a, _mm_and_ps(one, _mm_cmplt_ps(a, zero)));
+
+    // Scale the wrapped phase from [0, 1) to (-2, 2]
+    a = _mm_mul_ps(a, mfour);
+    a = _mm_add_ps(a, two);
+
+    // Absolute value: clear the sign bit with an integer mask bit-cast to float lanes
+    // (a register cast, so no aliasing-violating pointer pun is needed)
+    a = _mm_and_ps(a, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)));
+
+    // Shift down by one so the folded result spans [-1, 1]
+    a = _mm_sub_ps(a, one);
+
+    return a;
+}
+
+// Sine fold is implemented in SINUS_SSE2 in Effects.h via template args
+
 } // namespace sst::waveshapers
 
 #endif // SST_WAVESHAPERS_WAVEFOLDERS_H
diff --git a/include/sst/waveshapers/WaveshaperConfiguration.h b/include/sst/waveshapers/WaveshaperConfiguration.h
@@ -30,6 +30,8 @@ enum class WaveshaperType
     wst_singlefold,
     wst_dualfold,
     wst_westfold,
+    wst_linearfold,
+    wst_sinefold,
 
     // additive harmonics
     wst_add12,
@@ -74,6 +76,7 @@ const char wst_names[(int)WaveshaperType::n_ws_types][32] = {"Off",
                                                              "Asymmetric",
                                                              "Sine",
                                                              "Digital",
+
                                                              "Soft Harmonic 2",
                                                              "Soft Harmonic 3",
                                                              "Soft Harmonic 4",
@@ -85,6 +88,9 @@ const char wst_names[(int)WaveshaperType::n_ws_types][32] = {"Off",
                                                              "Single Fold",
                                                              "Double Fold",
                                                              "West Coast Fold",
+                                                             "Linear Fold",
+                                                             "Sine Fold",
+
                                                              "Additive 1+2",
                                                              "Additive 1+3",
                                                              "Additive 1+4",
@@ -158,6 +164,8 @@ inline std::vector<std::pair<int, std::string>> WaveshaperGroupName()
         p(sst::waveshapers::WaveshaperType::wst_singlefold, "Wavefolder");
         p(sst::waveshapers::WaveshaperType::wst_dualfold, "Wavefolder");
         p(sst::waveshapers::WaveshaperType::wst_westfold, "Wavefolder");
+        p(sst::waveshapers::WaveshaperType::wst_linearfold, "Wavefolder");
+        p(sst::waveshapers::WaveshaperType::wst_sinefold, "Wavefolder");
 
         p(sst::waveshapers::WaveshaperType::wst_fuzz, "Fuzz");
         p(sst::waveshapers::WaveshaperType::wst_fuzzheavy, "Fuzz");