Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Linear and Sine folds from Vital Synth #8

Merged
merged 4 commits into from
Sep 8, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions include/sst/waveshapers/Effects.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,37 @@ inline __m128 DIGI_SSE2(QuadWaveshaperState *__restrict, __m128 in, __m128 drive
return _mm_mul_ps(drive, _mm_mul_ps(m16inv, _mm_sub_ps(_mm_cvtepi32_ps(a), mofs)));
}

template<bool DO_FOLD = false>
inline __m128 SINUS_SSE2(QuadWaveshaperState *__restrict s, __m128 in, __m128 drive)
{
const __m128 one = _mm_set1_ps(1.f);
const __m128 m256 = _mm_set1_ps(256.f);
const __m128 m512 = _mm_set1_ps(512.f);

// Scale so that -1.0 - 1.0 goes to 256 - 768
// +6 dB gets you the full sine wave
__m128 x = _mm_mul_ps(in, drive);
x = _mm_add_ps(_mm_mul_ps(x, m256), m512);

// Convert to 32 bit ints
__m128i e = _mm_cvtps_epi32(x);
// Calculate the remainder due to truncation. This is used for later interpolation
__m128 a = _mm_sub_ps(x, _mm_cvtepi32_ps(e));
e = _mm_packs_epi32(e, e);
const __m128i UB = _mm_set1_epi16(0x3fe);
e = _mm_max_epi16(_mm_min_epi16(e, UB), _mm_setzero_si128());

// Template arg -- Compiler will optimize this out
if (DO_FOLD) {
// Now, make sure the fold pattern repeats
// Fortunately, we're dealing with a power-of-two LUT so we can do a modulus by bitwise and like so:
e = _mm_and_si128(e, _mm_set1_epi32(0x3ff));
// Now pack into 16 bit ints. Should already be truncated
// If not, whoops, segfault
e = _mm_packs_epi32(e, e);
} else {
// Don't repeat; Instead, clip to zero at the boundaries
e = _mm_packs_epi32(e, e);
const __m128i UB = _mm_set1_epi16(0x3fe);
e = _mm_max_epi16(_mm_min_epi16(e, UB), _mm_setzero_si128());
}

#if MAC
// this should be very fast on C2D/C1D (and there are no macs with K8's)
Expand Down
6 changes: 5 additions & 1 deletion include/sst/waveshapers/QuadWaveshaper_Impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ inline QuadWaveshaperPtr GetQuadWaveshaper(WaveshaperType type)

// effects
case WaveshaperType::wst_sine:
return SINUS_SSE2;
return SINUS_SSE2<false>;
case WaveshaperType::wst_digital:
return DIGI_SSE2;

Expand Down Expand Up @@ -79,6 +79,10 @@ inline QuadWaveshaperPtr GetQuadWaveshaper(WaveshaperType type)
return WAVEFOLDER<dualFoldADAA>;
case WaveshaperType::wst_westfold:
return WAVEFOLDER<westCoastFoldADAA>;
case WaveshaperType::wst_linearfold:
return LINFOLD_SSE2;
case WaveshaperType::wst_sinefold:
return SINUS_SSE2<true>;

// fuzzes
case WaveshaperType::wst_fuzz:
Expand Down
39 changes: 39 additions & 0 deletions include/sst/waveshapers/Wavefolders.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,45 @@ inline __m128 SoftOneFold(QuadWaveshaperState *__restrict, __m128 x, __m128 driv

return _mm_mul_ps(y, _mm_rcp_ps(num));
}

/**
 * Linear (triangle) wavefolder operating on four packed floats.
 *
 * The driven input (in * drive) is mapped onto a repeating triangle shape:
 * a driven value of 0 yields 0, +1 yields +1, -1 yields -1, and values beyond
 * that fold back linearly with a period of 4 in driven-input units.
 *
 * Heavily derived from Vital's linear fold (GPLv3, compatible with SST).
 *
 * @param in    four input samples
 * @param drive per-lane gain applied to the input before folding
 * @return      four folded samples, each in [-1, 1] for inputs within the
 *              range the float-to-int32 conversion can represent
 *
 * The state pointer is not used by this shaper.
 */
inline __m128 LINFOLD_SSE2(QuadWaveshaperState *__restrict /*s*/, __m128 in, __m128 drive)
{
    const __m128 mfour = _mm_set1_ps(-4.f);
    const __m128 two = _mm_set1_ps(2.f);
    const __m128 one = _mm_set1_ps(1.f);
    const __m128 p75 = _mm_set1_ps(0.75f);
    const __m128 p25 = _mm_set1_ps(0.25f);
    const __m128 zero = _mm_setzero_ps();

    // Mask with every bit set except the IEEE-754 sign bit. Built from an integer
    // vector and bit-cast, rather than by dereferencing a uint32_t through a float*,
    // which violates strict aliasing.
    const __m128 absMask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));

    // Prescale the driven input: one fold period (4 units) becomes 1, offset by 0.75
    // so that a driven value of 0 lands on the zero crossing of the triangle.
    __m128 x = _mm_mul_ps(in, drive);
    x = _mm_mul_ps(x, p25);
    x = _mm_add_ps(x, p75);

    // Modulus by 1: subtract the converted integer part. The difference can come out
    // negative, so add 1 in exactly those lanes to bring the fraction back into [0, 1).
    const __m128i e = _mm_cvtps_epi32(x);
    __m128 a = _mm_sub_ps(x, _mm_cvtepi32_ps(e));
    a = _mm_add_ps(a, _mm_and_ps(one, _mm_cmplt_ps(a, zero)));

    // Scale the [0, 1) phase to (-2, 2] ...
    a = _mm_mul_ps(a, mfour);
    a = _mm_add_ps(a, two);

    // ... take the absolute value to form the triangle in [0, 2] ...
    a = _mm_and_ps(a, absMask);

    // ... and shift down so the output is centred on zero.
    a = _mm_sub_ps(a, one);

    return a;
}

// Sine fold is implemented in SINUS_SSE2 in Effects.h via template args

} // namespace sst::waveshapers

#endif // SST_WAVESHAPERS_WAVEFOLDERS_H
8 changes: 8 additions & 0 deletions include/sst/waveshapers/WaveshaperConfiguration.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ enum class WaveshaperType
wst_singlefold,
wst_dualfold,
wst_westfold,
wst_linearfold,
wst_sinefold,
baconpaul marked this conversation as resolved.
Show resolved Hide resolved

// additive harmonics
wst_add12,
Expand Down Expand Up @@ -74,6 +76,7 @@ const char wst_names[(int)WaveshaperType::n_ws_types][32] = {"Off",
"Asymmetric",
"Sine",
"Digital",

"Soft Harmonic 2",
"Soft Harmonic 3",
"Soft Harmonic 4",
Expand All @@ -85,6 +88,9 @@ const char wst_names[(int)WaveshaperType::n_ws_types][32] = {"Off",
"Single Fold",
"Double Fold",
"West Coast Fold",
"Linear Fold",
"Sine Fold",

"Additive 1+2",
"Additive 1+3",
"Additive 1+4",
Expand Down Expand Up @@ -158,6 +164,8 @@ inline std::vector<std::pair<int, std::string>> WaveshaperGroupName()
p(sst::waveshapers::WaveshaperType::wst_singlefold, "Wavefolder");
p(sst::waveshapers::WaveshaperType::wst_dualfold, "Wavefolder");
p(sst::waveshapers::WaveshaperType::wst_westfold, "Wavefolder");
p(sst::waveshapers::WaveshaperType::wst_linearfold, "Wavefolder");
p(sst::waveshapers::WaveshaperType::wst_sinefold, "Wavefolder");

p(sst::waveshapers::WaveshaperType::wst_fuzz, "Fuzz");
p(sst::waveshapers::WaveshaperType::wst_fuzzheavy, "Fuzz");
Expand Down
Loading