From 784489640364f2082016b1ac7e2729e3c9ed854e Mon Sep 17 00:00:00 2001 From: Vektor Date: Mon, 2 Oct 2023 13:00:11 +0200 Subject: [PATCH] move noise cancellation & normalizer to dsp add db option to level --- examples/devicetests.cpp | 2 + include/ISoundInput.h | 2 +- src/CSoundInput.cpp | 140 +++++++++++++++++++++++++-------------- src/CSoundInput.h | 10 ++- 4 files changed, 100 insertions(+), 54 deletions(-) diff --git a/examples/devicetests.cpp b/examples/devicetests.cpp index 74c975a..2813bb5 100644 --- a/examples/devicetests.cpp +++ b/examples/devicetests.cpp @@ -66,6 +66,8 @@ int main() soundInput->SetNormalizatonEnabled(true); std::cout << soundInput->GetCurrentDeviceUID() << std::endl; + //AV_DestroySoundInput(soundInput); + for(;;) { int nextDeviceIndex = 0; diff --git a/include/ISoundInput.h b/include/ISoundInput.h index 97f7b35..e546486 100644 --- a/include/ISoundInput.h +++ b/include/ISoundInput.h @@ -16,7 +16,7 @@ class ISoundInput [[nodiscard]] virtual int Read(void* data, size_t size) = 0; - [[nodiscard]] virtual float GetLevel() const = 0; + [[nodiscard]] virtual float GetLevel(bool db = false) const = 0; [[nodiscard]] virtual int GetNumDevices() const = 0; [[nodiscard]] virtual uint32_t GetDeviceIdFromIndex(int index) const = 0; diff --git a/src/CSoundInput.cpp b/src/CSoundInput.cpp index 7f1c8b2..dd97e66 100644 --- a/src/CSoundInput.cpp +++ b/src/CSoundInput.cpp @@ -49,9 +49,9 @@ int CSoundInput::Read(void* data, size_t size) return 0; } -float CSoundInput::GetLevel() const +float CSoundInput::GetLevel(bool db) const { - return micLevel; + return db ? 
micLevelDb : micLevel; } void CSoundInput::SetStreamEnabled(bool enabled) @@ -143,11 +143,20 @@ AltVoiceError CSoundInput::SelectDeviceByUID(const char* uid) recordChannel = BASS_RecordStart(SAMPLE_RATE, AUDIO_CHANNELS, 0, OnSoundFrame, this); BASS_ChannelSetAttribute(recordChannel, BASS_ATTRIB_GRANULE, FRAME_SIZE_SAMPLES); + levelChannel = BASS_StreamCreate(SAMPLE_RATE, AUDIO_CHANNELS, BASS_STREAM_DECODE, STREAMPROC_PUSH, this); + // Change input volume - VolumeChangeFX = BASS_ChannelSetFX(recordChannel, BASS_FX_BFX_VOLUME, 0); + VolumeChangeFX = BASS_ChannelSetFX(levelChannel, BASS_FX_BFX_VOLUME, 0); const BASS_BFX_VOLUME VolumeChangeFXParams = { BASS_BFX_CHANALL, volume }; BASS_FXSetParameters(VolumeChangeFX, &VolumeChangeFXParams); - + + BASS_ChannelSetDSP(recordChannel, NoiseDSP, this, 2); //higher prio called first + BASS_ChannelSetDSP(recordChannel, NormalizeDSP, this, 1); + //BASS_ChannelSetDSP(recordChannel, RMSDSP, this, 0); //higher prio called first + + BASS_ChannelStart(levelChannel); + BASS_ChannelPlay(levelChannel, false); + if (!recordChannel) return AltVoiceError::StartStream; @@ -230,27 +239,24 @@ bool CSoundInput::IsNoiseSuppressionEnabled() const void CSoundInput::NoiseSuppressionProcess(void* buffer, DWORD length) { - if (noiseSuppressionEnabled) - { - const auto shortSamples = static_cast(buffer); + const auto shortSamples = static_cast(buffer); - // Convert the 16-bit integer samples to floating-point samples - for (int i = 0; i < FRAME_SIZE_SAMPLES; i++) - { - floatBuffer[i] = static_cast(shortSamples[i]); - } + // Convert the 16-bit integer samples to floating-point samples + for (int i = 0; i < FRAME_SIZE_SAMPLES; i++) + { + floatBuffer[i] = static_cast(shortSamples[i]); + } - // Pass the floating-point samples to the RNNoise function - for (int i = 0; i < FRAME_SIZE_SAMPLES; i += RNNoiseFrameSize) - { - rnnoise_process_frame(denoiser, floatBuffer + i, floatBuffer + i); - } + // Pass the floating-point samples to the RNNoise function + for 
(int i = 0; i < FRAME_SIZE_SAMPLES; i += RNNoiseFrameSize) + { + rnnoise_process_frame(denoiser, floatBuffer + i, floatBuffer + i); + } - // Convert the floating-point samples back to 16-bit integer samples - for (int i = 0; i < FRAME_SIZE_SAMPLES; i++) - { - shortSamples[i] = static_cast(floatBuffer[i]); - } + // Convert the floating-point samples back to 16-bit integer samples + for (int i = 0; i < FRAME_SIZE_SAMPLES; i++) + { + shortSamples[i] = static_cast(floatBuffer[i]); } } @@ -264,48 +270,82 @@ bool CSoundInput::IsNormalizationEnabled() const return normalizationEnabled; } -void CSoundInput::Normalize(short* buffer, DWORD length) +void CSoundInput::Normalize(void* buffer, DWORD length) { - if(normalizationEnabled) + short maxFrame = 0; + for (int i = 0; i < length; ++i) { - short maxFrame = 0; - for (int i = 0; i < length; ++i) - { - short s = abs(buffer[i]); - if (s > maxFrame) - maxFrame = s; - } + short s = abs(((short*)buffer)[i]); + if (s > maxFrame) + maxFrame = s; + } - if (normalizeMax == 0.f || maxFrame > normalizeMax || normalizeMax / maxFrame < 0.5) - normalizeMax = maxFrame; - else - normalizeMax = (normalizeMax * (NORMALIZE_FRAME_COUNT - 1) + maxFrame) / NORMALIZE_FRAME_COUNT; + if (normalizeMax == 0.f || maxFrame > normalizeMax || normalizeMax / maxFrame < 0.5) + normalizeMax = maxFrame; + else + normalizeMax = (normalizeMax * (NORMALIZE_FRAME_COUNT - 1) + maxFrame) / NORMALIZE_FRAME_COUNT; - if (normalizeMax <= 1.f) - return; + if (normalizeMax <= 1.f) + return; - float gain = MAXSHORT / normalizeMax / 2; - gain = std::fmin(gain, 10); + float gain = MAXSHORT / normalizeMax / 2; + gain = std::fmin(gain, 10); + + for (int i = 0; i < length; ++i) + ((short*)buffer)[i] *= gain; +} - for (int i = 0; i < length; ++i) - buffer[i] *= gain; +void CSoundInput::NormalizeDSP(HDSP handle, DWORD channel, void* buffer, DWORD length, void* user) +{ + const auto self = static_cast(user); + std::unique_lock lock{ self->inputMutex }; + + if 
(self->IsNormalizationEnabled()) + { + for (int i = 0; i < length; i += (FRAME_SIZE_SAMPLES * sizeof(short))) + { + self->Normalize((char*)buffer + i, FRAME_SIZE_SAMPLES); + } + } +} + +void CSoundInput::NoiseDSP(HDSP handle, DWORD channel, void* buffer, DWORD length, void* user) +{ + const auto self = static_cast(user); + std::unique_lock lock{ self->inputMutex }; + + if(self->IsNoiseSuppressionEnabled()) + { + for (int i = 0; i < length; i += (FRAME_SIZE_SAMPLES * sizeof(short))) + { + self->NoiseSuppressionProcess((char*)buffer + i, FRAME_SIZE_SAMPLES); + } } } void CSoundInput::SoundFrameCaptured(HRECORD handle, const void* buffer, DWORD length) { - // Create new buffer on stack because buffer was marked as const in API - memcpy_s(writableBuffer, FRAME_SIZE_SAMPLES * sizeof(short), buffer, length); + // Put available data in the level channel + BASS_StreamPutData(levelChannel, buffer, length * sizeof(short)); + + // Get current microphone noise level from level channel + const DWORD currentMicLevel = BASS_ChannelGetLevel(levelChannel); + + // Get left channel noise level from it (because it's mono so right = left) + const uint16_t leftChannelLevel = LOWORD(currentMicLevel); - // Apply normalization - Normalize(writableBuffer, FRAME_SIZE_SAMPLES); + // Convert to float from 0.f to 1.f + micLevel = static_cast(leftChannelLevel) / MaxShortFloatValue; - // Apply noise suppression - NoiseSuppressionProcess(writableBuffer, FRAME_SIZE_SAMPLES); + // Convert level to decibels + micLevelDb = (micLevel > 0 ? 20 * log10(micLevel) : -HUGE_VAL); + micLevelDb = std::clamp(micLevelDb, -100.f, 0.f); - // Get current microphone noise level - BASS_ChannelGetLevelEx(handle, &micLevel, 0.05, BASS_LEVEL_MONO | BASS_LEVEL_RMS | BASS_LEVEL_NOREMOVE); - micLevel = std::clamp(micLevel * 2.0f, 0.0f, 1.0f); + // Remove data from level channel (is there a better way to do it?) 
+ BASS_ChannelGetData(levelChannel, writableBuffer, length * sizeof(short)); + + // Copy mic data to output buffer + memcpy_s(writableBuffer, FRAME_SIZE_SAMPLES * sizeof(short), buffer, length); if (VoiceCallback) { diff --git a/src/CSoundInput.h b/src/CSoundInput.h index 5cffb5e..e32e4dc 100644 --- a/src/CSoundInput.h +++ b/src/CSoundInput.h @@ -15,11 +15,13 @@ class CSoundInput : public ISoundInput static constexpr int NORMALIZE_FRAME_COUNT = 20; HRECORD recordChannel = 0; + HSTREAM levelChannel = 0; COpusEncoder* encoder = nullptr; int bitrate; float volume = 1.f; - float micLevel = 0; + float micLevel = 0.f; + float micLevelDb = 0.f; bool noiseSuppressionEnabled = false; bool normalizationEnabled = false; @@ -49,7 +51,7 @@ class CSoundInput : public ISoundInput void SetStreamEnabled(bool enabled) override; int Read(void* data, size_t size) override; - [[nodiscard]] float GetLevel() const override; + [[nodiscard]] float GetLevel(bool db) const override; [[nodiscard]] int GetNumDevices() const override; [[nodiscard]] uint32_t GetDeviceIdFromIndex(int index) const override; @@ -68,9 +70,11 @@ class CSoundInput : public ISoundInput void SetNormalizatonEnabled(bool enabled) override; [[nodiscard]] bool IsNormalizationEnabled() const override; - void Normalize(short* buffer, DWORD length); + void Normalize(void* buffer, DWORD length); void SoundFrameCaptured(HRECORD handle, const void* buffer, DWORD length); static BOOL OnSoundFrame(HRECORD handle, const void* buffer, DWORD length, void* user); + static void NormalizeDSP(HDSP handle, DWORD channel, void* buffer, DWORD length, void* user); + static void NoiseDSP(HDSP handle, DWORD channel, void* buffer, DWORD length, void* user); };