From 9d7da353bba0a9098b53014a6dd51127b631150d Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Sat, 11 Jan 2025 21:12:30 +0800 Subject: [PATCH 01/15] Build Sonic as a DLL and add wrapper class for SonicStream --- nvdaHelper/archBuild_sconscript | 1 + nvdaHelper/sonic/sconscript | 24 ++++ nvdaHelper/sonic/sonic.def | 31 +++++ source/speech/__init__.py | 2 + source/synthDrivers/_sonic.py | 203 ++++++++++++++++++++++++++++++++ 5 files changed, 261 insertions(+) create mode 100644 nvdaHelper/sonic/sconscript create mode 100644 nvdaHelper/sonic/sonic.def create mode 100644 source/synthDrivers/_sonic.py diff --git a/nvdaHelper/archBuild_sconscript b/nvdaHelper/archBuild_sconscript index 6877b28425e..3fb5347b91c 100644 --- a/nvdaHelper/archBuild_sconscript +++ b/nvdaHelper/archBuild_sconscript @@ -240,3 +240,4 @@ if TARGET_ARCH in ("x86_64", "arm64"): if TARGET_ARCH == "x86": thirdPartyEnv.SConscript("espeak/sconscript") thirdPartyEnv.SConscript("liblouis/sconscript") + thirdPartyEnv.SConscript("sonic/sconscript") # build the Sonic library as a DLL diff --git a/nvdaHelper/sonic/sconscript b/nvdaHelper/sonic/sconscript new file mode 100644 index 00000000000..c562701f94b --- /dev/null +++ b/nvdaHelper/sonic/sconscript @@ -0,0 +1,24 @@ +Import( + [ + "thirdPartyEnv", + "sourceDir", + ] +) + +env = thirdPartyEnv.Clone() + +sonicSrcDir = Dir("#include/sonic") + +# Rename the .obj file so that this won't conflict with the static library build in eSpeak +sonicObj = env.Object("sonicDll", srcdir=sonicSrcDir.abspath, source="sonic.c") + +# Build sonic.dll with all functions exported +sonicLib = env.SharedLibrary( + target="sonic", + source=[ + sonicObj, + "sonic.def", + ], +) + +env.Install(sourceDir, sonicLib) diff --git a/nvdaHelper/sonic/sonic.def b/nvdaHelper/sonic/sonic.def new file mode 100644 index 00000000000..6063a219a19 --- /dev/null +++ b/nvdaHelper/sonic/sonic.def @@ -0,0 +1,31 @@ +EXPORTS + sonicCreateStream + 
from ctypes import Array, c_float, c_int, c_short, c_ubyte, c_void_p, cdll
import os

# Handle to the loaded Sonic DLL; stays None until initialize() has succeeded.
sonicLib = None


class SonicStreamP(c_void_p):
    """Opaque handle type for a native Sonic stream.

    Used as restype/argtypes so that ctypes keeps the handle as a pointer
    object instead of converting it to a plain Python integer.
    """


def initialize():
    """Initialize the Sonic DLL.
    The sonic.dll file should be in the installation directory.

    Loads the library, declares the prototype of every function used by
    :class:`SonicStream`, and only then publishes it as ``sonicLib``.
    """
    global sonicLib
    # Imported here rather than at module level so that this wrapper module
    # can be imported (e.g. for testing) without the NVDA runtime.
    import globalVars

    lib = cdll.LoadLibrary(os.path.join(globalVars.appDir, "sonic.dll"))

    def declare(name, restype, *argtypes):
        # Attach the C prototype to one exported function.
        func = getattr(lib, name)
        func.restype = restype
        func.argtypes = list(argtypes)

    declare("sonicCreateStream", SonicStreamP, c_int, c_int)
    declare("sonicDestroyStream", None, SonicStreamP)
    # All read/write functions share the shape: int f(stream, buffer, sampleCount).
    for name in (
        "sonicWriteFloatToStream",
        "sonicWriteShortToStream",
        "sonicWriteUnsignedCharToStream",
        "sonicReadFloatFromStream",
        "sonicReadShortFromStream",
        "sonicReadUnsignedCharFromStream",
    ):
        declare(name, c_int, SonicStreamP, c_void_p, c_int)
    declare("sonicFlushStream", c_int, SonicStreamP)
    declare("sonicSamplesAvailable", c_int, SonicStreamP)
    # Getter/setter pairs: T sonicGetX(stream) / void sonicSetX(stream, T).
    for prop, ctype in (
        ("Speed", c_float),
        ("Pitch", c_float),
        ("Rate", c_float),
        ("Volume", c_float),
        ("Quality", c_int),
        ("SampleRate", c_int),
        ("NumChannels", c_int),
    ):
        declare(f"sonicGet{prop}", ctype, SonicStreamP)
        declare(f"sonicSet{prop}", None, SonicStreamP, ctype)

    # Publish only a fully configured library object.
    sonicLib = lib


class SonicStream:
    """
    Audio stream that wraps the Sonic library to process audio,
    which is optimised for speeding up speech by high factors.
    Audio data are stored internally as 16-bit integers.
    """

    def __init__(self, sampleRate: int, channels: int):
        """Create a native Sonic stream.
        :param sampleRate: Sample rate of the audio, in Hz.
        :param channels: Number of channels (1 for mono, 2 for stereo).
        :raises MemoryError: If the native stream could not be allocated."""
        # Set first so that __del__ is safe even if creation fails below.
        self.stream = None
        self.stream = sonicLib.sonicCreateStream(sampleRate, channels)
        if not self.stream:
            raise MemoryError()

    def __del__(self):
        # Only destroy a handle that was really created, and never twice.
        # sonicLib may be None if the library was never initialized.
        stream = getattr(self, "stream", None)
        if stream and sonicLib is not None:
            sonicLib.sonicDestroyStream(stream)
            self.stream = None

    def writeFloat(self, data: c_void_p, numSamples: int) -> None:
        """Write 32-bit floating point data to be processed into the stream,
        where each sample must be between -1 and 1.
        :param data: A pointer to 32-bit floating point wave data.
        :param numSamples: The number of samples.
            Multiply this by channel count to get the total number of values.
        :raises MemoryError: If memory allocation failed."""
        if not sonicLib.sonicWriteFloatToStream(self.stream, data, numSamples):
            raise MemoryError()

    def writeShort(self, data: c_void_p, numSamples: int) -> None:
        """Write 16-bit integer data to be processed into the stream.
        :param data: A pointer to 16-bit integer wave data.
        :param numSamples: The number of samples.
            Multiply this by channel count to get the total number of values.
        :raises MemoryError: If memory allocation failed."""
        if not sonicLib.sonicWriteShortToStream(self.stream, data, numSamples):
            raise MemoryError()

    def writeUnsignedChar(self, data: c_void_p, numSamples: int) -> None:
        """Write 8-bit unsigned integer data to be processed into the stream.
        :param data: A pointer to 8-bit integer wave data.
        :param numSamples: The number of samples.
            Multiply this by channel count to get the total number of values.
        :raises MemoryError: If memory allocation failed."""
        if not sonicLib.sonicWriteUnsignedCharToStream(self.stream, data, numSamples):
            raise MemoryError()

    def _read(self, sampleType, readFunc):
        """Drain all currently available samples into a new ctypes array.
        :param sampleType: The ctypes element type of the returned array.
        :param readFunc: The Sonic read function matching ``sampleType``.
            The two must agree, otherwise the native code writes elements
            of a different size than the buffer was allocated for.
        :return: An array of ``samplesAvailable * channels`` values."""
        samples = self.samplesAvailable
        buffer = (sampleType * (samples * self.channels))()
        readFunc(self.stream, buffer, samples)
        return buffer

    def readFloat(self) -> Array[c_float]:
        """Read processed data from the stream as 32-bit floating point data."""
        return self._read(c_float, sonicLib.sonicReadFloatFromStream)

    def readShort(self) -> Array[c_short]:
        """Read processed data from the stream as 16-bit integer data."""
        return self._read(c_short, sonicLib.sonicReadShortFromStream)

    def readUnsignedChar(self) -> Array[c_ubyte]:
        """Read processed data from the stream as 8-bit unsigned integer data."""
        return self._read(c_ubyte, sonicLib.sonicReadUnsignedCharFromStream)

    def flush(self) -> None:
        """Force the sonic stream to generate output using whatever data it currently has.
        No extra delay will be added to the output, but flushing in the middle of words could introduce distortion.
        This is usually done when data writing is completed.
        :raises MemoryError: If memory allocation failed."""
        if not sonicLib.sonicFlushStream(self.stream):
            raise MemoryError()

    @property
    def samplesAvailable(self) -> int:
        """The number of processed samples waiting in the output buffer."""
        return sonicLib.sonicSamplesAvailable(self.stream)

    @property
    def speed(self) -> float:
        """The speed factor of the stream."""
        return sonicLib.sonicGetSpeed(self.stream)

    @speed.setter
    def speed(self, value: float):
        sonicLib.sonicSetSpeed(self.stream, value)

    @property
    def pitch(self) -> float:
        """The pitch factor of the stream."""
        return sonicLib.sonicGetPitch(self.stream)

    @pitch.setter
    def pitch(self, value: float):
        sonicLib.sonicSetPitch(self.stream, value)

    @property
    def rate(self) -> float:
        """This scales pitch and speed at the same time."""
        return sonicLib.sonicGetRate(self.stream)

    @rate.setter
    def rate(self, value: float):
        sonicLib.sonicSetRate(self.stream, value)

    @property
    def volume(self) -> float:
        """The scaling factor of the stream."""
        return sonicLib.sonicGetVolume(self.stream)

    @volume.setter
    def volume(self, value: float):
        sonicLib.sonicSetVolume(self.stream, value)

    @property
    def quality(self) -> int:
        """Default 0 is virtually as good as 1, but very much faster."""
        return sonicLib.sonicGetQuality(self.stream)

    @quality.setter
    def quality(self, value: int):
        sonicLib.sonicSetQuality(self.stream, value)

    @property
    def sampleRate(self) -> int:
        """The sample rate of the stream."""
        return sonicLib.sonicGetSampleRate(self.stream)

    @sampleRate.setter
    def sampleRate(self, value: int):
        sonicLib.sonicSetSampleRate(self.stream, value)

    @property
    def channels(self) -> int:
        """The number of channels of the stream."""
        return sonicLib.sonicGetNumChannels(self.stream)

    @channels.setter
    def channels(self, value: int):
        sonicLib.sonicSetNumChannels(self.stream, value)
+++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/source/synthDrivers/sapi5.py b/source/synthDrivers/sapi5.py index abc80fe5acc..17b3330b54f 100644 --- a/source/synthDrivers/sapi5.py +++ b/source/synthDrivers/sapi5.py @@ -32,6 +32,7 @@ PhonemeCommand, SpeechCommand, ) +from ._sonic import SonicStream class SpeechVoiceSpeakFlags(IntEnum): @@ -80,7 +81,9 @@ def ISequentialStream_RemoteWrite(self, pv: LP_c_ubyte, cb: int) -> int: return 0 if not synth.isSpeaking: return 0 - synth.player.feed(pv, cb) + synth.sonicStream.writeShort(pv, cb // 2 // synth.sonicStream.channels) + audioData = synth.sonicStream.readShort() + synth.player.feed(audioData, len(audioData) * 2) self._writtenBytes += cb return cb @@ -165,6 +168,10 @@ def Bookmark(self, streamNum: int, pos: int, bookmark: str, bookmarkId: int): def EndStream(self, streamNum: int, pos: int): synth = self.synthRef() + # Flush the stream and get the remaining data. + synth.sonicStream.flush() + audioData = synth.sonicStream.readShort() + synth.player.feed(audioData, len(audioData) * 2) synth.isSpeaking = False synth.player.idle() synthDoneSpeaking.notify(synth=synth) @@ -181,6 +188,7 @@ class SynthDriver(SynthDriver): supportedSettings = ( SynthDriver.VoiceSetting(), SynthDriver.RateSetting(), + SynthDriver.RateBoostSetting(), SynthDriver.PitchSetting(), SynthDriver.VolumeSetting(), ) @@ -217,8 +225,10 @@ def __init__(self, _defaultVoiceToken=None): @type _defaultVoiceToken: ISpeechObjectToken """ self._pitch = 50 + self._rate = 50 self.player = None self.isSpeaking = False + self._rateBoost = False self._initTts(_defaultVoiceToken) def terminate(self): @@ -250,7 +260,10 @@ def _getVoiceTokens(self): return self.tts.getVoices() def _get_rate(self): - return (self.tts.rate * 5) + 50 + return self._rate + + def _get_rateBoost(self): + return self._rateBoost def _get_pitch(self): return self._pitch @@ -268,11 +281,32 @@ def _get_lastIndex(self): else: return None + 
@classmethod
def _percentToParam(cls, percent, min, max):
    """Overrides SynthDriver._percentToParam to return floating point parameter values.
    :param percent: The percentage to convert.
    :param min: The parameter value that 0 percent maps to.
    :param max: The parameter value that 100 percent maps to.
    :return: The linearly interpolated value as a float (not rounded)."""
    # NOTE: ``min``/``max`` shadow the builtins; the names are kept so that
    # this stays a drop-in override (presumably matching the base class signature).
    return float(percent) / 100 * (max - min) + min


def _percentToRate(self, percent):
    """Convert a rate percentage to the integer rate value given to the voice.
    For percentages from 0 to 100 this yields values from -10 to 10."""
    return (percent - 50) // 5


def _set_rate(self, rate):
    """Store the rate percentage and apply it to the voice or to the Sonic stream.
    :param rate: The rate percentage."""
    self._rate = rate
    if self._rateBoost:
        # When rate boost is enabled, use sonicStream to change the speed.
        # Supports 0.5x~6x speed.
        self.tts.Rate = 0
        self.sonicStream.speed = self._percentToParam(rate, 0.5, 6.0)
    else:
        # When rate boost is disabled, let the voice itself change the speed.
        self.tts.Rate = self._percentToRate(rate)
        self.sonicStream.speed = 1


def _set_rateBoost(self, enable):
    """Enable or disable rate boost, keeping the current rate percentage.
    :param enable: Whether rate boost should be enabled."""
    if enable == self._rateBoost:
        return
    rate = self._rate
    self._rateBoost = enable
    # Re-apply the stored percentage so it takes effect under the new mode.
    # NOTE(review): assumes assigning ``self.rate`` dispatches to ``_set_rate`` - confirm.
    self.rate = rate
+ wfx.FormatTag = nvwave.WAVE_FORMAT_PCM + wfx.BitsPerSample = 16 + fmt.SetWaveFormatEx(wfx) if self.player: self.player.close() self.player = nvwave.WavePlayer( @@ -307,6 +346,7 @@ def _initTts(self, voice=None): customStream.BaseStream = audioStream customStream.Format = fmt self.tts.AudioOutputStream = customStream + self.sonicStream = SonicStream(wfx.SamplesPerSec, wfx.Channels) # Set event notify sink self.tts.EventInterests = ( From 6c431984fad1f162cc358a607fbd1f1cec9b0843 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Sun, 12 Jan 2025 20:46:14 +0800 Subject: [PATCH 03/15] Add changelog entry --- user_docs/en/changes.md | 1 + 1 file changed, 1 insertion(+) diff --git a/user_docs/en/changes.md b/user_docs/en/changes.md index a1230c6b729..02c61c7c89d 100644 --- a/user_docs/en/changes.md +++ b/user_docs/en/changes.md @@ -27,6 +27,7 @@ To use this feature, "allow NVDA to control the volume of other applications" mu * Automatic language switching is now supported when using Microsoft Speech API version 5 (SAPI5) and Microsoft Speech Platform voices. (#17146, @gexgd0419) * NVDA can now be configured to speak the current line or paragraph when navigating with braille navigation keys. (#17053, @nvdaes) * In Word, the selection update is now reported when using Word commands to extend or reduce the selection (`f8` or `shift+f8`). (#3293, @CyrilleB79) +* Rate boost is now supported when using Microsoft Speech API version 5 (SAPI5) and Microsoft Speech Platform voices, which supports up to 6X speed. 
(#17606, @gexgd0419) ### Changes From d141591b45037de869d0f6a21161fab6e15df80d Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Mon, 13 Jan 2025 14:06:34 +0800 Subject: [PATCH 04/15] Make eSpeak dynamically link to Sonic --- nvdaHelper/archBuild_sconscript | 1 - nvdaHelper/espeak/sconscript | 12 ++++++++---- nvdaHelper/{sonic => espeak}/sonic.def | 0 nvdaHelper/sonic/sconscript | 24 ------------------------ source/synthDrivers/_sonic.py | 2 +- 5 files changed, 9 insertions(+), 30 deletions(-) rename nvdaHelper/{sonic => espeak}/sonic.def (100%) delete mode 100644 nvdaHelper/sonic/sconscript diff --git a/nvdaHelper/archBuild_sconscript b/nvdaHelper/archBuild_sconscript index 3fb5347b91c..6877b28425e 100644 --- a/nvdaHelper/archBuild_sconscript +++ b/nvdaHelper/archBuild_sconscript @@ -240,4 +240,3 @@ if TARGET_ARCH in ("x86_64", "arm64"): if TARGET_ARCH == "x86": thirdPartyEnv.SConscript("espeak/sconscript") thirdPartyEnv.SConscript("liblouis/sconscript") - thirdPartyEnv.SConscript("sonic/sconscript") # build the Sonic library as a DLL diff --git a/nvdaHelper/espeak/sconscript b/nvdaHelper/espeak/sconscript index 18bcd308f63..d280ac2bc47 100644 --- a/nvdaHelper/espeak/sconscript +++ b/nvdaHelper/espeak/sconscript @@ -1034,10 +1034,13 @@ def espeak_compileDict_buildAction( return ACTION_SUCCESS -sonicLib = env.StaticLibrary( +sonicLib = env.SharedLibrary( target="sonic", srcdir=sonicSrcDir.abspath, - source="sonic.c", + source=[ + "sonic.c", + Dir(".").File("sonic.def"), + ] ) espeakLib = env.SharedLibrary( @@ -1082,7 +1085,6 @@ espeakLib = env.SharedLibrary( "tr_languages.c", "voices.c", "wavegen.c", - sonicLib, # espeak OPT_SPEECHPLAYER block "sPlayer.c", "../speechPlayer/src/frame.cpp", @@ -1100,7 +1102,8 @@ espeakLib = env.SharedLibrary( # com\ttsengine.cpp # We do not use the ASYNC compile option in espeak. 
], - LIBS=["advapi32"], + LIBS=["advapi32", "sonic"], + LIBPATH='.', ) @@ -1151,6 +1154,7 @@ for dictFileName, (langCode, inputFiles) in espeakDictionaryCompileList.items(): ) env.Install(synthDriversDir, espeakLib) +env.Install(synthDriversDir, sonicLib) # install espeak-ng-data targetEspeakDataDir = synthDriversDir.Dir("espeak-ng-data") diff --git a/nvdaHelper/sonic/sonic.def b/nvdaHelper/espeak/sonic.def similarity index 100% rename from nvdaHelper/sonic/sonic.def rename to nvdaHelper/espeak/sonic.def diff --git a/nvdaHelper/sonic/sconscript b/nvdaHelper/sonic/sconscript deleted file mode 100644 index c562701f94b..00000000000 --- a/nvdaHelper/sonic/sconscript +++ /dev/null @@ -1,24 +0,0 @@ -Import( - [ - "thirdPartyEnv", - "sourceDir", - ] -) - -env = thirdPartyEnv.Clone() - -sonicSrcDir = Dir("#include/sonic") - -# Rename the .obj file so that this won't conflict with the static library build in eSpeak -sonicObj = env.Object("sonicDll", srcdir=sonicSrcDir.abspath, source="sonic.c") - -# Build sonic.dll with all functions exported -sonicLib = env.SharedLibrary( - target="sonic", - source=[ - sonicObj, - "sonic.def", - ], -) - -env.Install(sourceDir, sonicLib) diff --git a/source/synthDrivers/_sonic.py b/source/synthDrivers/_sonic.py index df659a9a304..fc87ae35ef0 100644 --- a/source/synthDrivers/_sonic.py +++ b/source/synthDrivers/_sonic.py @@ -13,7 +13,7 @@ def initialize(): """Initialize the Sonic DLL. 
The sonic.dll file should be in the installation directory.""" global sonicLib - sonicLib = cdll.LoadLibrary(os.path.join(globalVars.appDir, "sonic.dll")) + sonicLib = cdll.LoadLibrary(os.path.join(globalVars.appDir, "synthDrivers", "sonic.dll")) sonicLib.sonicCreateStream.restype = SonicStreamP sonicLib.sonicCreateStream.argtypes = [c_int, c_int] sonicLib.sonicDestroyStream.restype = None From 7b1c22fe80458da2ee51babaa34a8e779f8f95e4 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Mon, 13 Jan 2025 14:20:13 +0800 Subject: [PATCH 05/15] Change sonic.h to include __declspec(dllimport) --- nvdaHelper/espeak/sconscript | 1 + nvdaHelper/espeak/sonic.h | 306 +++++++++++++++++++++++++++++++++++ 2 files changed, 307 insertions(+) create mode 100644 nvdaHelper/espeak/sonic.h diff --git a/nvdaHelper/espeak/sconscript b/nvdaHelper/espeak/sconscript index d280ac2bc47..96209139cbe 100644 --- a/nvdaHelper/espeak/sconscript +++ b/nvdaHelper/espeak/sconscript @@ -111,6 +111,7 @@ env.Append( "/DUSE_SPEECHPLAYER=1", "/DUSE_KLATT=1", "/DUSE_LIBSONIC=1", + "/DSONIC_DLL", ] ) diff --git a/nvdaHelper/espeak/sonic.h b/nvdaHelper/espeak/sonic.h new file mode 100644 index 00000000000..b44c3f694ca --- /dev/null +++ b/nvdaHelper/espeak/sonic.h @@ -0,0 +1,306 @@ +// This is a modified version of the Sonic Library's header, +// which added __declspec(dllimport) to each function +// if SONIC_DLL is defined. +// Used when compiling eSpeak dynamically linked to Sonic. + +#ifndef SONIC_H_ +#define SONIC_H_ + +/* Sonic library + Copyright 2010 + Bill Cox + This file is part of the Sonic Library. + + This file is licensed under the Apache 2.0 license. +*/ + +/* +The Sonic Library implements a new algorithm invented by Bill Cox for the +specific purpose of speeding up speech by high factors at high quality. It +generates smooth speech at speed up factors as high as 6X, possibly more. 
It is +also capable of slowing down speech, and generates high quality results +regardless of the speed up or slow down factor. For speeding up speech by 2X or +more, the following equation is used: + + newSamples = period/(speed - 1.0) + scale = 1.0/newSamples; + +where period is the current pitch period, determined using AMDF or any other +pitch estimator, and speed is the speedup factor. If the current position in +the input stream is pointed to by "samples", and the current output stream +position is pointed to by "out", then newSamples number of samples can be +generated with: + + out[t] = (samples[t]*(newSamples - t) + samples[t + period]*t)/newSamples; + +where t = 0 to newSamples - 1. + +For speed factors < 2X, the PICOLA algorithm is used. The above +algorithm is first used to double the speed of one pitch period. Then, enough +input is directly copied from the input to the output to achieve the desired +speed up factor, where 1.0 < speed < 2.0. The amount of data copied is derived: + + speed = (2*period + length)/(period + length) + speed*length + speed*period = 2*period + length + length(speed - 1) = 2*period - speed*period + length = period*(2 - speed)/(speed - 1) + +For slowing down speech where 0.5 < speed < 1.0, a pitch period is inserted into +the output twice, and length of input is copied from the input to the output +until the output desired speed is reached. The length of data copied is: + + length = period*(speed - 0.5)/(1 - speed) + +For slow down factors below 0.5, no data is copied, and an algorithm +similar to high speed factors is used. +*/ + +/* Uncomment this to use sin-wav based overlap add which in theory can improve + sound quality slightly, at the expense of lots of floating point math. 
*/ +/* #define SONIC_USE_SIN */ + +#ifdef SONIC_DLL +#ifdef SONIC_EXPORT +#define SONIC_API __declspec(dllexport) +#else +#define SONIC_API __declspec(dllimport) +#endif /* SONIC_EXPORT */ +#else +#define SONIC_API +#endif /* SONIC_DLL */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef SONIC_INTERNAL +/* The following #define's are used to change the names of the routines defined + * here so that a new library (i.e. speedy) can reuse these names, and then call + * the original names. We do this for two reasons: 1) we don't want to change + * the original API, and 2) we want to add a shim, using the original names and + * still call these routines. + * + * Original users of this API and the libsonic library need to do nothing. The + * original behavior remains. + * + * A new user that add some additional functionality above this library (a shim) + * should #define SONIC_INTERNAL before including this file, undefine all these + * symbols and call the sonicIntXXX functions directly. + */ +#define sonicCreateStream sonicIntCreateStream +#define sonicDestroyStream sonicIntDestroyStream +#define sonicWriteFloatToStream sonicIntWriteFloatToStream +#define sonicWriteShortToStream sonicIntWriteShortToStream +#define sonicWriteUnsignedCharToStream sonicIntWriteUnsignedCharToStream +#define sonicReadFloatFromStream sonicIntReadFloatFromStream +#define sonicReadShortFromStream sonicIntReadShortFromStream +#define sonicReadUnsignedCharFromStream sonicIntReadUnsignedCharFromStream +#define sonicFlushStream sonicIntFlushStream +#define sonicSamplesAvailable sonicIntSamplesAvailable +#define sonicGetSpeed sonicIntGetSpeed +#define sonicSetSpeed sonicIntSetSpeed +#define sonicGetPitch sonicIntGetPitch +#define sonicSetPitch sonicIntSetPitch +#define sonicGetRate sonicIntGetRate +#define sonicSetRate sonicIntSetRate +#define sonicGetVolume sonicIntGetVolume +#define sonicSetVolume sonicIntSetVolume +#define sonicGetQuality sonicIntGetQuality +#define sonicSetQuality 
sonicIntSetQuality +#define sonicGetSampleRate sonicIntGetSampleRate +#define sonicSetSampleRate sonicIntSetSampleRate +#define sonicGetNumChannels sonicIntGetNumChannels +#define sonicGetUserData sonicIntGetUserData +#define sonicSetUserData sonicIntSetUserData +#define sonicSetNumChannels sonicIntSetNumChannels +#define sonicChangeFloatSpeed sonicIntChangeFloatSpeed +#define sonicChangeShortSpeed sonicIntChangeShortSpeed +#define sonicEnableNonlinearSpeedup sonicIntEnableNonlinearSpeedup +#define sonicSetDurationFeedbackStrength sonicIntSetDurationFeedbackStrength +#define sonicComputeSpectrogram sonicIntComputeSpectrogram +#define sonicGetSpectrogram sonicIntGetSpectrogram + +#endif /* SONIC_INTERNAL */ + +/* This specifies the range of voice pitches we try to match. + Note that if we go lower than 65, we could overflow in findPitchInRange */ +#ifndef SONIC_MIN_PITCH +#define SONIC_MIN_PITCH 65 +#endif /* SONIC_MIN_PITCH */ +#ifndef SONIC_MAX_PITCH +#define SONIC_MAX_PITCH 400 +#endif /* SONIC_MAX_PITCH */ + +/* These are used to down-sample some inputs to improve speed */ +#define SONIC_AMDF_FREQ 4000 + +struct sonicStreamStruct; +typedef struct sonicStreamStruct* sonicStream; + +/* For all of the following functions, numChannels is multiplied by numSamples + to determine the actual number of values read or returned. */ + +/* Create a sonic stream. Return NULL only if we are out of memory and cannot + allocate the stream. Set numChannels to 1 for mono, and 2 for stereo. */ +SONIC_API sonicStream sonicCreateStream(int sampleRate, int numChannels); +/* Destroy the sonic stream. */ +SONIC_API void sonicDestroyStream(sonicStream stream); +/* Attach user data to the stream. */ +SONIC_API void sonicSetUserData(sonicStream stream, void *userData); +/* Retrieve user data attached to the stream. */ +SONIC_API void *sonicGetUserData(sonicStream stream); +/* Use this to write floating point data to be speed up or down into the stream. + Values must be between -1 and 1. 
Return 0 if memory realloc failed, + otherwise 1 */ +SONIC_API int sonicWriteFloatToStream(sonicStream stream, const float* samples, int numSamples); +/* Use this to write 16-bit data to be speed up or down into the stream. + Return 0 if memory realloc failed, otherwise 1 */ +SONIC_API int sonicWriteShortToStream(sonicStream stream, const short* samples, int numSamples); +/* Use this to write 8-bit unsigned data to be speed up or down into the stream. + Return 0 if memory realloc failed, otherwise 1 */ +SONIC_API int sonicWriteUnsignedCharToStream(sonicStream stream, const unsigned char* samples, + int numSamples); +/* Use this to read floating point data out of the stream. Sometimes no data + will be available, and zero is returned, which is not an error condition. */ +SONIC_API int sonicReadFloatFromStream(sonicStream stream, float* samples, + int maxSamples); +/* Use this to read 16-bit data out of the stream. Sometimes no data will + be available, and zero is returned, which is not an error condition. */ +SONIC_API int sonicReadShortFromStream(sonicStream stream, short* samples, + int maxSamples); +/* Use this to read 8-bit unsigned data out of the stream. Sometimes no data + will be available, and zero is returned, which is not an error condition. */ +SONIC_API int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char* samples, + int maxSamples); +/* Force the sonic stream to generate output using whatever data it currently + has. No extra delay will be added to the output, but flushing in the middle + of words could introduce distortion. */ +SONIC_API int sonicFlushStream(sonicStream stream); +/* Return the number of samples in the output buffer */ +SONIC_API int sonicSamplesAvailable(sonicStream stream); +/* Get the speed of the stream. */ +SONIC_API float sonicGetSpeed(sonicStream stream); +/* Set the speed of the stream. */ +SONIC_API void sonicSetSpeed(sonicStream stream, float speed); +/* Get the pitch of the stream. 
*/ +SONIC_API float sonicGetPitch(sonicStream stream); +/* Set the pitch of the stream. */ +SONIC_API void sonicSetPitch(sonicStream stream, float pitch); +/* Get the rate of the stream. */ +SONIC_API float sonicGetRate(sonicStream stream); +/* Set the rate of the stream. */ +SONIC_API void sonicSetRate(sonicStream stream, float rate); +/* Get the scaling factor of the stream. */ +SONIC_API float sonicGetVolume(sonicStream stream); +/* Set the scaling factor of the stream. */ +SONIC_API void sonicSetVolume(sonicStream stream, float volume); +/* Chord pitch is DEPRECATED. AFAIK, it was never used by anyone. These + functions still exist to avoid breaking existing code. */ +/* Get the chord pitch setting. */ +SONIC_API int sonicGetChordPitch(sonicStream stream); +/* Set chord pitch mode on or off. Default is off. See the documentation + page for a description of this feature. */ +SONIC_API void sonicSetChordPitch(sonicStream stream, int useChordPitch); +/* Get the quality setting. */ +SONIC_API int sonicGetQuality(sonicStream stream); +/* Set the "quality". Default 0 is virtually as good as 1, but very much + * faster. */ +SONIC_API void sonicSetQuality(sonicStream stream, int quality); +/* Get the sample rate of the stream. */ +SONIC_API int sonicGetSampleRate(sonicStream stream); +/* Set the sample rate of the stream. This will drop any samples that have not + * been read. */ +SONIC_API void sonicSetSampleRate(sonicStream stream, int sampleRate); +/* Get the number of channels. */ +SONIC_API int sonicGetNumChannels(sonicStream stream); +/* Set the number of channels. This will drop any samples that have not been + * read. */ +SONIC_API void sonicSetNumChannels(sonicStream stream, int numChannels); +/* This is a non-stream oriented interface to just change the speed of a sound + sample. It works in-place on the sample array, so there must be at least + speed*numSamples available space in the array. Returns the new number of + samples. 
*/ +SONIC_API int sonicChangeFloatSpeed(float* samples, int numSamples, float speed, + float pitch, float rate, float volume, + int useChordPitch, int sampleRate, int numChannels); +/* This is a non-stream oriented interface to just change the speed of a sound + sample. It works in-place on the sample array, so there must be at least + speed*numSamples available space in the array. Returns the new number of + samples. */ +SONIC_API int sonicChangeShortSpeed(short* samples, int numSamples, float speed, + float pitch, float rate, float volume, + int useChordPitch, int sampleRate, int numChannels); + +#ifdef SONIC_SPECTROGRAM +/* +This code generates high quality spectrograms from sound samples, using +Time-Aliased-FFTs as described at: + + https://github.com/waywardgeek/spectrogram + +Basically, two adjacent pitch periods are overlap-added to create a sound +sample that accurately represents the speech sound at that moment in time. +This set of samples is converted to a spetral line using an FFT, and the result +is saved as a single spectral line at that moment in time. The resulting +spectral lines vary in resolution (it is equal to the number of samples in the +pitch period), and the spacing of spectral lines also varies (proportional to +the numver of samples in the pitch period). + +To generate a bitmap, linear interpolation is used to render the grayscale +value at any particular point in time and frequency. +*/ + +#define SONIC_MAX_SPECTRUM_FREQ 5000 + +struct sonicSpectrogramStruct; +struct sonicBitmapStruct; +typedef struct sonicSpectrogramStruct* sonicSpectrogram; +typedef struct sonicBitmapStruct* sonicBitmap; + +/* sonicBitmap objects represent spectrograms as grayscale bitmaps where each + pixel is from 0 (black) to 255 (white). Bitmaps are rows*cols in size. 
+ Rows are indexed top to bottom and columns are indexed left to right */ +struct sonicBitmapStruct { + unsigned char* data; + int numRows; + int numCols; +}; + +typedef struct sonicBitmapStruct* sonicBitmap; + +/* Enable coomputation of a spectrogram on the fly. */ +SONIC_API void sonicComputeSpectrogram(sonicStream stream); + +/* Get the spectrogram. */ +SONIC_API sonicSpectrogram sonicGetSpectrogram(sonicStream stream); + +/* Create an empty spectrogram. Called automatically if sonicComputeSpectrogram + has been called. */ +SONIC_API sonicSpectrogram sonicCreateSpectrogram(int sampleRate); + +/* Destroy the spectrotram. This is called automatically when calling + sonicDestroyStream. */ +SONIC_API void sonicDestroySpectrogram(sonicSpectrogram spectrogram); + +/* Convert the spectrogram to a bitmap. Caller must destroy bitmap when done. */ +SONIC_API sonicBitmap sonicConvertSpectrogramToBitmap(sonicSpectrogram spectrogram, + int numRows, int numCols); + +/* Destroy a bitmap returned by sonicConvertSpectrogramToBitmap. */ +SONIC_API void sonicDestroyBitmap(sonicBitmap bitmap); + +SONIC_API int sonicWritePGM(sonicBitmap bitmap, char* fileName); + +/* Add two pitch periods worth of samples to the spectrogram. There must be + 2*period samples. Time should advance one pitch period for each call to + this function. 
*/ +SONIC_API void sonicAddPitchPeriodToSpectrogram(sonicSpectrogram spectrogram, + short* samples, int numSamples, + int numChannels); +#endif /* SONIC_SPECTROGRAM */ + +#ifdef __cplusplus +} +#endif + +#endif /* SONIC_H_ */ From 91f0031cde5016feb4f8ad5ac8a2227ec214359d Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Mon, 13 Jan 2025 16:49:10 +0800 Subject: [PATCH 06/15] Lint fix --- nvdaHelper/espeak/sconscript | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nvdaHelper/espeak/sconscript b/nvdaHelper/espeak/sconscript index 96209139cbe..92a47b4b1be 100644 --- a/nvdaHelper/espeak/sconscript +++ b/nvdaHelper/espeak/sconscript @@ -1041,7 +1041,7 @@ sonicLib = env.SharedLibrary( source=[ "sonic.c", Dir(".").File("sonic.def"), - ] + ], ) espeakLib = env.SharedLibrary( @@ -1104,7 +1104,7 @@ espeakLib = env.SharedLibrary( # We do not use the ASYNC compile option in espeak. ], LIBS=["advapi32", "sonic"], - LIBPATH='.', + LIBPATH=".", ) From 973bc7a9853dc3e4407c4ee02aac2a868333ab30 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Fri, 17 Jan 2025 21:05:23 +0800 Subject: [PATCH 07/15] Revert "Change sonic.h to include __declspec(dllimport)" This reverts commit 7b1c22fe80458da2ee51babaa34a8e779f8f95e4. 
--- nvdaHelper/espeak/sconscript | 1 - nvdaHelper/espeak/sonic.h | 306 ----------------------------------- 2 files changed, 307 deletions(-) delete mode 100644 nvdaHelper/espeak/sonic.h diff --git a/nvdaHelper/espeak/sconscript b/nvdaHelper/espeak/sconscript index 92a47b4b1be..b26db67d691 100644 --- a/nvdaHelper/espeak/sconscript +++ b/nvdaHelper/espeak/sconscript @@ -111,7 +111,6 @@ env.Append( "/DUSE_SPEECHPLAYER=1", "/DUSE_KLATT=1", "/DUSE_LIBSONIC=1", - "/DSONIC_DLL", ] ) diff --git a/nvdaHelper/espeak/sonic.h b/nvdaHelper/espeak/sonic.h deleted file mode 100644 index b44c3f694ca..00000000000 --- a/nvdaHelper/espeak/sonic.h +++ /dev/null @@ -1,306 +0,0 @@ -// This is a modified version of the Sonic Library's header, -// which added __declspec(dllimport) to each function -// if SONIC_DLL is defined. -// Used when compiling eSpeak dynamically linked to Sonic. - -#ifndef SONIC_H_ -#define SONIC_H_ - -/* Sonic library - Copyright 2010 - Bill Cox - This file is part of the Sonic Library. - - This file is licensed under the Apache 2.0 license. -*/ - -/* -The Sonic Library implements a new algorithm invented by Bill Cox for the -specific purpose of speeding up speech by high factors at high quality. It -generates smooth speech at speed up factors as high as 6X, possibly more. It is -also capable of slowing down speech, and generates high quality results -regardless of the speed up or slow down factor. For speeding up speech by 2X or -more, the following equation is used: - - newSamples = period/(speed - 1.0) - scale = 1.0/newSamples; - -where period is the current pitch period, determined using AMDF or any other -pitch estimator, and speed is the speedup factor. If the current position in -the input stream is pointed to by "samples", and the current output stream -position is pointed to by "out", then newSamples number of samples can be -generated with: - - out[t] = (samples[t]*(newSamples - t) + samples[t + period]*t)/newSamples; - -where t = 0 to newSamples - 1. 
- -For speed factors < 2X, the PICOLA algorithm is used. The above -algorithm is first used to double the speed of one pitch period. Then, enough -input is directly copied from the input to the output to achieve the desired -speed up factor, where 1.0 < speed < 2.0. The amount of data copied is derived: - - speed = (2*period + length)/(period + length) - speed*length + speed*period = 2*period + length - length(speed - 1) = 2*period - speed*period - length = period*(2 - speed)/(speed - 1) - -For slowing down speech where 0.5 < speed < 1.0, a pitch period is inserted into -the output twice, and length of input is copied from the input to the output -until the output desired speed is reached. The length of data copied is: - - length = period*(speed - 0.5)/(1 - speed) - -For slow down factors below 0.5, no data is copied, and an algorithm -similar to high speed factors is used. -*/ - -/* Uncomment this to use sin-wav based overlap add which in theory can improve - sound quality slightly, at the expense of lots of floating point math. */ -/* #define SONIC_USE_SIN */ - -#ifdef SONIC_DLL -#ifdef SONIC_EXPORT -#define SONIC_API __declspec(dllexport) -#else -#define SONIC_API __declspec(dllimport) -#endif /* SONIC_EXPORT */ -#else -#define SONIC_API -#endif /* SONIC_DLL */ - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef SONIC_INTERNAL -/* The following #define's are used to change the names of the routines defined - * here so that a new library (i.e. speedy) can reuse these names, and then call - * the original names. We do this for two reasons: 1) we don't want to change - * the original API, and 2) we want to add a shim, using the original names and - * still call these routines. - * - * Original users of this API and the libsonic library need to do nothing. The - * original behavior remains. 
- * - * A new user that add some additional functionality above this library (a shim) - * should #define SONIC_INTERNAL before including this file, undefine all these - * symbols and call the sonicIntXXX functions directly. - */ -#define sonicCreateStream sonicIntCreateStream -#define sonicDestroyStream sonicIntDestroyStream -#define sonicWriteFloatToStream sonicIntWriteFloatToStream -#define sonicWriteShortToStream sonicIntWriteShortToStream -#define sonicWriteUnsignedCharToStream sonicIntWriteUnsignedCharToStream -#define sonicReadFloatFromStream sonicIntReadFloatFromStream -#define sonicReadShortFromStream sonicIntReadShortFromStream -#define sonicReadUnsignedCharFromStream sonicIntReadUnsignedCharFromStream -#define sonicFlushStream sonicIntFlushStream -#define sonicSamplesAvailable sonicIntSamplesAvailable -#define sonicGetSpeed sonicIntGetSpeed -#define sonicSetSpeed sonicIntSetSpeed -#define sonicGetPitch sonicIntGetPitch -#define sonicSetPitch sonicIntSetPitch -#define sonicGetRate sonicIntGetRate -#define sonicSetRate sonicIntSetRate -#define sonicGetVolume sonicIntGetVolume -#define sonicSetVolume sonicIntSetVolume -#define sonicGetQuality sonicIntGetQuality -#define sonicSetQuality sonicIntSetQuality -#define sonicGetSampleRate sonicIntGetSampleRate -#define sonicSetSampleRate sonicIntSetSampleRate -#define sonicGetNumChannels sonicIntGetNumChannels -#define sonicGetUserData sonicIntGetUserData -#define sonicSetUserData sonicIntSetUserData -#define sonicSetNumChannels sonicIntSetNumChannels -#define sonicChangeFloatSpeed sonicIntChangeFloatSpeed -#define sonicChangeShortSpeed sonicIntChangeShortSpeed -#define sonicEnableNonlinearSpeedup sonicIntEnableNonlinearSpeedup -#define sonicSetDurationFeedbackStrength sonicIntSetDurationFeedbackStrength -#define sonicComputeSpectrogram sonicIntComputeSpectrogram -#define sonicGetSpectrogram sonicIntGetSpectrogram - -#endif /* SONIC_INTERNAL */ - -/* This specifies the range of voice pitches we try to match. 
- Note that if we go lower than 65, we could overflow in findPitchInRange */ -#ifndef SONIC_MIN_PITCH -#define SONIC_MIN_PITCH 65 -#endif /* SONIC_MIN_PITCH */ -#ifndef SONIC_MAX_PITCH -#define SONIC_MAX_PITCH 400 -#endif /* SONIC_MAX_PITCH */ - -/* These are used to down-sample some inputs to improve speed */ -#define SONIC_AMDF_FREQ 4000 - -struct sonicStreamStruct; -typedef struct sonicStreamStruct* sonicStream; - -/* For all of the following functions, numChannels is multiplied by numSamples - to determine the actual number of values read or returned. */ - -/* Create a sonic stream. Return NULL only if we are out of memory and cannot - allocate the stream. Set numChannels to 1 for mono, and 2 for stereo. */ -SONIC_API sonicStream sonicCreateStream(int sampleRate, int numChannels); -/* Destroy the sonic stream. */ -SONIC_API void sonicDestroyStream(sonicStream stream); -/* Attach user data to the stream. */ -SONIC_API void sonicSetUserData(sonicStream stream, void *userData); -/* Retrieve user data attached to the stream. */ -SONIC_API void *sonicGetUserData(sonicStream stream); -/* Use this to write floating point data to be speed up or down into the stream. - Values must be between -1 and 1. Return 0 if memory realloc failed, - otherwise 1 */ -SONIC_API int sonicWriteFloatToStream(sonicStream stream, const float* samples, int numSamples); -/* Use this to write 16-bit data to be speed up or down into the stream. - Return 0 if memory realloc failed, otherwise 1 */ -SONIC_API int sonicWriteShortToStream(sonicStream stream, const short* samples, int numSamples); -/* Use this to write 8-bit unsigned data to be speed up or down into the stream. - Return 0 if memory realloc failed, otherwise 1 */ -SONIC_API int sonicWriteUnsignedCharToStream(sonicStream stream, const unsigned char* samples, - int numSamples); -/* Use this to read floating point data out of the stream. Sometimes no data - will be available, and zero is returned, which is not an error condition. 
*/ -SONIC_API int sonicReadFloatFromStream(sonicStream stream, float* samples, - int maxSamples); -/* Use this to read 16-bit data out of the stream. Sometimes no data will - be available, and zero is returned, which is not an error condition. */ -SONIC_API int sonicReadShortFromStream(sonicStream stream, short* samples, - int maxSamples); -/* Use this to read 8-bit unsigned data out of the stream. Sometimes no data - will be available, and zero is returned, which is not an error condition. */ -SONIC_API int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char* samples, - int maxSamples); -/* Force the sonic stream to generate output using whatever data it currently - has. No extra delay will be added to the output, but flushing in the middle - of words could introduce distortion. */ -SONIC_API int sonicFlushStream(sonicStream stream); -/* Return the number of samples in the output buffer */ -SONIC_API int sonicSamplesAvailable(sonicStream stream); -/* Get the speed of the stream. */ -SONIC_API float sonicGetSpeed(sonicStream stream); -/* Set the speed of the stream. */ -SONIC_API void sonicSetSpeed(sonicStream stream, float speed); -/* Get the pitch of the stream. */ -SONIC_API float sonicGetPitch(sonicStream stream); -/* Set the pitch of the stream. */ -SONIC_API void sonicSetPitch(sonicStream stream, float pitch); -/* Get the rate of the stream. */ -SONIC_API float sonicGetRate(sonicStream stream); -/* Set the rate of the stream. */ -SONIC_API void sonicSetRate(sonicStream stream, float rate); -/* Get the scaling factor of the stream. */ -SONIC_API float sonicGetVolume(sonicStream stream); -/* Set the scaling factor of the stream. */ -SONIC_API void sonicSetVolume(sonicStream stream, float volume); -/* Chord pitch is DEPRECATED. AFAIK, it was never used by anyone. These - functions still exist to avoid breaking existing code. */ -/* Get the chord pitch setting. 
*/ -SONIC_API int sonicGetChordPitch(sonicStream stream); -/* Set chord pitch mode on or off. Default is off. See the documentation - page for a description of this feature. */ -SONIC_API void sonicSetChordPitch(sonicStream stream, int useChordPitch); -/* Get the quality setting. */ -SONIC_API int sonicGetQuality(sonicStream stream); -/* Set the "quality". Default 0 is virtually as good as 1, but very much - * faster. */ -SONIC_API void sonicSetQuality(sonicStream stream, int quality); -/* Get the sample rate of the stream. */ -SONIC_API int sonicGetSampleRate(sonicStream stream); -/* Set the sample rate of the stream. This will drop any samples that have not - * been read. */ -SONIC_API void sonicSetSampleRate(sonicStream stream, int sampleRate); -/* Get the number of channels. */ -SONIC_API int sonicGetNumChannels(sonicStream stream); -/* Set the number of channels. This will drop any samples that have not been - * read. */ -SONIC_API void sonicSetNumChannels(sonicStream stream, int numChannels); -/* This is a non-stream oriented interface to just change the speed of a sound - sample. It works in-place on the sample array, so there must be at least - speed*numSamples available space in the array. Returns the new number of - samples. */ -SONIC_API int sonicChangeFloatSpeed(float* samples, int numSamples, float speed, - float pitch, float rate, float volume, - int useChordPitch, int sampleRate, int numChannels); -/* This is a non-stream oriented interface to just change the speed of a sound - sample. It works in-place on the sample array, so there must be at least - speed*numSamples available space in the array. Returns the new number of - samples. 
*/ -SONIC_API int sonicChangeShortSpeed(short* samples, int numSamples, float speed, - float pitch, float rate, float volume, - int useChordPitch, int sampleRate, int numChannels); - -#ifdef SONIC_SPECTROGRAM -/* -This code generates high quality spectrograms from sound samples, using -Time-Aliased-FFTs as described at: - - https://github.com/waywardgeek/spectrogram - -Basically, two adjacent pitch periods are overlap-added to create a sound -sample that accurately represents the speech sound at that moment in time. -This set of samples is converted to a spetral line using an FFT, and the result -is saved as a single spectral line at that moment in time. The resulting -spectral lines vary in resolution (it is equal to the number of samples in the -pitch period), and the spacing of spectral lines also varies (proportional to -the numver of samples in the pitch period). - -To generate a bitmap, linear interpolation is used to render the grayscale -value at any particular point in time and frequency. -*/ - -#define SONIC_MAX_SPECTRUM_FREQ 5000 - -struct sonicSpectrogramStruct; -struct sonicBitmapStruct; -typedef struct sonicSpectrogramStruct* sonicSpectrogram; -typedef struct sonicBitmapStruct* sonicBitmap; - -/* sonicBitmap objects represent spectrograms as grayscale bitmaps where each - pixel is from 0 (black) to 255 (white). Bitmaps are rows*cols in size. - Rows are indexed top to bottom and columns are indexed left to right */ -struct sonicBitmapStruct { - unsigned char* data; - int numRows; - int numCols; -}; - -typedef struct sonicBitmapStruct* sonicBitmap; - -/* Enable coomputation of a spectrogram on the fly. */ -SONIC_API void sonicComputeSpectrogram(sonicStream stream); - -/* Get the spectrogram. */ -SONIC_API sonicSpectrogram sonicGetSpectrogram(sonicStream stream); - -/* Create an empty spectrogram. Called automatically if sonicComputeSpectrogram - has been called. 
*/ -SONIC_API sonicSpectrogram sonicCreateSpectrogram(int sampleRate); - -/* Destroy the spectrotram. This is called automatically when calling - sonicDestroyStream. */ -SONIC_API void sonicDestroySpectrogram(sonicSpectrogram spectrogram); - -/* Convert the spectrogram to a bitmap. Caller must destroy bitmap when done. */ -SONIC_API sonicBitmap sonicConvertSpectrogramToBitmap(sonicSpectrogram spectrogram, - int numRows, int numCols); - -/* Destroy a bitmap returned by sonicConvertSpectrogramToBitmap. */ -SONIC_API void sonicDestroyBitmap(sonicBitmap bitmap); - -SONIC_API int sonicWritePGM(sonicBitmap bitmap, char* fileName); - -/* Add two pitch periods worth of samples to the spectrogram. There must be - 2*period samples. Time should advance one pitch period for each call to - this function. */ -SONIC_API void sonicAddPitchPeriodToSpectrogram(sonicSpectrogram spectrogram, - short* samples, int numSamples, - int numChannels); -#endif /* SONIC_SPECTROGRAM */ - -#ifdef __cplusplus -} -#endif - -#endif /* SONIC_H_ */ From 61bbf7b29fe8c92cabe765619215730a29680e8a Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Thu, 23 Jan 2025 10:57:28 +0800 Subject: [PATCH 08/15] Apply suggestions from code review Co-authored-by: Sascha Cowley <16543535+SaschaCowley@users.noreply.github.com> --- source/synthDrivers/_sonic.py | 4 ++-- source/synthDrivers/sapi5.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/synthDrivers/_sonic.py b/source/synthDrivers/_sonic.py index fc87ae35ef0..19fef4f458d 100644 --- a/source/synthDrivers/_sonic.py +++ b/source/synthDrivers/_sonic.py @@ -112,7 +112,7 @@ def readFloat(self) -> Array[c_float]: samples = self.samplesAvailable arrayLength = samples * self.channels buffer = (c_float * arrayLength)() - sonicLib.sonicReadShortFromStream(self.stream, buffer, samples) + sonicLib.sonicReadFloatFromStream(self.stream, buffer, samples) return buffer def readShort(self) -> Array[c_short]: 
@@ -128,7 +128,7 @@ def readUnsignedChar(self) -> Array[c_ubyte]: samples = self.samplesAvailable arrayLength = samples * self.channels buffer = (c_ubyte * arrayLength)() - sonicLib.sonicReadShortFromStream(self.stream, buffer, samples) + sonicLib.sonicReadUnsignedCharFromStream(self.stream, buffer, samples) return buffer def flush(self) -> None: diff --git a/source/synthDrivers/sapi5.py b/source/synthDrivers/sapi5.py index c0c572b79a5..03f2f32b4e4 100644 --- a/source/synthDrivers/sapi5.py +++ b/source/synthDrivers/sapi5.py @@ -325,7 +325,7 @@ def _get_lastIndex(self): return None @classmethod - def _percentToParam(self, percent, min, max): + def _percentToParam(self, percent, min, max) -> float: """Overrides SynthDriver._percentToParam to return floating point parameter values.""" return float(percent) / 100 * (max - min) + min From 8fcb2882173683112ad0ede729a29efa8af97c39 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Thu, 23 Jan 2025 11:38:35 +0800 Subject: [PATCH 09/15] Add & update copyright headers --- nvdaHelper/espeak/sconscript | 5 +++++ nvdaHelper/espeak/sonic.def | 5 +++++ source/synthDrivers/_sonic.py | 5 +++++ source/synthDrivers/sapi5.py | 2 +- 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/nvdaHelper/espeak/sconscript b/nvdaHelper/espeak/sconscript index b26db67d691..dceb2a143cd 100644 --- a/nvdaHelper/espeak/sconscript +++ b/nvdaHelper/espeak/sconscript @@ -1,3 +1,8 @@ +# A part of NonVisual Desktop Access (NVDA) +# Copyright (C) 2011-2025 NV Access Limited +# This file may be used under the terms of the GNU General Public License, version 2 or later. 
+# For more details see: https://www.gnu.org/licenses/gpl-2.0.html + import enum import typing import os diff --git a/nvdaHelper/espeak/sonic.def b/nvdaHelper/espeak/sonic.def index 6063a219a19..001ec3a2337 100644 --- a/nvdaHelper/espeak/sonic.def +++ b/nvdaHelper/espeak/sonic.def @@ -1,3 +1,8 @@ +; A part of NonVisual Desktop Access (NVDA) +; Copyright (C) 2025-2025 NV Access Limited +; This file may be used under the terms of the GNU General Public License, version 2 or later. +; For more details see: https://www.gnu.org/licenses/gpl-2.0.html + EXPORTS sonicCreateStream sonicDestroyStream diff --git a/source/synthDrivers/_sonic.py b/source/synthDrivers/_sonic.py index 19fef4f458d..271d2d49528 100644 --- a/source/synthDrivers/_sonic.py +++ b/source/synthDrivers/_sonic.py @@ -1,3 +1,8 @@ +# A part of NonVisual Desktop Access (NVDA) +# Copyright (C) 2025-2025 NV Access Limited +# This file may be used under the terms of the GNU General Public License, version 2 or later. +# For more details see: https://www.gnu.org/licenses/gpl-2.0.html + from ctypes import Array, c_float, c_int, c_short, c_ubyte, c_void_p, cdll import os import globalVars diff --git a/source/synthDrivers/sapi5.py b/source/synthDrivers/sapi5.py index 03f2f32b4e4..13e18133401 100644 --- a/source/synthDrivers/sapi5.py +++ b/source/synthDrivers/sapi5.py @@ -1,6 +1,6 @@ # -*- coding: UTF-8 -*- # A part of NonVisual Desktop Access (NVDA) -# Copyright (C) 2006-2024 NV Access Limited, Peter Vágner, Aleksey Sadovoy +# Copyright (C) 2006-2025 NV Access Limited, Peter Vágner, Aleksey Sadovoy # This file is covered by the GNU General Public License. # See the file COPYING for more details. 
From ca6090186389609ba493a0ac38ee90070edbee5b Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Thu, 23 Jan 2025 11:54:59 +0800 Subject: [PATCH 10/15] Add LIBRARY to sonic.def Co-authored-by: Sascha Cowley <16543535+SaschaCowley@users.noreply.github.com> --- nvdaHelper/espeak/sonic.def | 1 + 1 file changed, 1 insertion(+) diff --git a/nvdaHelper/espeak/sonic.def b/nvdaHelper/espeak/sonic.def index 001ec3a2337..89045cb240d 100644 --- a/nvdaHelper/espeak/sonic.def +++ b/nvdaHelper/espeak/sonic.def @@ -3,6 +3,7 @@ ; This file may be used under the terms of the GNU General Public License, version 2 or later. ; For more details see: https://www.gnu.org/licenses/gpl-2.0.html +LIBRARY sonic EXPORTS sonicCreateStream sonicDestroyStream From b1baf9001a6a9e2bf53ef80c4f03ed29efe13e35 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Thu, 23 Jan 2025 12:15:41 +0800 Subject: [PATCH 11/15] Add type hints --- source/synthDrivers/_sonic.py | 21 ++++++++++++++++----- source/synthDrivers/sapi5.py | 2 +- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/source/synthDrivers/_sonic.py b/source/synthDrivers/_sonic.py index 271d2d49528..e1c1266df6e 100644 --- a/source/synthDrivers/_sonic.py +++ b/source/synthDrivers/_sonic.py @@ -3,11 +3,22 @@ # This file may be used under the terms of the GNU General Public License, version 2 or later. 
# For more details see: https://www.gnu.org/licenses/gpl-2.0.html -from ctypes import Array, c_float, c_int, c_short, c_ubyte, c_void_p, cdll +from ctypes import CDLL, POINTER, Array, c_float, c_int, c_short, c_ubyte, c_void_p, cdll import os +from typing import TYPE_CHECKING import globalVars -sonicLib = None +if TYPE_CHECKING: + from ctypes import _Pointer + c_float_p = _Pointer[c_float] + c_short_p = _Pointer[c_short] + c_ubyte_p = _Pointer[c_ubyte] +else: + c_float_p = POINTER(c_float) + c_short_p = POINTER(c_short) + c_ubyte_p = POINTER(c_ubyte) + +sonicLib: CDLL | None = None class SonicStreamP(c_void_p): @@ -84,7 +95,7 @@ def __init__(self, sampleRate: int, channels: int): def __del__(self): sonicLib.sonicDestroyStream(self.stream) - def writeFloat(self, data: c_void_p, numSamples: int) -> None: + def writeFloat(self, data: c_float_p, numSamples: int) -> None: """Write 32-bit floating point data to be processed into the stream, where each sample must be between -1 and 1. :param data: A pointer to 32-bit floating point wave data. @@ -94,7 +105,7 @@ def writeFloat(self, data: c_void_p, numSamples: int) -> None: if not sonicLib.sonicWriteFloatToStream(self.stream, data, numSamples): raise MemoryError() - def writeShort(self, data: c_void_p, numSamples: int) -> None: + def writeShort(self, data: c_short_p, numSamples: int) -> None: """Write 16-bit integer data to be processed into the stream. :param data: A pointer to 16-bit integer wave data. :param numSamples: The number of samples. @@ -103,7 +114,7 @@ def writeShort(self, data: c_void_p, numSamples: int) -> None: if not sonicLib.sonicWriteShortToStream(self.stream, data, numSamples): raise MemoryError() - def writeUnsignedChar(self, data: c_void_p, numSamples: int) -> None: + def writeUnsignedChar(self, data: c_ubyte_p, numSamples: int) -> None: """Write 8-bit unsigned integer data to be processed into the stream. :param data: A pointer to 8-bit integer wave data. :param numSamples: The number of samples. 
diff --git a/source/synthDrivers/sapi5.py b/source/synthDrivers/sapi5.py index 13e18133401..7505f9cb4ac 100644 --- a/source/synthDrivers/sapi5.py +++ b/source/synthDrivers/sapi5.py @@ -344,7 +344,7 @@ def _set_rate(self, rate): self.tts.Rate = self._percentToRate(rate) self.sonicStream.speed = 1 - def _set_rateBoost(self, enable): + def _set_rateBoost(self, enable: bool): if enable == self._rateBoost: return rate = self._rate From 6bc4df143b08112d68c20bbfec8982529d72d69c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 23 Jan 2025 04:16:46 +0000 Subject: [PATCH 12/15] Pre-commit auto-fix --- source/synthDrivers/_sonic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/source/synthDrivers/_sonic.py b/source/synthDrivers/_sonic.py index e1c1266df6e..e356d88e88b 100644 --- a/source/synthDrivers/_sonic.py +++ b/source/synthDrivers/_sonic.py @@ -10,6 +10,7 @@ if TYPE_CHECKING: from ctypes import _Pointer + c_float_p = _Pointer[c_float] c_short_p = _Pointer[c_short] c_ubyte_p = _Pointer[c_ubyte] From d6741a21330f829a6d63bcc89a4e7e0a52edf2bc Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Thu, 23 Jan 2025 13:23:54 +0800 Subject: [PATCH 13/15] Initialize Sonic only when needed --- source/speech/__init__.py | 2 -- source/synthDrivers/_sonic.py | 7 ++++++- source/synthDrivers/sapi5.py | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/source/speech/__init__.py b/source/speech/__init__.py index 937fc2f74c6..7090bd8ab50 100644 --- a/source/speech/__init__.py +++ b/source/speech/__init__.py @@ -149,7 +149,6 @@ import config from .speech import initialize as speechInitialize from .sayAll import initialize as sayAllInitialize -from synthDrivers._sonic import initialize as sonicInitialize def initialize(): @@ -157,7 +156,6 @@ def initialize(): Initializes the state of speech and initializes the sayAllHandler """ synthDriverHandler.initialize() - 
sonicInitialize() synthDriverHandler.setSynth(config.conf["speech"]["synth"]) speechInitialize() sayAllInitialize( diff --git a/source/synthDrivers/_sonic.py b/source/synthDrivers/_sonic.py index e356d88e88b..dbfb9790f17 100644 --- a/source/synthDrivers/_sonic.py +++ b/source/synthDrivers/_sonic.py @@ -7,6 +7,7 @@ import os from typing import TYPE_CHECKING import globalVars +from logHandler import log if TYPE_CHECKING: from ctypes import _Pointer @@ -28,8 +29,12 @@ class SonicStreamP(c_void_p): def initialize(): """Initialize the Sonic DLL. - The sonic.dll file should be in the installation directory.""" + The sonic.dll file should be in the synthDrivers directory. + This can be called more than once.""" global sonicLib + if sonicLib: + return + log.debug("Initializing Sonic library") sonicLib = cdll.LoadLibrary(os.path.join(globalVars.appDir, "synthDrivers", "sonic.dll")) sonicLib.sonicCreateStream.restype = SonicStreamP sonicLib.sonicCreateStream.argtypes = [c_int, c_int] diff --git a/source/synthDrivers/sapi5.py b/source/synthDrivers/sapi5.py index 7505f9cb4ac..7f752104965 100644 --- a/source/synthDrivers/sapi5.py +++ b/source/synthDrivers/sapi5.py @@ -32,7 +32,7 @@ PhonemeCommand, SpeechCommand, ) -from ._sonic import SonicStream +from ._sonic import SonicStream, initialize as sonicInitialize class SpeechVoiceSpeakFlags(IntEnum): @@ -389,6 +389,7 @@ def _initTts(self, voice=None): customStream.BaseStream = audioStream customStream.Format = fmt self.tts.AudioOutputStream = customStream + sonicInitialize() self.sonicStream = SonicStream(wfx.SamplesPerSec, wfx.Channels) # Set event notify sink From 4a04c41379601017bad116ec63e33a11bfc3edff Mon Sep 17 00:00:00 2001 From: Sascha Cowley <16543535+SaschaCowley@users.noreply.github.com> Date: Tue, 4 Feb 2025 13:33:58 +1100 Subject: [PATCH 14/15] Fix copyright headers --- nvdaHelper/espeak/sconscript | 4 ++-- nvdaHelper/espeak/sonic.def | 6 +++--- source/synthDrivers/_sonic.py | 6 +++--- 3 files changed, 8 insertions(+), 
8 deletions(-) diff --git a/nvdaHelper/espeak/sconscript b/nvdaHelper/espeak/sconscript index dceb2a143cd..e03b7ab4968 100644 --- a/nvdaHelper/espeak/sconscript +++ b/nvdaHelper/espeak/sconscript @@ -1,7 +1,7 @@ # A part of NonVisual Desktop Access (NVDA) +# This file is covered by the GNU General Public License. +# See the file COPYING for more details. # Copyright (C) 2011-2025 NV Access Limited -# This file may be used under the terms of the GNU General Public License, version 2 or later. -# For more details see: https://www.gnu.org/licenses/gpl-2.0.html import enum import typing diff --git a/nvdaHelper/espeak/sonic.def b/nvdaHelper/espeak/sonic.def index 89045cb240d..9dc1a0516ac 100644 --- a/nvdaHelper/espeak/sonic.def +++ b/nvdaHelper/espeak/sonic.def @@ -1,7 +1,7 @@ ; A part of NonVisual Desktop Access (NVDA) -; Copyright (C) 2025-2025 NV Access Limited -; This file may be used under the terms of the GNU General Public License, version 2 or later. -; For more details see: https://www.gnu.org/licenses/gpl-2.0.html +; This file is covered by the GNU General Public License. +; See the file COPYING for more details. +; Copyright (C) 2025 NV Access Limited LIBRARY sonic EXPORTS diff --git a/source/synthDrivers/_sonic.py b/source/synthDrivers/_sonic.py index dbfb9790f17..4de5ed44d8e 100644 --- a/source/synthDrivers/_sonic.py +++ b/source/synthDrivers/_sonic.py @@ -1,7 +1,7 @@ # A part of NonVisual Desktop Access (NVDA) -# Copyright (C) 2025-2025 NV Access Limited -# This file may be used under the terms of the GNU General Public License, version 2 or later. -# For more details see: https://www.gnu.org/licenses/gpl-2.0.html +# This file is covered by the GNU General Public License. +# See the file COPYING for more details. 
+# Copyright (C) 2025 NV Access Limited from ctypes import CDLL, POINTER, Array, c_float, c_int, c_short, c_ubyte, c_void_p, cdll import os From b3f454ae050e73363d05ecc167a9949b4c2d0ff9 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Thu, 6 Feb 2025 09:27:19 +0800 Subject: [PATCH 15/15] Add username in copyright headers --- nvdaHelper/espeak/sonic.def | 2 +- source/synthDrivers/_sonic.py | 2 +- source/synthDrivers/sapi5.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nvdaHelper/espeak/sonic.def b/nvdaHelper/espeak/sonic.def index 9dc1a0516ac..1952706d0ad 100644 --- a/nvdaHelper/espeak/sonic.def +++ b/nvdaHelper/espeak/sonic.def @@ -1,7 +1,7 @@ ; A part of NonVisual Desktop Access (NVDA) ; This file is covered by the GNU General Public License. ; See the file COPYING for more details. -; Copyright (C) 2025 NV Access Limited +; Copyright (C) 2025 NV Access Limited, gexgd0419 LIBRARY sonic EXPORTS diff --git a/source/synthDrivers/_sonic.py b/source/synthDrivers/_sonic.py index 4de5ed44d8e..54aea8b3f52 100644 --- a/source/synthDrivers/_sonic.py +++ b/source/synthDrivers/_sonic.py @@ -1,7 +1,7 @@ # A part of NonVisual Desktop Access (NVDA) # This file is covered by the GNU General Public License. # See the file COPYING for more details. -# Copyright (C) 2025 NV Access Limited +# Copyright (C) 2025 NV Access Limited, gexgd0419 from ctypes import CDLL, POINTER, Array, c_float, c_int, c_short, c_ubyte, c_void_p, cdll import os diff --git a/source/synthDrivers/sapi5.py b/source/synthDrivers/sapi5.py index 7f752104965..31d90d6afd5 100644 --- a/source/synthDrivers/sapi5.py +++ b/source/synthDrivers/sapi5.py @@ -1,6 +1,6 @@ # -*- coding: UTF-8 -*- # A part of NonVisual Desktop Access (NVDA) -# Copyright (C) 2006-2025 NV Access Limited, Peter Vágner, Aleksey Sadovoy +# Copyright (C) 2006-2025 NV Access Limited, Peter Vágner, Aleksey Sadovoy, gexgd0419 # This file is covered by the GNU General Public License. 
# See the file COPYING for more details.