Added functionality for using pyttsx3 instead of coqui-ai TTS #33

Open · wants to merge 8 commits into base: dev
3 changes: 2 additions & 1 deletion environment.yml
@@ -18,7 +18,8 @@ dependencies:
- sf2utils
- sphinx
- tqdm
- TTS
- "--editable=git+https://github.com/nateshmbhat/pyttsx3.git#egg=pyttsx3"
- pyttsx3
- wavio
- wheel
- sounddevice
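The examples below read `strauss.tts_caption.ttsMode` to decide which backend is available. As a rough sketch only (the attribute name is taken from the examples in this PR; the actual detection logic inside `strauss.tts_caption` may differ), backend selection of this kind amounts to:

```python
# Hypothetical sketch of TTS backend detection, assuming the import names used
# in this PR: "TTS" (coqui-ai) and "pyttsx3". Not the actual strauss code.
def detect_tts_mode():
    try:
        import TTS  # noqa: F401
        return 'coqui-tts'
    except ImportError:
        pass
    try:
        import pyttsx3  # noqa: F401
        return 'pyttsx3'
    except ImportError:
        raise ImportError("No text-to-speech backend found; install TTS or pyttsx3.")

print(f"Available text-to-speech (TTS) is: {detect_tts_mode()}")
```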
114 changes: 85 additions & 29 deletions examples/AudioCaption.ipynb
@@ -6,7 +6,9 @@
"metadata": {},
"source": [
"### <u> Generate a sonification with an audio caption in `strauss` </u>\n",
"Import the relevant modules:"
"Import the relevant modules:\n",
"\n",
"***Note***: you will need to have some form of python text-to-speech installed (`TTS` or `pyttsx3`) for these examples to work. See the error raised when trying to run the examples below for more info:"
]
},
{
@@ -27,7 +29,28 @@
"from strauss.generator import Sampler\n",
"import os\n",
"from pathlib import Path\n",
"%matplotlib inline"
"import strauss\n",
"%matplotlib inline\n",
"\n",
"mode = strauss.tts_caption.ttsMode"
]
},
{
"cell_type": "markdown",
"id": "226f3af8-eea8-4f8e-b537-bda602e1418d",
"metadata": {},
"source": [
"What text to speech do we have?"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ffe715e8-d5aa-487d-a125-0e17a6a01958",
"metadata": {},
"outputs": [],
"source": [
"print(f\"Available text-to-speech (TTS) is: {mode}\")"
]
},
{
@@ -46,7 +69,6 @@
"outputs": [],
"source": [
"# platform agnostic absolute path for samples...\n",
"import strauss\n",
"strauss_dir = Path(strauss.__file__).parents[2]\n",
"sample_path = Path(strauss_dir, 'data','samples','glockenspiels')\n",
"# setup used in stars appearing example\n",
@@ -74,12 +96,31 @@
"events.apply_mapping_functions(map_lims=maplims)"
]
},
{
"cell_type": "markdown",
"id": "ce448cfd-bd92-49d1-9c1d-c3c4d6252383",
"metadata": {},
"source": [
"Now, lets look at the avaialble voices for our TTS engine:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e50a986e-5c51-4d1a-aea5-99f3161cdd9b",
"metadata": {},
"outputs": [],
"source": [
"from strauss.tts_caption import TTS\n",
"voices = TTS().list_models()"
]
},
{
"cell_type": "markdown",
"id": "b7d1566f-ff8c-4e21-8ceb-f743394fa4a5",
"metadata": {},
"source": [
"Generate text-to-speech (TTS) for the caption, using the default choice of voice (`\"Jenny\"` from the `TTS` module)"
"Generate text-to-speech (TTS) for the caption, using the default choice of voice (`\"Jenny\"` for the `coqui-tts` module, OS default for `pyttsx3`)"
]
},
{
@@ -91,7 +132,6 @@
"source": [
"caption_en = 'In the following audio, a glockenspiel is used to represent stars of varying colour.'\n",
"\n",
"# render at default 48 kHz rate\n",
"soni = Sonification(score, events, generator, system,\n",
" caption=caption_en)\n",
"soni.render()\n",
@@ -107,9 +147,22 @@
"source": [
"caption_en = 'In the following audio, a glockenspiel is used to represent stars of varying colour.'\n",
"\n",
"soni = Sonification(score, events, generator, system,\n",
" caption=caption_en,\n",
" ttsmodel=str(Path('tts_models', 'en', 'ljspeech', 'tacotron2-DDC')))\n",
"if mode == 'coqui-tts':\n",
" soni = Sonification(score, events, generator, system,\n",
" caption=caption_en,\n",
" ttsmodel=str(Path('tts_models', 'en', 'ljspeech', 'tacotron2-DDC')))\n",
"elif mode == 'pyttsx3':\n",
" for v in voices[::-1]:\n",
" #print(v.languages[0][:2])\n",
" if v.languages[0][:2] == 'en':\n",
" break\n",
" print(f\"Selected voice: {v.name}\")\n",
" soni = Sonification(score, events, generator, system,\n",
" caption=caption_en,\n",
" ttsmodel={'voice':v.id,\n",
" # we can also set a rate for pyttsx3 (int16)...\n",
" 'rate': 217})\n",
"\n",
"soni.render()\n",
"soni.notebook_display(show_waveform=False)"
]
@@ -131,9 +184,19 @@
"source": [
"caption_de = \"In der folgenden Tonspur wird ein Glockenspiel verwendet um Sterne mit unterschiedlichen Farben zu repräsentieren.\"\n",
"\n",
"soni = Sonification(score, events, generator, system,\n",
" caption=caption_de, \n",
" ttsmodel=str(Path('tts_models', 'de', 'thorsten', 'vits')))\n",
"if mode == 'coqui-tts':\n",
" soni = Sonification(score, events, generator, system,\n",
" caption=caption_de, \n",
" ttsmodel=str(Path('tts_models', 'de', 'thorsten', 'vits')))\n",
"elif mode == 'pyttsx3':\n",
" # find a German-language voice...\n",
" for v in voices:\n",
" if v.languages[0][:2] == 'de':\n",
" break\n",
" soni = Sonification(score, events, generator, system,\n",
" caption=caption_de,\n",
" ttsmodel={'voice':v.id})\n",
"\n",
"soni.render()\n",
"soni.notebook_display(show_waveform=False)"
]
@@ -143,7 +206,7 @@
"id": "ff8db018-02e3-48c2-a043-6ba132c1e239",
"metadata": {},
"source": [
"**Note**: the AI-based `TTS` can behave strangely when using unrecognised characters or terms. Sometimes these will be mispronounced by the TTS, other times they could be skipped entirely. This can be circumvented by writing out the how symbols should be pronounced, or spelling phonetically to improve pronunciation:"
"**Note**: the AI-based `TTS` can behave unpredictably when using unrecognised characters or terms. Sometimes these will be mispronounced by the TTS, other times they could be skipped entirely. This can be circumvented by writing out the how symbols should be pronounced, or spelling phonetically to improve pronunciation:"
]
},
{
@@ -155,32 +218,25 @@
"source": [
"symbol_examples_en = 'The Lyman-α resonance is 1216 Å. The Lyman alpha resonance is twelve hundred and sixteen angstroms. '\n",
"\n",
"for v in voices[::-1]:\n",
" #print(v.languages[0][:2])\n",
" if v.languages[0][:2] == 'en':\n",
" break\n",
" \n",
"soni = Sonification(score, events, generator, system,\n",
" caption=symbol_examples_en+caption_en)\n",
" caption=symbol_examples_en, ttsmodel={'voice':v.id, 'rate': 217})\n",
"\n",
"soni.render()\n",
"soni.notebook_display(show_waveform=0)"
]
},
{
"cell_type": "markdown",
"id": "5a9db75d-6da4-4a9c-92d6-e31caee18e86",
"metadata": {},
"source": [
"Captions can be used to provide context to sonifications, explaining what to listen for.\n",
"\n",
"We can list available models for the TTS module (including `Jenny` the default `strauss` voice):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c216a84c-2fd4-46a0-abc1-a152bc77b639",
"id": "f706e822-d989-4b2a-b834-b1565548349d",
"metadata": {},
"outputs": [],
"source": [
"from strauss.tts_caption import TTS\n",
"TTS().list_models()"
]
"source": []
}
],
"metadata": {
@@ -199,7 +255,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.8.20"
}
},
"nbformat": 4,
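For readers unfamiliar with `pyttsx3`, the voice-selection pattern used in the notebook (match a language prefix, optionally set a speaking rate) can also be written against the `pyttsx3` API directly. A minimal sketch, assuming the same `languages[0][:2]` prefix test as the notebook; the exact format of `voice.languages` varies between OS drivers, so treat the check as illustrative:

```python
# Standalone pyttsx3 sketch, outside strauss. Mirrors the notebook's
# v.languages[0][:2] prefix test, which is driver-dependent.
import pyttsx3

def pick_voice(engine, lang_prefix='en'):
    """Return the first installed voice whose language code starts with lang_prefix."""
    for voice in engine.getProperty('voices'):
        langs = voice.languages or []
        if langs and str(langs[0])[:2] == lang_prefix:
            return voice
    return None

engine = pyttsx3.init()
voice = pick_voice(engine, 'en')
if voice is not None:
    engine.setProperty('voice', voice.id)
engine.setProperty('rate', 217)  # speaking rate in words per minute
engine.say('In the following audio, a glockenspiel is used to represent stars of varying colour.')
engine.runAndWait()
```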
77 changes: 50 additions & 27 deletions examples/AudioCaption.py
@@ -3,6 +3,9 @@

# ### <u> Generate a sonification with an audio caption in `strauss` </u>
# Import the relevant modules:
#
# ***Note***: you will need some form of Python text-to-speech installed (`TTS` or `pyttsx3`) for these examples to work. If neither is installed, the error raised when running the examples below gives more information:


from strauss.sonification import Sonification
from strauss.sources import Events
@@ -12,13 +15,20 @@
import numpy as np
from strauss.generator import Sampler
import os
import pprint
from pathlib import Path
import strauss

mode = strauss.tts_caption.ttsMode


# What text-to-speech do we have?
print(f"Available text-to-speech (TTS) is: {mode}")


# Generate a placeholder sonification (a short sequence of glockenspiel notes) that we may want to add a caption to:


# platform agnostic absolute path for samples...
import strauss
strauss_dir = Path(strauss.__file__).parents[2]
sample_path = Path(strauss_dir, 'data','samples','glockenspiels')

@@ -47,26 +57,36 @@
events.apply_mapping_functions(map_lims=maplims)


# Generate text-to-speech (TTS) for the caption, using the default choice of voice (`"Jenny"` from the `TTS` module)
# Now, let's look at the available voices for our TTS engine:
from strauss.tts_caption import TTS
voices = TTS().list_models()

caption_en = 'In the following audio, a glockenspiel is used to represent stars of varying colour.'

print("Example of a caption using the default voice...")
# Generate text-to-speech (TTS) for the caption, using the default choice of voice (`"Jenny"` for the `coqui-tts` module, OS default for `pyttsx3`)
caption_en = 'In the following audio, a glockenspiel is used to represent stars of varying colour.'

# render at default 48 kHz rate
soni = Sonification(score, events, generator, system,
caption=caption_en)
soni.render()
soni.hear()


caption_en = 'In the following audio, a glockenspiel is used to represent stars of varying colour.'

print("Example of a caption using an alternative voice...")
if mode == 'coqui-tts':
soni = Sonification(score, events, generator, system,
caption=caption_en,
ttsmodel=str(Path('tts_models', 'en', 'ljspeech', 'tacotron2-DDC')))
elif mode == 'pyttsx3':
for v in voices[::-1]:
if v.languages[0][:2] == 'en':
break
print(f"Selected voice: {v.name}")
soni = Sonification(score, events, generator, system,
caption=caption_en,
ttsmodel={'voice':v.id,
# we can also set a rate for pyttsx3 (int16)...
'rate': 217})

soni = Sonification(score, events, generator, system,
caption=caption_en,
ttsmodel=Path('tts_models', 'en', 'ljspeech', 'tacotron2-DDC'))
soni.render()
soni.hear()

@@ -75,31 +95,34 @@

caption_de = "In der folgenden Tonspur wird ein Glockenspiel verwendet um Sterne mit unterschiedlichen Farben zu repräsentieren."

print("Example of a caption in a different language (German), selecting a voice supportingh that language ('Thorsten')...")
if mode == 'coqui-tts':
soni = Sonification(score, events, generator, system,
caption=caption_de,
ttsmodel=str(Path('tts_models', 'de', 'thorsten', 'vits')))
elif mode == 'pyttsx3':
# find a German-language voice...
for v in voices:
if v.languages[0][:2] == 'de':
break
soni = Sonification(score, events, generator, system,
caption=caption_de,
ttsmodel={'voice':v.id})

soni = Sonification(score, events, generator, system,
caption=caption_de,
ttsmodel=Path('tts_models', 'de', 'thorsten', 'vits'))
soni.render()
soni.hear()


# **Note**: the AI-based `TTS` can behave strangely when using unrecognised characters or terms. Sometimes these will be mispronounced by the TTS, other times they could be skipped entirely. This can be circumvented by writing out the how symbols should be pronounced, or spelling phonetically to improve pronunciation:
# **Note**: the AI-based `TTS` can behave unpredictably when using unrecognised characters or terms. Sometimes these will be mispronounced by the TTS, other times they could be skipped entirely. This can be circumvented by writing out how symbols should be pronounced, or spelling phonetically to improve pronunciation:

symbol_examples_en = 'The Lyman-α resonance is 1216 Å. The Lyman alpha resonance is twelve hundred and sixteen angstroms. '

print("Example of mispronunciation of terms or symbols...")

for v in voices[::-1]:
if v.languages[0][:2] == 'en':
break

soni = Sonification(score, events, generator, system,
caption=symbol_examples_en+caption_en)
caption=symbol_examples_en, ttsmodel={'voice':v.id, 'rate': 217})

soni.render()
soni.hear()


# Captions can be used to provide context to sonifications, explaining what to listen for.
#
# We can list available models for the TTS module (including `Jenny` the default `strauss` voice):

print("Print available voice models...")
from strauss.tts_caption import TTS
pprint.pprint(TTS().list_models().list_tts_models())
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -10,6 +10,7 @@ requires = [
"sf2utils",
"tqdm",
"wavio",
"wheel"
"wheel",
"pyttsx3 @ git+https://github.com/nateshmbhat/pyttsx3.git"
]
build-backend = "setuptools.build_meta"
1 change: 1 addition & 0 deletions setup.cfg
@@ -39,3 +39,4 @@ where = src
[options.extras_require]
TTS =
TTS
pyttsx3
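With both backends listed under the `TTS` extra, a quick way to check which one actually ended up in an environment (a sketch; the import names `TTS` and `pyttsx3` are assumed to match the packages added above):

```python
# Check which optional TTS backends are importable in the current environment.
from importlib import util

for name in ('TTS', 'pyttsx3'):
    status = 'installed' if util.find_spec(name) else 'missing'
    print(f'{name}: {status}')
```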