From 3666d82163a6da6ca625125aab56c8ad929d2607 Mon Sep 17 00:00:00 2001 From: BBC-Esq Date: Tue, 20 Feb 2024 08:16:25 -0500 Subject: [PATCH] Corrected installation and verified as working Works on recent cpu/mps merge. --- examples/text_to_audio_file.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/examples/text_to_audio_file.py b/examples/text_to_audio_file.py index 556f820..b0f624e 100644 --- a/examples/text_to_audio_file.py +++ b/examples/text_to_audio_file.py @@ -2,29 +2,28 @@ Simple example that speaks some text and creates an audio file named output_audio.wav in the same directory in which the script is run. INSTALLATION INSTRUCTIONS~ +Tested on Windows -pip install WhisperSpeech -pip install pydub - -IMPORTANT~ - -WhisperSpeech installs pytorch by default, but the cpu-only version. If you want to use gpu-acceleration, after WhisperSpeech installs -go to this website and find the appropriate command to run: +(1) create a virtual environment and activate it +(2) install pytorch by going to the following website and running the appropriate command for your platform and setup: https://pytorch.org/get-started/locally/ +(3) pip3 install WhisperSpeech +(4) pip3 install soundfile==0.12.1 pydub==0.25.1 +(5) python text_to_audio_file.py ''' from pydub import AudioSegment import numpy as np from whisperspeech.pipeline import Pipeline -pipe = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-small-en+pl.model') +# pipe = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-small-en+pl.model') # pipe = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-tiny-en+pl.model') -# pipe = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-base-en+pl.model') +pipe = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-base-en+pl.model') audio_tensor = pipe.generate(""" - This is some sample text. You would add text here that you want spoken and then only leave one of the above lines ununcommented for the model you want to test. 
Note that this script does not rely on the standard method within the whisperspeech pipeline. Rather, it replaces a part of the functionality with reliance on pydub instead. This approach "just worked." Feel free to modify or distribute at your pleasure. + This is some sample text. You would add text here that you want spoken and then only leave one of the above lines uncommented for the model you want to test. Note that this script does not rely on the standard method within the whisperspeech pipeline. Rather, it replaces a part of the functionality with reliance on pydub instead. This approach "just worked." Feel free to modify or distribute at your pleasure. """) # generate uses CUDA if available; therefore, it's necessary to move to CPU before converting to NumPy array