Merge pull request #43 from NavodPeiris/dev

fixed some errors
NavodPeiris · Aug 16, 2024 · 4085aa7 · 4085aa7
2 parents bbe32be + f24ff24
commit 4085aa7
Show file tree

Hide file tree

Showing 8 changed files with 19 additions and 17 deletions.
diff --git a/README.md b/README.md
@@ -88,7 +88,7 @@ transcript will also indicate the timeframe in seconds where each speaker speaks
 ```
 from speechlib import Transcriptor
 
-file = "obama1.wav"  # your audio file
+file = "obama_zach.wav"  # your audio file
 voices_folder = "voices" # voices folder containing voice samples for recognition
 language = "en"          # language code
 log_folder = "logs"      # log folder for storing transcripts

diff --git a/examples/chinese_wav.wav b/examples/chinese_wav.wav
diff --git a/examples/transcribe.py b/examples/transcribe.py
@@ -1,12 +1,12 @@
 from speechlib import Transcriptor
 
-file = "obama1.wav"  # your audio file
+file = "obama_zach.wav"  # your audio file
 voices_folder = "voices" # voices folder containing voice samples for recognition
 language = "en"          # language code
 log_folder = "logs"      # log folder for storing transcripts
 modelSize = "tiny"     # size of model to be used [tiny, small, medium, large-v1, large-v2, large-v3]
 quantization = False   # setting this 'True' may speed up the process but lower the accuracy
-ACCESS_TOKEN = "your huggingface access token" # get permission to access pyannote/speaker-diarization@2.1 on huggingface
+ACCESS_TOKEN = "your huggingface token" # get permission to access pyannote/speaker-diarization@2.1 on huggingface
 
 # quantization only works on faster-whisper
 transcriptor = Transcriptor(file, log_folder, language, modelSize, ACCESS_TOKEN, voices_folder, quantization)

diff --git a/library.md b/library.md
@@ -72,7 +72,7 @@ transcript will also indicate the timeframe in seconds where each speaker speaks
 ```
 from speechlib import Transcriptor
 
-file = "obama1.wav"  # your audio file
+file = "obama_zach.wav"  # your audio file
 voices_folder = "voices" # voices folder containing voice samples for recognition
 language = "en"          # language code
 log_folder = "logs"      # log folder for storing transcripts

diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name="speechlib",
-    version="1.1.3",  
+    version="1.1.4",  
     description="speechlib is a library that can do speaker diarization, transcription and speaker recognition on an audio file to create transcripts with actual speaker names. This library also contain audio preprocessor functions.",
     packages=find_packages(),
     long_description=long_description,

diff --git a/setup_instruction.md b/setup_instruction.md
@@ -9,7 +9,7 @@ for publishing:
     pip install twine
 
 for install locally for testing:
-    pip install dist/speechlib-1.1.3-py3-none-any.whl
+    pip install dist/speechlib-1.1.4-py3-none-any.whl
 
 finally run:
     twine upload dist/*

diff --git a/speechlib/speechlib.py b/speechlib/speechlib.py
@@ -6,23 +6,26 @@
 class Transcriptor:
 
     def __init__(self, file, log_folder, language, modelSize, ACCESS_TOKEN, voices_folder=None, quantization=False):
-        '''transcribe a wav file 
+        '''
+        transcribe a wav file 
         
-            arguments:
+        arguments:
+
+        file: name of wav file with extension ex: file.wav
 
-            file: name of wav file with extension ex: file.wav
+        log_folder: name of folder where transcript will be stored
 
-            log_folder: name of folder where transcript will be stored
+        language: language of wav file
 
-            language: language of wav file
+        modelSize: tiny, small, medium, large, large-v1, large-v2, large-v3 (bigger model is more accurate but slow!!)
 
-            modelSize: tiny, small, medium, large, large-v1, large-v2, large-v3 (bigger model is more accurate but slow!!)
+        ACCESS_TOKEN: huggingface access token
 
-            voices_folder: folder containing subfolders named after each speaker with speaker voice samples in them. This will be used for speaker recognition
+        voices_folder: folder containing subfolders named after each speaker with speaker voice samples in them. This will be used for speaker recognition
 
-            quantization: whether to use int8 quantization or not (default=False)
+        quantization: whether to use int8 quantization or not (default=False)
 
-            see documentation: https://github.com/Navodplayer1/speechlib
+        see documentation: https://github.com/Navodplayer1/speechlib
         
             
         supported languages:  

diff --git a/speechlib/wav_segmenter.py b/speechlib/wav_segmenter.py
@@ -32,8 +32,7 @@ def wav_file_segmentation(file_name, segments, language, modelSize, model_type,
             # return -> [[start time, end time, transcript], [start time, end time, transcript], ..]
             texts.append([segment[0], segment[1], trans])
         except Exception as err:
-            # to avoid transcription exceptions that occur when transcribing silent segments we have to pass
-            pass
+            print("ERROR while transcribing: ", err)
         # Delete the WAV file after processing
         os.remove(file)