collabora · zoq · Jan 31, 2024 · Jan 29, 2024 · Jan 29, 2024 · Jan 29, 2024
diff --git a/TensorRT_whisper.md b/TensorRT_whisper.md
@@ -37,17 +37,18 @@ python -c "import torch; import tensorrt; import tensorrt_llm"
 - We build the `small.en` and `small` multilingual TensorRT engines. The script logs the path of the directory containing the Whisper TensorRT engine; this path is the `model_path` needed to run the server.
 ```bash
 # convert small.en
-bash build_whisper_tensorrt /root/TensorRT-LLM-examples small.en
+bash scripts/build_whisper_tensorrt.sh /root/TensorRT-LLM-examples small.en
 
 # convert small multilingual model
-bash build_whisper_tensorrt /root/TensorRT-LLM-examples small
+bash scripts/build_whisper_tensorrt.sh /root/TensorRT-LLM-examples small
 ```
 
 ## Run WhisperLive Server with TensorRT Backend
 ```bash
 cd /home/WhisperLive
 
 # Install requirements
+bash scripts/setup.sh
 pip install -r requirements/server.txt
 
 # Required to create mel spectrogram

diff --git a/requirements/server.txt b/requirements/server.txt
@@ -2,4 +2,8 @@ faster-whisper==0.10.0
 torch
 websockets
 onnxruntime==1.16.0
-numba
+numba
+openai-whisper
+kaldialign
+soundfile
+ffmpeg-python
diff --git a/whisper_live/server.py b/whisper_live/server.py
@@ -397,6 +397,7 @@ def __init__(
             language=self.language,
             task=self.task
         )
+        self.warmup()
 
         # threading
         self.trans_thread = threading.Thread(target=self.speech_to_text)
@@ -410,6 +411,12 @@ def __init__(
                 }
             )
         )
+
+    def warmup(self, warmup_steps=10):
+        """
+        Run the transcriber on a sample clip several times and discard the output.
+
+        The log-mel spectrogram of "tests/jfk.flac" is computed once via
+        `self.transcriber.log_mel_spectrogram` and then passed to
+        `self.transcriber.transcribe` `warmup_steps` times.
+
+        Args:
+            warmup_steps (int): Number of `transcribe` calls to make. Defaults to 10.
+        """
+        logging.info("[INFO:] Warming up TensorRT engine..")
+        # NOTE(review): the clip path is relative, so it resolves against the
+        # process's working directory - confirm the server is always launched
+        # from a directory that contains tests/jfk.flac.
+        # `duration` is unpacked but not used in this method.
+        mel, duration = self.transcriber.log_mel_spectrogram("tests/jfk.flac")
+        for i in range(warmup_steps):
+            # `last_segment` is never read; only the transcribe call itself matters here.
+            last_segment = self.transcriber.transcribe(mel)
 
     def set_eos(self, eos):
         self.lock.acquire()

diff --git a/whisper_live/transcriber_tensorrt.py b/whisper_live/transcriber_tensorrt.py
@@ -11,7 +11,7 @@
 from whisper.tokenizer import get_tokenizer
 from whisper_live.tensorrt_utils import (mel_filters, store_transcripts,
                            write_error_stats, load_audio_wav_format,
-                           pad_or_trim)
+                           pad_or_trim, load_audio)
 
 import tensorrt_llm
 import tensorrt_llm.logger as logger
@@ -337,4 +337,4 @@ def decode_wav_file(
     if normalizer:
         prediction = normalizer(prediction)
 
-    return prediction.strip()
+    return prediction.strip()