Update index.ts

elevenlabs · May 1, 2024 · 715df16 · 715df16
1 parent 38dbed1
commit 715df16
Showing 1 changed file with 20 additions and 7 deletions.
diff --git a/examples/text-to-speech-websockets/measure-latency-node/src/index.ts b/examples/text-to-speech-websockets/measure-latency-node/src/index.ts
@@ -1,9 +1,12 @@
+// Import environment variables from .env file and WebSocket package.
 import 'dotenv/config';
 import WebSocket from 'ws';
 
+// IDs for the voice and model used in the text-to-speech API.
 const voiceId = '21m00Tcm4TlvDq8ikWAM';
-const modelId = 'eleven_multilingual_v1';
+const modelId = 'eleven_turbo_v2';
 
+// A function to split input text into manageable chunks based on punctuation and whitespace.
 function textChunker(textArray: any[]) {
   const splitters = [
     '.',
@@ -41,14 +44,16 @@ function textChunker(textArray: any[]) {
   })();
 }
 
+// This function initiates a WebSocket connection to stream text-to-speech requests.
 async function textToSpeechInputStreaming(textIterator: any) {
   const startTime = new Date().getTime();
-
+  let firstByte = true;
   const uri = `wss://api.elevenlabs.io/v1/text-to-speech/${voiceId}/stream-input?model_id=${modelId}`;
   const websocket = new WebSocket(uri, {
     headers: { Authorization: `Bearer ${process.env.ELEVENLABS_API_KEY}` },
   });
 
+  // When connection is open, send the initial and subsequent text chunks.
   websocket.on('open', async () => {
     await websocket.send(
       JSON.stringify({
@@ -58,9 +63,7 @@ async function textToSpeechInputStreaming(textIterator: any) {
           similarity_boost: 0.8,
           use_speaker_boost: false,
         },
-        generation_config: {
-          chunk_length_schedule: [120, 160, 250, 290],
-        },
+        generation_config: { chunk_length_schedule: [120, 160, 250, 290] },
       }),
     );
 
@@ -71,31 +74,41 @@ async function textToSpeechInputStreaming(textIterator: any) {
     await websocket.send(JSON.stringify({ text: '', flush: true }));
   });
 
+  // Log received data and the time elapsed since the connection started.
   websocket.on('message', function incoming(data) {
     const endTime = new Date().getTime();
     const elapsedMilliseconds = endTime - startTime;
-    console.log(`Data: ${elapsedMilliseconds} ms`);
+
+    if (firstByte) {
+      console.log(`First byte: ${elapsedMilliseconds} ms`);
+      firstByte = false;
+    } else {
+      console.log(`Data: ${elapsedMilliseconds} ms`);
+    }
   });
 
+  // Log when the WebSocket connection closes and the total time elapsed.
   websocket.on('close', () => {
     const endTime = new Date().getTime();
     const elapsedMilliseconds = endTime - startTime;
-
     console.log(`End: ${elapsedMilliseconds} ms`);
   });
 
+  // Handle and log any errors that occur in the WebSocket connection.
   websocket.on('error', (error) => {
     console.log('WebSocket error:', error);
   });
 }
 
+// A function to start the text-to-speech process for a given query.
 async function chatCompletion(query: string) {
   const response = query.split(' ');
   const textIterator = textChunker(response);
 
   await textToSpeechInputStreaming(textIterator);
 }
 
+// The main function that triggers the entire process with a test text.
 (async () => {
   const text = `This is a test to see how the latency performs.`;