From 715df1665a67390df2112dc71198355a532a949d Mon Sep 17 00:00:00 2001
From: Marcel Thomas <marcelthomas5@gmail.com>
Date: Wed, 1 May 2024 12:11:13 +0100
Subject: [PATCH] Switch to eleven_turbo_v2 and log first-byte latency in index.ts

---
 .../measure-latency-node/src/index.ts         | 27 ++++++++++++++-----
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/examples/text-to-speech-websockets/measure-latency-node/src/index.ts b/examples/text-to-speech-websockets/measure-latency-node/src/index.ts
index 761116c..1043ee0 100644
--- a/examples/text-to-speech-websockets/measure-latency-node/src/index.ts
+++ b/examples/text-to-speech-websockets/measure-latency-node/src/index.ts
@@ -1,9 +1,12 @@
+// Load environment variables from the .env file and import the WebSocket package.
 import 'dotenv/config';
 import WebSocket from 'ws';
 
+// IDs for the voice and model used in the text-to-speech API.
 const voiceId = '21m00Tcm4TlvDq8ikWAM';
-const modelId = 'eleven_multilingual_v1';
+const modelId = 'eleven_turbo_v2';
 
+// A function to split input text into manageable chunks based on punctuation and whitespace.
 function textChunker(textArray: any[]) {
   const splitters = [
     '.',
@@ -41,14 +44,16 @@ function textChunker(textArray: any[]) {
   })();
 }
 
+// This function initiates a WebSocket connection to stream text-to-speech requests.
 async function textToSpeechInputStreaming(textIterator: any) {
   const startTime = new Date().getTime();
-
+  let firstByte = true;
   const uri = `wss://api.elevenlabs.io/v1/text-to-speech/${voiceId}/stream-input?model_id=${modelId}`;
   const websocket = new WebSocket(uri, {
     headers: { Authorization: `Bearer ${process.env.ELEVENLABS_API_KEY}` },
   });
 
+  // When the connection opens, send the initial settings message, then the text chunks, then a final flush.
   websocket.on('open', async () => {
     await websocket.send(
       JSON.stringify({
@@ -58,9 +63,7 @@ async function textToSpeechInputStreaming(textIterator: any) {
           similarity_boost: 0.8,
           use_speaker_boost: false,
         },
-        generation_config: {
-          chunk_length_schedule: [120, 160, 250, 290],
-        },
+        generation_config: { chunk_length_schedule: [120, 160, 250, 290] },
       }),
     );
 
@@ -71,24 +74,33 @@ async function textToSpeechInputStreaming(textIterator: any) {
     await websocket.send(JSON.stringify({ text: '', flush: true }));
   });
 
+  // On each message, log the time elapsed since startTime; the first message is labelled "First byte".
   websocket.on('message', function incoming(data) {
     const endTime = new Date().getTime();
     const elapsedMilliseconds = endTime - startTime;
-    console.log(`Data: ${elapsedMilliseconds} ms`);
+
+    if (firstByte) {
+      console.log(`First byte: ${elapsedMilliseconds} ms`);
+      firstByte = false;
+    } else {
+      console.log(`Data: ${elapsedMilliseconds} ms`);
+    }
   });
 
+  // Log when the WebSocket connection closes and the total time elapsed.
   websocket.on('close', () => {
     const endTime = new Date().getTime();
     const elapsedMilliseconds = endTime - startTime;
-
     console.log(`End: ${elapsedMilliseconds} ms`);
   });
 
+  // Handle and log any errors that occur in the WebSocket connection.
   websocket.on('error', (error) => {
     console.log('WebSocket error:', error);
   });
 }
 
+// A function to start the text-to-speech process for a given query.
 async function chatCompletion(query: string) {
   const response = query.split(' ');
   const textIterator = textChunker(response);
@@ -96,6 +108,7 @@ async function chatCompletion(query: string) {
   await textToSpeechInputStreaming(textIterator);
 }
 
+// The main function that triggers the entire process with a test text.
 (async () => {
   const text = `This is a test to see how the latency performs.`;