From 715df1665a67390df2112dc71198355a532a949d Mon Sep 17 00:00:00 2001 From: Marcel Thomas Date: Wed, 1 May 2024 12:11:13 +0100 Subject: [PATCH] Update index.ts --- .../measure-latency-node/src/index.ts | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/examples/text-to-speech-websockets/measure-latency-node/src/index.ts b/examples/text-to-speech-websockets/measure-latency-node/src/index.ts index 761116c..1043ee0 100644 --- a/examples/text-to-speech-websockets/measure-latency-node/src/index.ts +++ b/examples/text-to-speech-websockets/measure-latency-node/src/index.ts @@ -1,9 +1,12 @@ +// Import environment variables from .env file and WebSocket package. import 'dotenv/config'; import WebSocket from 'ws'; +// IDs for the voice and model used in the text-to-speech API. const voiceId = '21m00Tcm4TlvDq8ikWAM'; -const modelId = 'eleven_multilingual_v1'; +const modelId = 'eleven_turbo_v2'; +// A function to split input text into manageable chunks based on punctuation and whitespace. function textChunker(textArray: any[]) { const splitters = [ '.', @@ -41,14 +44,16 @@ function textChunker(textArray: any[]) { })(); } +// This function initiates a WebSocket connection to stream text-to-speech requests. async function textToSpeechInputStreaming(textIterator: any) { const startTime = new Date().getTime(); - + let firstByte = true; const uri = `wss://api.elevenlabs.io/v1/text-to-speech/${voiceId}/stream-input?model_id=${modelId}`; const websocket = new WebSocket(uri, { headers: { Authorization: `Bearer ${process.env.ELEVENLABS_API_KEY}` }, }); + // When connection is open, send the initial and subsequent text chunks. 
websocket.on('open', async () => { await websocket.send( JSON.stringify({ @@ -58,9 +63,7 @@ async function textToSpeechInputStreaming(textIterator: any) { similarity_boost: 0.8, use_speaker_boost: false, }, - generation_config: { - chunk_length_schedule: [120, 160, 250, 290], - }, + generation_config: { chunk_length_schedule: [120, 160, 250, 290] }, }), ); @@ -71,24 +74,33 @@ async function textToSpeechInputStreaming(textIterator: any) { await websocket.send(JSON.stringify({ text: '', flush: true })); }); + // For each message received, log the time elapsed since startTime was recorded (the payload itself is not logged); the first message is reported as the first byte. websocket.on('message', function incoming(data) { const endTime = new Date().getTime(); const elapsedMilliseconds = endTime - startTime; - console.log(`Data: ${elapsedMilliseconds} ms`); + + if (firstByte) { + console.log(`First byte: ${elapsedMilliseconds} ms`); + firstByte = false; + } else { + console.log(`Data: ${elapsedMilliseconds} ms`); + } }); + // Log when the WebSocket connection closes and the total time elapsed. websocket.on('close', () => { const endTime = new Date().getTime(); const elapsedMilliseconds = endTime - startTime; - console.log(`End: ${elapsedMilliseconds} ms`); }); + // Handle and log any errors that occur in the WebSocket connection. websocket.on('error', (error) => { console.log('WebSocket error:', error); }); } +// A function to start the text-to-speech process for a given query. async function chatCompletion(query: string) { const response = query.split(' '); const textIterator = textChunker(response); @@ -96,6 +108,7 @@ async function chatCompletion(query: string) { await textToSpeechInputStreaming(textIterator); } +// The main function that triggers the entire process with a test text. (async () => { const text = `This is a test to see how the latency performs.`;