🎯 fix: Prevent UI De-sync By Removing Redundant States (#5333)
* fix: remove local state from Dropdown causing de-sync

* refactor: cleanup STT code, avoid redundant states to prevent de-sync and side effects

* fix: reset transcript after sending final text to prevent data loss

* fix: clear timeout on component unmount to prevent memory leaks
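
The last bullet describes a standard effect-cleanup pattern. A minimal sketch of it follows; the hook name, ref, and delay are illustrative assumptions, not the actual LibreChat code:

// Hypothetical sketch of clearing a pending timeout on unmount.
import { useEffect, useRef } from 'react';

export function useDelayedAction(action: () => void, delayMs = 3000) {
  const timeoutRef = useRef<ReturnType<typeof setTimeout>>();

  const schedule = () => {
    timeoutRef.current = setTimeout(action, delayMs);
  };

  useEffect(() => {
    // Cleanup runs on unmount: cancel any pending timeout so the
    // callback cannot fire against an unmounted component.
    return () => {
      if (timeoutRef.current) {
        clearTimeout(timeoutRef.current);
      }
    };
  }, []);

  return schedule;
}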
danny-avila authored Jan 16, 2025
1 parent b55e695 commit e309c6a
Showing 8 changed files with 149 additions and 145 deletions.
98 changes: 52 additions & 46 deletions client/src/components/Chat/Input/AudioRecorder.tsx
@@ -1,73 +1,79 @@
import { useEffect } from 'react';
import { useCallback } from 'react';
import { useChatFormContext, useToastContext } from '~/Providers';
import { ListeningIcon, Spinner } from '~/components/svg';
import { useLocalize, useSpeechToText } from '~/hooks';
import { useChatFormContext } from '~/Providers';
import { TooltipAnchor } from '~/components/ui';
import { globalAudioId } from '~/common';
import { cn } from '~/utils';

export default function AudioRecorder({
textAreaRef,
methods,
ask,
isRTL,
disabled,
ask,
methods,
textAreaRef,
isSubmitting,
}: {
textAreaRef: React.RefObject<HTMLTextAreaElement>;
methods: ReturnType<typeof useChatFormContext>;
ask: (data: { text: string }) => void;
isRTL: boolean;
disabled: boolean;
ask: (data: { text: string }) => void;
methods: ReturnType<typeof useChatFormContext>;
textAreaRef: React.RefObject<HTMLTextAreaElement>;
isSubmitting: boolean;
}) {
const { setValue, reset } = methods;
const localize = useLocalize();
const { showToast } = useToastContext();

const handleTranscriptionComplete = (text: string) => {
if (text) {
const globalAudio = document.getElementById(globalAudioId) as HTMLAudioElement;
if (globalAudio) {
console.log('Unmuting global audio');
globalAudio.muted = false;
const onTranscriptionComplete = useCallback(
(text: string) => {
if (isSubmitting) {
showToast({
message: localize('com_ui_speech_while_submitting'),
status: 'error',
});
return;
}
ask({ text });
methods.reset({ text: '' });
clearText();
}
};

const {
isListening,
isLoading,
startRecording,
stopRecording,
interimTranscript,
speechText,
clearText,
} = useSpeechToText(handleTranscriptionComplete);
if (text) {
const globalAudio = document.getElementById(globalAudioId) as HTMLAudioElement | null;
if (globalAudio) {
console.log('Unmuting global audio');
globalAudio.muted = false;
}
ask({ text });
reset({ text: '' });
}
},
[ask, reset, showToast, localize, isSubmitting],
);

useEffect(() => {
if (isListening && textAreaRef.current) {
methods.setValue('text', interimTranscript, {
const setText = useCallback(
(text: string) => {
setValue('text', text, {
shouldValidate: true,
});
} else if (textAreaRef.current) {
textAreaRef.current.value = speechText;
methods.setValue('text', speechText, { shouldValidate: true });
}
}, [interimTranscript, speechText, methods, textAreaRef]);
},
[setValue],
);

const handleStartRecording = async () => {
await startRecording();
};
const { isListening, isLoading, startRecording, stopRecording } = useSpeechToText(
setText,
onTranscriptionComplete,
);

const handleStopRecording = async () => {
await stopRecording();
};
if (!textAreaRef.current) {
return null;
}

const handleStartRecording = async () => startRecording();

const handleStopRecording = async () => stopRecording();

const renderIcon = () => {
if (isListening) {
if (isListening === true) {
return <ListeningIcon className="stroke-red-500" />;
}
if (isLoading) {
if (isLoading === true) {
return <Spinner className="stroke-gray-700 dark:stroke-gray-300" />;
}
return <ListeningIcon className="stroke-gray-700 dark:stroke-gray-300" />;
@@ -77,7 +83,7 @@ export default function AudioRecorder({
<TooltipAnchor
id="audio-recorder"
aria-label={localize('com_ui_use_micrphone')}
onClick={isListening ? handleStopRecording : handleStartRecording}
onClick={isListening === true ? handleStopRecording : handleStartRecording}
disabled={disabled}
className={cn(
'absolute flex size-[35px] items-center justify-center rounded-full p-1 transition-colors hover:bg-surface-hover',
7 changes: 4 additions & 3 deletions client/src/components/Chat/Input/ChatForm.tsx
@@ -228,11 +228,12 @@ const ChatForm = ({ index = 0 }) => {
</FileFormWrapper>
{SpeechToText && (
<AudioRecorder
disabled={!!disableInputs}
textAreaRef={textAreaRef}
ask={submitMessage}
isRTL={isRTL}
methods={methods}
ask={submitMessage}
textAreaRef={textAreaRef}
disabled={!!disableInputs}
isSubmitting={isSubmitting}
/>
)}
{TextToSpeech && automaticPlayback && <StreamAudio index={index} />}
@@ -30,7 +30,6 @@ export default function FontSizeSelector() {
onChange={handleChange}
testId="font-size-selector"
sizeClasses="w-[150px]"
anchor="bottom start"
/>
</div>
);
9 changes: 3 additions & 6 deletions client/src/components/ui/Dropdown.tsx
@@ -1,10 +1,10 @@
import React, { useState } from 'react';
import React from 'react';
import * as Select from '@ariakit/react/select';
import type { Option } from '~/common';
import { cn } from '~/utils/';

interface DropdownProps {
value: string;
value?: string;
label?: string;
onChange: (value: string) => void;
options: string[] | Option[];
@@ -14,18 +14,15 @@ interface DropdownProps {
}

const Dropdown: React.FC<DropdownProps> = ({
value: initialValue,
value: selectedValue,
label = '',
onChange,
options,
className = '',
sizeClasses,
testId = 'dropdown-menu',
}) => {
const [selectedValue, setSelectedValue] = useState(initialValue);

const handleChange = (value: string) => {
setSelectedValue(value);
onChange(value);
};

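
With the local useState removed, Dropdown becomes fully controlled: the parent owns the selected value and the component only reports changes, so the menu can never drift from the rest of the UI. A minimal usage sketch under that assumption; the parent component, option values, and import path here are illustrative:

import React, { useState } from 'react';
import Dropdown from '~/components/ui/Dropdown';

// Illustrative parent: the selected value lives in exactly one place.
export default function FontSizeSetting() {
  const [fontSize, setFontSize] = useState('text-base');

  return (
    <Dropdown
      value={fontSize}          // single source of truth
      onChange={setFontSize}    // Dropdown reports changes, never stores them
      options={['text-sm', 'text-base', 'text-lg']}
      sizeClasses="w-[150px]"
      testId="font-size-selector"
    />
  );
}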
63 changes: 14 additions & 49 deletions client/src/hooks/Input/useSpeechToText.ts
@@ -1,83 +1,48 @@
import { useState, useEffect } from 'react';
import useSpeechToTextBrowser from './useSpeechToTextBrowser';
import useSpeechToTextExternal from './useSpeechToTextExternal';
import useGetAudioSettings from './useGetAudioSettings';

const useSpeechToText = (handleTranscriptionComplete: (text: string) => void) => {
const useSpeechToText = (
setText: (text: string) => void,
onTranscriptionComplete: (text: string) => void,
): {
isLoading?: boolean;
isListening?: boolean;
stopRecording: () => void | (() => Promise<void>);
startRecording: () => void | (() => Promise<void>);
} => {
const { speechToTextEndpoint } = useGetAudioSettings();
const [animatedText, setAnimatedText] = useState('');
const externalSpeechToText = speechToTextEndpoint === 'external';

const {
isListening: speechIsListeningBrowser,
isLoading: speechIsLoadingBrowser,
interimTranscript: interimTranscriptBrowser,
text: speechTextBrowser,
startRecording: startSpeechRecordingBrowser,
stopRecording: stopSpeechRecordingBrowser,
} = useSpeechToTextBrowser();
} = useSpeechToTextBrowser(setText, onTranscriptionComplete);

const {
isListening: speechIsListeningExternal,
isLoading: speechIsLoadingExternal,
text: speechTextExternal,
externalStartRecording: startSpeechRecordingExternal,
externalStopRecording: stopSpeechRecordingExternal,
clearText,
} = useSpeechToTextExternal(handleTranscriptionComplete);
} = useSpeechToTextExternal(setText, onTranscriptionComplete);

const isListening = externalSpeechToText ? speechIsListeningExternal : speechIsListeningBrowser;
const isLoading = externalSpeechToText ? speechIsLoadingExternal : speechIsLoadingBrowser;
const speechTextForm = externalSpeechToText ? speechTextExternal : speechTextBrowser;

const startRecording = externalSpeechToText
? startSpeechRecordingExternal
: startSpeechRecordingBrowser;
const stopRecording = externalSpeechToText
? stopSpeechRecordingExternal
: stopSpeechRecordingBrowser;
const speechText =
isListening || (speechTextExternal && speechTextExternal.length > 0)
? speechTextExternal
: speechTextForm || '';
// for a future real-time STT external
const interimTranscript = externalSpeechToText ? '' : interimTranscriptBrowser;

const animateTextTyping = (text: string) => {
const totalDuration = 2000;
const frameRate = 60;
const totalFrames = totalDuration / (1000 / frameRate);
const charsPerFrame = Math.ceil(text.length / totalFrames);
let currentIndex = 0;

const animate = () => {
currentIndex += charsPerFrame;
const currentText = text.substring(0, currentIndex);
setAnimatedText(currentText);

if (currentIndex < text.length) {
requestAnimationFrame(animate);
} else {
setAnimatedText(text);
}
};

requestAnimationFrame(animate);
};

useEffect(() => {
if (speechText && externalSpeechToText) {
animateTextTyping(speechText);
}
}, [speechText, externalSpeechToText]);

return {
isListening,
isLoading,
startRecording,
isListening,
stopRecording,
interimTranscript,
speechText: externalSpeechToText ? animatedText : speechText,
clearText,
startRecording,
};
};
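
The "reset transcript after sending final text" fix lives in useSpeechToTextExternal, which is not expanded in this view. A minimal sketch of that pattern follows; the hook and handler names are illustrative assumptions, not the actual implementation:

import { useCallback, useState } from 'react';

// Hypothetical sketch: hand the final transcript off, then clear it so a
// stale value cannot be re-submitted or clobber the next recording.
export function useFinalTranscript(onTranscriptionComplete: (text: string) => void) {
  const [transcript, setTranscript] = useState('');

  const handleFinalTranscript = useCallback(
    (finalText: string) => {
      onTranscriptionComplete(finalText);
      setTranscript(''); // reset immediately after the text is sent
    },
    [onTranscriptionComplete],
  );

  return { transcript, setTranscript, handleFinalTranscript };
}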

(Diffs for the remaining changed files are not expanded in this view.)
