From e13f520eb91bd8af1cc42578da29316f0b25033e Mon Sep 17 00:00:00 2001
From: Andrea Patini <70520359+andrepat0@users.noreply.github.com>
Date: Tue, 15 Oct 2024 09:17:31 +0200
Subject: [PATCH] feat: improve lip sync (#20)

* feat: set the avatar to cover the whole screen in the ZOOMED FULL BODY layout
* feat: improved the stripOutputTags function
* refactor: simplified the viseme and animation process for lip sync
* feat: fine-tuned lip values for smoother transitions
* refactor: improved the speak function
* refactor: moved the blink and emotion animations into the same useFrame loop
* refactor: reset previous emotion morph targets when a new emotion is triggered
* fix: adjusted the position of the input and "powered by" box inside the ZOOMED FULL BODY layout

---
 src/components/Avatar/Avatar.tsx              |  10 +-
 .../AvatarComponent/avatarComponent.tsx       |  51 ++-
 .../components/fullbodyAvatar.tsx             | 352 +++++++++++-------
 .../components/halfbodyAvatar.tsx             |   8 +-
 src/components/Avatar/AvatarView/index.tsx    |  17 +-
 src/components/MemoriWidget/MemoriWidget.tsx  | 300 +++++++--------
 src/components/layouts/ZoomedFullBody.tsx     |  67 ++--
 .../ZoomedFullBody.test.tsx.snap              |  46 +--
 src/components/layouts/zoomed-full-body.css   |  16 +
 src/context/visemeContext.tsx                 | 350 +++++-------------
 src/helpers/utils.ts                          |  17 +-
 src/styles.css                                |   1 +
 12 files changed, 578 insertions(+), 657 deletions(-)
 create mode 100644 src/components/layouts/zoomed-full-body.css

diff --git a/src/components/Avatar/Avatar.tsx b/src/components/Avatar/Avatar.tsx
index 64f1dedf..54e9beb8 100644
--- a/src/components/Avatar/Avatar.tsx
+++ b/src/components/Avatar/Avatar.tsx
@@ -57,7 +57,7 @@ const Avatar: React.FC = ({
   const { t } = useTranslation();
   const [isClient, setIsClient] = useState(false);
 
-  const { setMeshRef, clearVisemes, setEmotion } = useViseme();
+  const { stopProcessing, updateCurrentViseme, resetVisemeQueue } = useViseme();
 
   useEffect(() => {
     setIsClient(true);
@@ -132,6 +132,7 @@ const Avatar: React.FC = ({
         }
       >
+
           speaking={isPlayingAudio}
           loading={loading}
           style={getAvatarStyle()}
-          clearVisemes={clearVisemes}
-          setMeshRef={setMeshRef}
-          isZoomed={isZoomed}
+          stopProcessing={stopProcessing}
+          resetVisemeQueue={resetVisemeQueue}
+          isZoomed={isZoomed}
           chatEmission={chatProps?.dialogState?.emission}
-          setEmotion={setEmotion}
         />
       );

diff --git a/src/components/Avatar/AvatarView/AvatarComponent/avatarComponent.tsx b/src/components/Avatar/AvatarView/AvatarComponent/avatarComponent.tsx
index 3f6e43b0..402d6f35 100644
--- a/src/components/Avatar/AvatarView/AvatarComponent/avatarComponent.tsx
+++ b/src/components/Avatar/AvatarView/AvatarComponent/avatarComponent.tsx
@@ -14,9 +14,9 @@ interface Props {
   speaking: boolean;
   isZoomed: boolean;
   chatEmission: any;
-  setMeshRef: any;
-  clearVisemes: () => void;
-  setEmotion: (emotion: string) => void;
+  stopProcessing: () => void;
+  resetVisemeQueue: () => void;
+  updateCurrentViseme: (currentTime: number) => { name: string; weight: number } | null;
 }
 
 interface BaseAction {
   weight: number;
 }
 
   Loading3: { weight: 0 },
 };
 
+
 export const AvatarView: React.FC<Props> = ({
-  setMeshRef,
-  clearVisemes,
+  stopProcessing,
   chatEmission,
   showControls,
   animation,
@@ -64,7 +64,8 @@ export const AvatarView: React.FC<Props> = ({
   halfBody,
   loading,
   isZoomed,
-  setEmotion,
+  updateCurrentViseme,
+  resetVisemeQueue,
 }) => {
   const [currentBaseAction, setCurrentBaseAction] = useState({
     action: animation || 'Idle1',
     weight: 1,
   });
 
   const [morphTargetDictionary, setMorphTargetDictionary] = useState<{
     [key: string]: number;
   }>({});
+  const [emotionMorphTargets, setEmotionMorphTargets] = useState<{
+    [key: string]: number;
+  }>({});
 
   const [timeScale, setTimeScale] = useState(0.8);
 
@@ -93,7 +97,7 @@ export const AvatarView: React.FC<Props> = ({
     //remove the last character from the action
     const newEmotion = action.slice(0, -1);
 
-    setEmotion(newEmotion);
+    // setEmotion(newEmotion);
 
     const defaultEmotions = Object.keys(emotionMap).reduce((acc, key) => {
       acc[key] = 0;
@@ -105,9 +109,8 @@ export const AvatarView: React.FC<Props> = ({
     const emotionValues =
       emotion === 'default' ? defaultEmotions : emotionMap[emotion];
 
-    setMorphTargetInfluences(prevInfluences => ({
-      ...prevInfluences,
-      ...defaultEmotions,
+    setEmotionMorphTargets(prevEmotions => ({
+      ...prevEmotions,
       ...emotionValues,
     }));
   }, []);
@@ -162,6 +165,11 @@ export const AvatarView: React.FC<Props> = ({
       const emotion = `${outputContent}${randomNumber}`;
 
       onBaseActionChange(emotion);
+    } else {
+      //Set a random idle animation
+      const randomNumber = Math.floor(Math.random() * 5) + 1;
+      const animation = `Idle${randomNumber === 3 ? 4 : randomNumber}`;
+      onBaseActionChange(animation);
     }
   }, [chatEmission]);
@@ -174,6 +182,13 @@ export const AvatarView: React.FC<Props> = ({
     }
   }, [loading]);
 
+  // useEffect(() => {
+  //   if (speaking && currentBaseAction.action !== 'Idle1') {
+  //     const animation = `Idle1`;
+  //     onBaseActionChange(animation);
+  //   }
+  // }, [speaking]);
+
   return (
     <>
       {showControls && (
@@ -191,30 +206,28 @@ export const AvatarView: React.FC<Props> = ({
       {halfBody ? (
       ) : (
       )}
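For reference, the selection flow above maps the emotion name extracted from the chat emission's output tag onto one of the five Italian-named emotion clips, and otherwise falls back to a random idle (skipping the unused Idle3 slot). A minimal sketch of that logic — the tag-extraction regex and the three-variants-per-emotion count are illustrative assumptions, not taken verbatim from this patch:

    // pickAnimation.ts — illustrative sketch, not part of this patch.
    const EMOTIONS = ['Gioia', 'Rabbia', 'Sorpresa', 'Timore', 'Tristezza'];

    function pickAnimation(chatEmission?: string): string {
      // Assumed tag format; the widget's real parsing may differ.
      const match = chatEmission?.match(/<output>(.*?)<\/output>/);
      const emotion = match?.[1]?.trim();
      if (emotion && EMOTIONS.includes(emotion)) {
        // Assumes a few recorded variants per emotion, e.g. "Gioia1".."Gioia3".
        return `${emotion}${Math.floor(Math.random() * 3) + 1}`;
      }
      // Random idle, skipping Idle3 exactly as the patch does.
      const n = Math.floor(Math.random() * 5) + 1;
      return `Idle${n === 3 ? 4 : n}`;
    }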
diff --git a/src/components/Avatar/AvatarView/AvatarComponent/components/fullbodyAvatar.tsx b/src/components/Avatar/AvatarView/AvatarComponent/components/fullbodyAvatar.tsx
index e11e2bc6..34341a2e 100644
--- a/src/components/Avatar/AvatarView/AvatarComponent/components/fullbodyAvatar.tsx
+++ b/src/components/Avatar/AvatarView/AvatarComponent/components/fullbodyAvatar.tsx
@@ -1,21 +1,17 @@
-import React, { useEffect, useRef, useState, useCallback } from 'react';
+import React, { useEffect, useRef, useState } from 'react';
 import {
   Vector3,
   Euler,
   AnimationMixer,
   SkinnedMesh,
   Object3D,
+  MathUtils,
   AnimationAction,
+  LoopOnce,
 } from 'three';
 import { useAnimations, useGLTF } from '@react-three/drei';
-import { useGraph, dispose, useFrame } from '@react-three/fiber';
+import { useGraph, useFrame } from '@react-three/fiber';
 import { correctMaterials, isSkinnedMesh } from '../../../../../helpers/utils';
-import { useAvatarBlink } from '../../utils/useEyeBlink';
-import { useViseme } from '../../../../../context/visemeContext';
-
-const lerp = (start: number, end: number, alpha: number): number => {
-  return start * (1 - alpha) + end * alpha;
-};
 
 interface FullbodyAvatarProps {
   url: string;
@@ -26,16 +22,23 @@ interface FullbodyAvatarProps {
     weight: number;
   };
   timeScale: number;
-  loading?: boolean;
-  speaking?: boolean;
   isZoomed?: boolean;
-  setMorphTargetInfluences: (influences: { [key: string]: number }) => void;
-  setMorphTargetDictionary: (dictionary: { [key: string]: number }) => void;
-  morphTargetInfluences: { [key: string]: number };
-  morphTargetDictionary: { [key: string]: number };
-  setMeshRef: any;
   eyeBlink?: boolean;
-  clearVisemes: () => void;
+  stopProcessing: () => void;
+  resetVisemeQueue: () => void;
+  updateCurrentViseme: (
+    currentTime: number
+  ) => { name: string; weight: number } | null;
+  smoothMorphTarget?: boolean;
+  morphTargetSmoothing?: number;
+  morphTargetInfluences: Record<string, number>;
+  setMorphTargetDictionary: (
+    morphTargetDictionary: Record<string, number>
+  ) => void;
+  setMorphTargetInfluences: (
+    morphTargetInfluences: Record<string, number>
+  ) => void;
+  emotionMorphTargets: Record<string, number>;
 }
 
 const AVATAR_POSITION = new Vector3(0, -1, 0);
 const AVATAR_ROTATION = new Euler(0.175, 0, 0);
 const AVATAR_POSITION_ZOOMED = new Vector3(0, -1.45, 0);
 
 const ANIMATION_URLS = {
-  MALE: 'https://assets.memori.ai/api/v2/asset/1c350a21-97d8-4add-82cc-9dc10767a26b.glb',
+  MALE: 'https://assets.memori.ai/api/v2/asset/2c5e88a4-cf62-408b-9ef0-518b099dfcb2.glb',
   FEMALE:
-    'https://assets.memori.ai/api/v2/asset/c2b07166-de10-4c66-918b-7b7cd380cca7.glb',
+    'https://assets.memori.ai/api/v2/asset/0e49aa5d-f757-4292-a170-d843c2839a41.glb',
+};
+
+// Blink configuration
+const BLINK_CONFIG = {
+  minInterval: 1000,
+  maxInterval: 5000,
+  blinkDuration: 150,
 };
-const ANIMATION_DURATION = 3000; // Duration in milliseconds for non-idle animations
+
+const EMOTION_TRANSITION_SPEED = 0.1; // Adjust this value to control emotion transition speed
 
 export default function FullbodyAvatar({
   url,
   sex,
   onLoaded,
   currentBaseAction,
   timeScale,
   isZoomed,
-  setMorphTargetInfluences,
-  setMorphTargetDictionary,
-  morphTargetInfluences,
   eyeBlink,
-  setMeshRef,
-  clearVisemes,
+  stopProcessing,
+  morphTargetSmoothing = 0.5,
+  updateCurrentViseme,
+  setMorphTargetDictionary,
+  setMorphTargetInfluences,
+  resetVisemeQueue,
+  emotionMorphTargets,
 }: FullbodyAvatarProps) {
   const { scene } = useGLTF(url);
   const { animations } = useGLTF(ANIMATION_URLS[sex]);
   const { nodes, materials } = useGraph(scene);
   const { actions } = useAnimations(animations, scene);
-  const [mixer] = useState(() => new AnimationMixer(scene));
-  const avatarMeshRef = useRef();
+  const mixer = useRef(new AnimationMixer(scene));
+  const headMeshRef = useRef<SkinnedMesh>();
   const currentActionRef = useRef<AnimationAction | null>(null);
-  const isTransitioningRef = useRef(false);
-
-  // Blink animation
-  useAvatarBlink({
-    enabled: eyeBlink || false,
-    setMorphTargetInfluences,
-    config: {
-      minInterval: 1500,
-      maxInterval: 4000,
-      blinkDuration: 120,
-    },
-  });
+  const [isTransitioningToIdle, setIsTransitioningToIdle] = useState(false);
 
-  // Idle animation when emotion animation is finished
-  const transitionToIdle = useCallback(() => {
-    if (!actions || isTransitioningRef.current) return;
+  // Blink state
+  const lastBlinkTime = useRef(0);
+  const nextBlinkTime = useRef(0);
+  const isBlinking = useRef(false);
+  const blinkStartTime = useRef(0);
 
-    isTransitioningRef.current = true;
+  // Morph targets
+  const currentEmotionRef = useRef<Record<string, number>>({});
+  const previousEmotionKeysRef = useRef<Set<string>>(new Set());
 
-    const finishCurrentAnimation = () => {
-      if (currentActionRef.current && !currentActionRef.current.paused) {
-        const remainingTime =
-          (currentActionRef.current.getClip().duration -
-            currentActionRef.current.time) *
-          1000;
-        setTimeout(() => {
-          startIdleAnimation();
-        }, remainingTime);
-      } else {
-        startIdleAnimation();
-      }
-    };
+  useEffect(() => {
+    correctMaterials(materials);
 
-    const startIdleAnimation = () => {
-      const idleAnimations = Object.keys(actions).filter(key =>
-        key.startsWith('Idle')
-      );
-      const randomIdle =
-        idleAnimations[Math.floor(Math.random() * idleAnimations.length)];
+    scene.traverse((object: Object3D) => {
+      if (object instanceof SkinnedMesh) {
+        if (object.name === 'GBNL__Head' || object.name === 'Wolf3D_Avatar') {
+          headMeshRef.current = object;
+          if (object.morphTargetDictionary && object.morphTargetInfluences) {
+            setMorphTargetDictionary(object.morphTargetDictionary);
 
-      const idleAction = actions[randomIdle];
-      const fadeOutDuration = 0.5;
-      const fadeInDuration = 0.5;
+            const initialInfluences = Object.keys(
+              object.morphTargetDictionary
+            ).reduce((acc, key) => ({ ...acc, [key]: 0 }), {});
+            setMorphTargetInfluences(initialInfluences);
+          }
+        }
       }
 
-      if (currentActionRef.current) {
-        currentActionRef.current.fadeOut(fadeOutDuration);
+    });
 
-      idleAction?.reset().fadeIn(fadeInDuration).play();
-      currentActionRef.current = idleAction;
+    onLoaded?.();
 
-      setTimeout(() => {
-        isTransitioningRef.current = false;
-      }, (fadeOutDuration + fadeInDuration) * 1000);
+    return () => {
+      Object.values(materials).forEach(material => material.dispose());
+      Object.values(nodes)
+        .filter(isSkinnedMesh)
+        .forEach(mesh => mesh.geometry.dispose());
+      stopProcessing();
+      resetVisemeQueue();
     };
+  }, [materials, nodes, url, onLoaded, stopProcessing, resetVisemeQueue, scene]);
 
-    if (currentActionRef.current && !currentActionRef.current.getClip().name.startsWith('Idle')) {
-      finishCurrentAnimation();
-    } else {
-      startIdleAnimation();
-    }
-  }, [actions]);
-
-  // Base animation
+  // Handle base animation changes
   useEffect(() => {
-    if (!actions || !currentBaseAction.action || isTransitioningRef.current)
-      return;
+    if (!actions || !currentBaseAction.action) return;
 
     const newAction = actions[currentBaseAction.action];
     if (!newAction) {
 
     const fadeOutDuration = 0.8;
     const fadeInDuration = 0.8;
 
-    if (!currentBaseAction.action.startsWith('Idle')) {
-      setTimeout(() => {
-        transitionToIdle();
-      }, ANIMATION_DURATION);
-    }
-
     if (currentActionRef.current) {
       currentActionRef.current.fadeOut(fadeOutDuration);
     }
-
-    newAction.timeScale = timeScale;
+
+    console.log(newAction);
     newAction.reset().fadeIn(fadeInDuration).play();
     currentActionRef.current = newAction;
-  }, [currentBaseAction, timeScale, actions, transitionToIdle]);
 
-  // Set up the mesh reference and morph target influences
-  useEffect(() => {
-    correctMaterials(materials);
+    // Set the time scale for the new action
+    newAction.timeScale = timeScale;
 
-    scene.traverse((object: Object3D) => {
-      if (
-        object instanceof SkinnedMesh &&
-        (object.name === 'GBNL__Head' || object.name === 'Wolf3D_Avatar')
-      ) {
-        avatarMeshRef.current = object;
-        setMeshRef(object);
-
-        if (object.morphTargetDictionary && object.morphTargetInfluences) {
-          setMorphTargetDictionary(object.morphTargetDictionary);
-
-          const initialInfluences = Object.keys(
-            object.morphTargetDictionary
-          ).reduce((acc, key) => ({ ...acc, [key]: 0 }), {});
-          setMorphTargetInfluences(initialInfluences);
+    // If it's an emotion animation, set it to play once and then transition to idle
+    if (
+      currentBaseAction.action.startsWith('Gioia') ||
+      currentBaseAction.action.startsWith('Rabbia') ||
+      currentBaseAction.action.startsWith('Sorpresa') ||
+      currentBaseAction.action.startsWith('Timore') ||
+      currentBaseAction.action.startsWith('Tristezza')
+    ) {
+      newAction.setLoop(LoopOnce, 1);
+      newAction.clampWhenFinished = true;
+      setIsTransitioningToIdle(true);
+    }
+  }, [actions, currentBaseAction, timeScale]);
+
+  useFrame(state => {
+    if (
+      headMeshRef.current &&
+      headMeshRef.current.morphTargetDictionary &&
+      headMeshRef.current.morphTargetInfluences
+    ) {
+      const currentTime = state.clock.getElapsedTime() * 1000; // Convert to milliseconds
+
+      // Handle blinking
+      let blinkValue = 0;
+      if (eyeBlink) {
+        if (currentTime >= nextBlinkTime.current && !isBlinking.current) {
+          isBlinking.current = true;
+          blinkStartTime.current = currentTime;
+          lastBlinkTime.current = currentTime;
+          nextBlinkTime.current =
+            currentTime +
+            Math.random() *
+              (BLINK_CONFIG.maxInterval - BLINK_CONFIG.minInterval) +
+            BLINK_CONFIG.minInterval;
+        }
+
+        if (isBlinking.current) {
+          const blinkProgress =
+            (currentTime - blinkStartTime.current) / BLINK_CONFIG.blinkDuration;
+          if (blinkProgress <= 0.5) {
+            // Eyes closing
+            blinkValue = blinkProgress * 2;
+          } else if (blinkProgress <= 1) {
+            // Eyes opening
+            blinkValue = 2 - blinkProgress * 2;
+          } else {
+            // Blink finished
+            isBlinking.current = false;
+            blinkValue = 0;
+          }
         }
       }
-    });
 
-    onLoaded?.();
+      const currentViseme = updateCurrentViseme(currentTime / 1000);
 
-    return () => {
-      Object.values(materials).forEach(dispose);
-      Object.values(nodes).filter(isSkinnedMesh).forEach(dispose);
-      clearVisemes();
-    };
-  }, [
-    materials,
-    nodes,
-    url,
-    onLoaded,
-    setMorphTargetDictionary,
-    setMorphTargetInfluences,
-    setMeshRef,
-    clearVisemes,
-  ]);
-
-  // Update morph target influences
-  useFrame((_, delta) => {
-    if (avatarMeshRef.current && avatarMeshRef.current.morphTargetDictionary) {
-      updateMorphTargetInfluences();
-    }
-    mixer.update(delta * 0.001);
-
-    function updateMorphTargetInfluences() {
-      Object.entries(morphTargetInfluences).forEach(([key, value]) => {
-        const index = avatarMeshRef.current!.morphTargetDictionary![key];
-        if (typeof index === 'number' &&
-          avatarMeshRef.current!.morphTargetInfluences) {
-          const currentValue = avatarMeshRef.current!.morphTargetInfluences[index];
-          const smoothValue = lerp(currentValue, value, 0.1);
-          avatarMeshRef.current!.morphTargetInfluences[index] = smoothValue;
+      // Create a set of current emotion keys
+      const currentEmotionKeys = new Set(Object.keys(emotionMorphTargets));
+
+      // Reset old emotion morph targets
+      previousEmotionKeysRef.current.forEach(key => {
+        if (!currentEmotionKeys.has(key)) {
+          const index = headMeshRef.current!.morphTargetDictionary![key];
+          if (typeof index === 'number') {
+            currentEmotionRef.current[key] = 0;
+            if (headMeshRef.current && headMeshRef.current.morphTargetInfluences) {
+              headMeshRef.current.morphTargetInfluences[index] = 0;
+            }
+          }
         }
       });
+
+      // Update morph targets
+      Object.entries(headMeshRef.current.morphTargetDictionary).forEach(
+        ([key, index]) => {
+          if (typeof index === 'number') {
+            let targetValue = 0;
+
+            // Handle emotions (base layer)
+            if (Object.prototype.hasOwnProperty.call(emotionMorphTargets, key)) {
+              const targetEmotionValue = emotionMorphTargets[key];
+              const currentEmotionValue = currentEmotionRef.current[key] || 0;
+              const newEmotionValue = MathUtils.lerp(
+                currentEmotionValue,
+                targetEmotionValue * 2,
+                EMOTION_TRANSITION_SPEED
+              );
+              currentEmotionRef.current[key] = newEmotionValue;
+              targetValue += newEmotionValue;
+            }
+
+            // Handle visemes (additive layer)
+            if (currentViseme && key === currentViseme.name) {
+              targetValue += currentViseme.weight * 1.2; // Amplify the effect
+            }
+
+            // Handle blinking (additive layer, only for 'eyesClosed')
+            if (key === 'eyesClosed' && eyeBlink) {
+              targetValue += blinkValue;
+            }
+
+            // Clamp the final value between 0 and 1
+            targetValue = MathUtils.clamp(targetValue, 0, 1);
+
+            // Apply smoothing
+            if (headMeshRef.current && headMeshRef.current.morphTargetInfluences) {
+              headMeshRef.current.morphTargetInfluences[index] = MathUtils.lerp(
+                headMeshRef.current.morphTargetInfluences[index],
+                targetValue,
+                morphTargetSmoothing
+              );
+            }
+          }
+        }
+      );
+
+      // Update the set of previous emotion keys for the next frame
+      previousEmotionKeysRef.current = currentEmotionKeys;
+
+      // Handle transition from emotion animation to idle
+      if (isTransitioningToIdle && currentActionRef.current) {
+        if (
+          currentActionRef.current.time >=
+          currentActionRef.current.getClip().duration
+        ) {
+          // Transition to the idle animation
+          const idleNumber = Math.floor(Math.random() * 5) + 1; // Randomly choose 1, 2, 3, 4 or 5
+          const idleAction = actions[`Idle${idleNumber == 3 ? 4 : idleNumber}`];
+
+          if (idleAction) {
+            currentActionRef.current.fadeOut(0.5);
+            idleAction.reset().fadeIn(0.5).play();
+            currentActionRef.current = idleAction;
+            setIsTransitioningToIdle(false);
+          }
+        }
+      }
+
+      // Update the animation mixer
+      mixer.current.update(0.01); // Fixed delta time for consistent animation speed
     }
   });
 
@@ -231,4 +303,4 @@ export default function FullbodyAvatar({
 
   );
-}
\ No newline at end of file
+}

diff --git a/src/components/Avatar/AvatarView/AvatarComponent/components/halfbodyAvatar.tsx b/src/components/Avatar/AvatarView/AvatarComponent/components/halfbodyAvatar.tsx
index 4f0ad93f..279f0bd2 100644
--- a/src/components/Avatar/AvatarView/AvatarComponent/components/halfbodyAvatar.tsx
+++ b/src/components/Avatar/AvatarView/AvatarComponent/components/halfbodyAvatar.tsx
@@ -14,8 +14,6 @@ interface HalfBodyAvatarProps {
   headMovement?: boolean;
   speaking?: boolean;
   onLoaded?: () => void;
-  setMeshRef: (mesh: Object3D) => void;
-  clearVisemes: () => void;
   setMorphTargetDictionary: (morphTargetDictionary: any) => void;
   eyeBlink?: boolean;
   morphTargetInfluences: any;
@@ -31,9 +29,7 @@ export default function HalfBodyAvatar({
   setMorphTargetDictionary,
   headMovement,
   eyeBlink,
-  setMeshRef,
   onLoaded,
-  clearVisemes,
   morphTargetInfluences,
 }: HalfBodyAvatarProps) {
   const { scene } = useGLTF(url);
@@ -58,7 +54,6 @@ export default function HalfBodyAvatar({
     // Set mesh reference for the first SkinnedMesh found
     const firstSkinnedMesh = Object.values(nodes).find(isSkinnedMesh) as SkinnedMesh;
     if (firstSkinnedMesh) {
-      setMeshRef(firstSkinnedMesh);
       avatarMeshRef.current = firstSkinnedMesh;
       if (firstSkinnedMesh.morphTargetDictionary && firstSkinnedMesh.morphTargetInfluences) {
         setMorphTargetDictionary(firstSkinnedMesh.morphTargetDictionary);
@@ -77,12 +72,11 @@ export default function HalfBodyAvatar({
       const disposeObjects = () => {
         Object.values(materials).forEach(dispose);
         Object.values(nodes).filter(isSkinnedMesh).forEach(dispose);
-        clearVisemes();
       };
 
       disposeObjects();
     };
-  }, [materials, nodes, url, onLoaded, clearVisemes]);
+  }, [materials, nodes, url, onLoaded]);
 
   const skinnedMeshes = useMemo(
     () => Object.values(nodes).filter(isSkinnedMesh),

diff --git a/src/components/Avatar/AvatarView/index.tsx b/src/components/Avatar/AvatarView/index.tsx
index 0c3be35f..cd9bac69 100644
--- a/src/components/Avatar/AvatarView/index.tsx
+++ b/src/components/Avatar/AvatarView/index.tsx
@@ -23,8 +23,9 @@ export interface Props {
   isZoomed?: boolean;
   chatEmission?: any;
   setMeshRef?: any;
-  clearVisemes: () => void;
-  setEmotion: (emotion: string) => void;
+  stopProcessing: () => void;
+  resetVisemeQueue: () => void;
+  updateCurrentViseme: (currentTime: number) => { name: string; weight: number } | null;
 }
 
 const defaultStyles = {
@@ -88,9 +89,9 @@ export default function ContainerAvatarView({
   showControls = false,
   isZoomed,
   chatEmission,
-  setMeshRef,
-  clearVisemes,
-  setEmotion,
+  stopProcessing,
+  resetVisemeQueue,
+  updateCurrentViseme,
 }: Props) {
   return (
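The useFrame handler above layers three signals into each morph-target influence: a slowly lerped emotion base, the active viseme weight, and the blink value, all clamped to [0, 1] and then smoothed toward the mesh. A condensed sketch of that per-channel blend (the function name is illustrative; only the math mirrors the patch):

    import { MathUtils } from 'three';

    // One morph-target channel per frame: the emotion layer is assumed to be
    // already eased toward its target, viseme and blink are added on top, and
    // the result is clamped and smoothed into the current influence.
    function blendInfluence(
      previous: number, // current influence on the mesh
      emotionValue: number, // eased emotion layer for this key
      visemeWeight: number, // 0 when this key is not the active viseme
      blinkValue: number, // 0 unless the key is 'eyesClosed'
      smoothing = 0.5 // morphTargetSmoothing default above
    ): number {
      const target = MathUtils.clamp(
        emotionValue + visemeWeight + blinkValue,
        0,
        1
      );
      return MathUtils.lerp(previous, target, smoothing);
    }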
diff --git a/src/components/MemoriWidget/MemoriWidget.tsx b/src/components/MemoriWidget/MemoriWidget.tsx
index bc9df956..e6257c63 100644
--- a/src/components/MemoriWidget/MemoriWidget.tsx
+++ b/src/components/MemoriWidget/MemoriWidget.tsx
@@ -543,12 +543,19 @@ const MemoriWidget = ({
   const [hideEmissions, setHideEmissions] = useState(false);
 
   const {
-    addVisemeToQueue,
-    processVisemeQueue,
-    clearVisemes,
-    emotion,
-    getAzureStyleForEmotion,
+    startProcessing,
+    stopProcessing,
+    addViseme,
+    resetVisemeQueue,
+    isProcessing,
   } = useViseme();
 
+  const audioContextRef = useRef<IAudioContext | null>(null);
+  const speechSynthesizerRef = useRef<speechSdk.SpeechSynthesizer | null>(null);
+  const audioDestinationRef = useRef<speechSdk.SpeakerAudioDestination | null>(
+    null
+  );
+  const currentSpeechRef = useRef<{ cancel: () => void } | null>(null);
+
   useEffect(() => {
     setIsPlayingAudio(!!speechSynthesizer);
@@ -1883,200 +1890,167 @@ const MemoriWidget = ({
     const e = new CustomEvent('MemoriEndSpeak');
     document.dispatchEvent(e);
   };
 
+  const initializeAudioContext = useCallback(() => {
+    if (!audioContextRef.current || audioContextRef.current.state === 'closed') {
+      audioContextRef.current = new (window.AudioContext ||
+        (window as any).webkitAudioContext)() as unknown as IAudioContext;
+    }
+    return audioContextRef.current;
+  }, []);
+
+  const initializeSpeechSynthesizer = useCallback((audioConfig: speechSdk.AudioConfig) => {
+    if (!speechSynthesizerRef.current && AZURE_COGNITIVE_SERVICES_TTS_KEY) {
+      const speechConfig = speechSdk.SpeechConfig.fromSubscription(
+        AZURE_COGNITIVE_SERVICES_TTS_KEY,
+        'eastus'
+      );
+      speechSynthesizerRef.current = new speechSdk.SpeechSynthesizer(speechConfig, audioConfig);
+    }
+    return speechSynthesizerRef.current;
+  }, []);
 
-  const speak = (text: string): void => {
+  const stopCurrentSpeech = useCallback(() => {
+    if (currentSpeechRef.current) {
+      currentSpeechRef.current.cancel();
+      currentSpeechRef.current = null;
+    }
+    if (audioContextRef.current) {
+      audioContextRef.current.suspend();
+    }
+    if (audioDestinationRef.current) {
+      audioDestinationRef.current.pause();
+    }
+    setIsPlayingAudio(false);
+    stopProcessing();
+    resetVisemeQueue();
+  }, []);
+
+  const speak = useCallback(async (text: string): Promise<void> => {
     if (!AZURE_COGNITIVE_SERVICES_TTS_KEY || preview) {
       emitEndSpeakEvent();
       return;
     }
-    stopListening();
-    // stopAudio();
 
-    if (preview) return;
+    stopListening();
+    stopCurrentSpeech(); // Stop any ongoing speech
 
-    if (muteSpeaker || speakerMuted) {
-      memoriSpeaking = false;
+    if (preview || muteSpeaker || speakerMuted) {
+      setIsPlayingAudio(false);
       setMemoriTyping(false);
-      emitEndSpeakEvent();
-
-      // trigger start continuous listening if set, see MemoriChat
       if (continuousSpeech) {
         setListeningTimeout();
       }
       return;
     }
 
-    if (audioDestination) audioDestination.pause();
-
-    let isSafari =
-      window.navigator.userAgent.includes('Safari') &&
-      !window.navigator.userAgent.includes('Chrome');
-    let isIOS = /iPad|iPhone|iPod/.test(navigator.userAgent);
-    if ((audioContext.state as string) === 'interrupted') {
-      audioContext.resume().then(() => speak(text));
-      return;
-    }
-    if (audioContext.state === 'closed') {
-      audioContext = new AudioContext();
-      let buffer = audioContext.createBuffer(1, 10000, 22050);
-      let source = audioContext.createBufferSource();
-      source.buffer = buffer;
-      source.connect(audioContext.destination);
-    } else if (audioContext.state === 'suspended') {
-      stopAudio();
-
-      audioContext = new AudioContext();
-      let buffer = audioContext.createBuffer(1, 10000, 22050);
-      let source = audioContext.createBufferSource();
-      source.buffer = buffer;
-      source.connect(audioContext.destination);
-    }
+    try {
+      const audioContext = initializeAudioContext();
+      await audioContext.resume();
 
-    if (!speechSynthesizer) {
-      if (!isIOS) {
-        audioDestination = new speechSdk.SpeakerAudioDestination();
+      if (!audioDestinationRef.current) {
+        audioDestinationRef.current = new speechSdk.SpeakerAudioDestination();
       }
-      let audioConfig =
-        speechSdk.AudioConfig.fromSpeakerOutput(audioDestination);
-      speechSynthesizer = new speechSdk.SpeechSynthesizer(
-        speechConfig,
-        audioConfig
-      );
-    }
 
-    const source = audioContext.createBufferSource();
-    source.addEventListener('ended', () => {
-      setIsPlayingAudio(false);
-      memoriSpeaking = false;
-    });
-    audioDestination.onAudioEnd = () => {
-      setIsPlayingAudio(false);
-      memoriSpeaking = false;
-      source.disconnect();
-
-      emitEndSpeakEvent();
+      const audioConfig = speechSdk.AudioConfig.fromSpeakerOutput(audioDestinationRef.current);
+      const speechSynthesizer = initializeSpeechSynthesizer(audioConfig);
 
-      // trigger start continuous listening if set
-      onEndSpeakStartListen();
-    };
-
-    // Clear any existing visemes before starting new speech
-    clearVisemes();
+      if (speechSynthesizer) {
+        // Add the new visemeReceived event listener
+        speechSynthesizer.visemeReceived = (_, e) => {
+          addViseme(e.visemeId, e.audioOffset);
+          console.log('viseme added')
+        };
+      }
+      startProcessing();
 
-    // Set up the viseme event handler
-    speechSynthesizer.visemeReceived = function (_, e) {
-      addVisemeToQueue({
-        visemeId: e.visemeId,
-        audioOffset: e.audioOffset,
-      });
-    };
+      const textToSpeak = escapeHTML(
+        stripMarkdown(stripEmojis(stripHTML(stripOutputTags(text))))
+      );
 
-    const textToSpeak = escapeHTML(
-      stripMarkdown(stripEmojis(stripHTML(stripOutputTags(text))))
-    );
+      const ssml = `
+
+
+            ${replaceTextWithPhonemes(textToSpeak, userLang.toLowerCase())}
+
+
+      `;
+
+      const speakPromise = new Promise((resolve, reject) => {
+        speechSynthesizer?.speakSsmlAsync(
+          ssml,
+          result => resolve(result),
+          error => reject(error)
+        );
+      });
 
-    speechSynthesizer.speakSsmlAsync(
-      `${replaceTextWithPhonemes(
-        textToSpeak,
-        userLang.toLowerCase()
-      )}`,
-      result => {
-        if (result) {
-          setIsPlayingAudio(true);
-          memoriSpeaking = true;
-
-          // Process the viseme data
-          processVisemeQueue();
-
-          try {
-            // Decode the audio data
-            audioContext.decodeAudioData(result.audioData, function (buffer) {
-              source.buffer = buffer;
-              source.connect(audioContext.destination);
+      currentSpeechRef.current = {
+        cancel: () => {
+          speechSynthesizer?.close();
+          audioDestinationRef.current?.pause();
+        }
+      };
 
-              if (history.length < 1 || (isSafari && isIOS)) {
-                source.start(0);
-              }
-            });
+      const result = await speakPromise;
 
-            // Handle the audio context state changes
-            audioContext.onstatechange = () => {
-              if (
-                audioContext.state === 'suspended' ||
-                audioContext.state === 'closed'
-              ) {
-                source.disconnect();
-                setIsPlayingAudio(false);
-                memoriSpeaking = false;
-              } else if ((audioContext.state as string) === 'interrupted') {
-                audioContext.resume();
-              }
-            };
+      setIsPlayingAudio(true);
 
-            audioContext.resume();
+      if (audioContext && result) {
+        const audioBuffer = await audioContext.decodeAudioData(result.audioData);
+        const source = audioContext.createBufferSource();
+        source.buffer = audioBuffer;
+        source.connect(audioContext.destination);
 
-            if (speechSynthesizer) {
-              speechSynthesizer.close();
-              speechSynthesizer = null;
-            }
-          } catch (e) {
-            console.warn('speak error: ', e);
-            window.speechSynthesis.speak(new SpeechSynthesisUtterance(text));
-            clearVisemes();
-            setIsPlayingAudio(false);
-            memoriSpeaking = false;
-
-            if (speechSynthesizer) {
-              speechSynthesizer.close();
-              speechSynthesizer = null;
-            }
-            emitEndSpeakEvent();
-          }
-        } else {
-          audioContext.resume();
-          clearVisemes();
+        source.onended = () => {
           setIsPlayingAudio(false);
-          memoriSpeaking = false;
+          stopProcessing();
+          resetVisemeQueue();
+          currentSpeechRef.current = null;
           emitEndSpeakEvent();
-        }
-      },
-      error => {
-        console.error('speak:', error);
-        window.speechSynthesis.speak(new SpeechSynthesisUtterance(text));
-        setIsPlayingAudio(false);
-        memoriSpeaking = false;
-        emitEndSpeakEvent();
+          onEndSpeakStartListen();
+        };
+
+        await audioContext.resume();
+        source.start(0);
+      } else {
+        stopProcessing();
+        resetVisemeQueue();
+        throw new Error('No result from speech synthesis');
       }
-    );
+    } catch (error) {
+      console.error('Speech synthesis error:', error);
+      stopProcessing();
+      resetVisemeQueue();
+      // Fallback to browser's speech synthesis
+      const utterance = new SpeechSynthesisUtterance(text);
+      window.speechSynthesis.speak(utterance);
+    } finally {
+      setMemoriTyping(false);
+    }
+  }, [initializeAudioContext, initializeSpeechSynthesizer, stopCurrentSpeech]);
 
-    setMemoriTyping(false);
-  };
 
-  const stopAudio = () => {
+  const stopAudio = useCallback(() => {
     setIsPlayingAudio(false);
     memoriSpeaking = false;
     try {
-      if (speechSynthesizer) {
-        speechSynthesizer.close();
-        speechSynthesizer = null;
+      if (speechSynthesizerRef.current) {
+        speechSynthesizerRef.current.close();
+        speechSynthesizerRef.current = null;
       }
-      if (audioContext.state !== 'closed') {
-        audioContext.close();
+      if (audioContextRef.current && audioContextRef.current.state !== 'closed') {
+        audioContextRef.current.close();
       }
-      if (audioDestination) {
-        audioDestination.pause();
-        audioDestination.close();
+      if (audioDestinationRef.current) {
+        audioDestinationRef.current.pause();
+        audioDestinationRef.current.close();
       }
+      stopCurrentSpeech();
     } catch (e) {
       console.debug('stopAudio error: ', e);
     }
-  };
+  }, [stopCurrentSpeech]);
+
   /**
    * Focus on the chat input on mount
    */
@@ -2240,6 +2214,12 @@ const MemoriWidget = ({
   useEffect(() => {
     return () => {
       resetUIEffects();
+      if (speechSynthesizerRef.current) {
+        speechSynthesizerRef.current.close();
+      }
+      if (audioContextRef.current) {
+        audioContextRef.current.close();
+      }
     };
     // eslint-disable-next-line react-hooks/exhaustive-deps
   }, []);
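The refactored speak() above replaces the old nested callbacks with a single awaited promise around speakSsmlAsync, while visemes stream in on their own event handler. A reduced sketch of that wrapper — note that Azure reports audioOffset in 100-nanosecond ticks, which is why the viseme context divides by 10,000,000 to get seconds:

    import * as speechSdk from 'microsoft-cognitiveservices-speech-sdk';

    // Illustrative wrapper, assuming the synthesizer is already configured.
    function speakSsml(
      synthesizer: speechSdk.SpeechSynthesizer,
      ssml: string,
      onViseme: (visemeId: number, audioOffsetTicks: number) => void
    ): Promise<speechSdk.SpeechSynthesisResult> {
      // audioOffset arrives in 100 ns ticks; divide by 10_000_000 for seconds.
      synthesizer.visemeReceived = (_sender, e) =>
        onViseme(e.visemeId, e.audioOffset);
      return new Promise((resolve, reject) => {
        synthesizer.speakSsmlAsync(ssml, resolve, reject);
      });
    }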
diff --git a/src/components/layouts/ZoomedFullBody.tsx b/src/components/layouts/ZoomedFullBody.tsx
index 77fd0cfe..488e9324 100644
--- a/src/components/layouts/ZoomedFullBody.tsx
+++ b/src/components/layouts/ZoomedFullBody.tsx
@@ -1,8 +1,8 @@
-import React from 'react';
+import React, { useEffect, useCallback } from 'react';
 import Spin from '../ui/Spin';
 import { LayoutProps } from '../MemoriWidget/MemoriWidget';
 
-const FullPageLayout: React.FC<LayoutProps> = ({
+const ZoomedFullBodyLayout: React.FC<LayoutProps> = ({
   Header,
   headerProps,
   Avatar,
@@ -20,36 +20,45 @@ const FullPageLayout: React.FC<LayoutProps> = ({
   showInstruct = false,
   loading = false,
   poweredBy,
-}) => (
-  <>
-    {integrationStyle}
-    {integrationBackground}
+}) => {
+  useEffect(() => {
+    document.body.style.overflow = 'hidden';
+    return () => {
+      document.body.style.overflow = '';
+    };
+  }, []);
 
-
-    {showInstruct && ChangeMode && changeModeProps && (
-
-    )}
+  return (
+    <>
+      {integrationStyle}
+      {integrationBackground}
 
-    {Header && headerProps && }
+
+      {showInstruct && ChangeMode && changeModeProps && }
 
-
-
-      {Avatar && avatarProps && }
+      {Header && headerProps && }
 
-
-
-
-      {sessionId && hasUserActivatedSpeak && Chat && chatProps ? (
-
-      ) : startPanelProps ? (
-
-      ) : null}
-
+
+
+        {Avatar && avatarProps && (
+
+        )}
 
-    {poweredBy}
-
-
-);
+
+
+
+          {sessionId && hasUserActivatedSpeak && Chat && chatProps ? (
+
+          ) : startPanelProps ? (
+
+          ) : null}
+
+
+        {poweredBy}
+
+
+  );
+};
 
-export default FullPageLayout;
+export default ZoomedFullBodyLayout;

diff --git a/src/components/layouts/__snapshots__/ZoomedFullBody.test.tsx.snap b/src/components/layouts/__snapshots__/ZoomedFullBody.test.tsx.snap
index c0d7a523..b5162ce9 100644
--- a/src/components/layouts/__snapshots__/ZoomedFullBody.test.tsx.snap
+++ b/src/components/layouts/__snapshots__/ZoomedFullBody.test.tsx.snap
@@ -36,7 +36,7 @@ exports[`renders ZOOMED_FULL_BODY layout unchanged 1`] = `
   />
-
-
-
-        Powered by
-
-
-        Memori.AI
-
-
+
+
+
+        Powered by
+
+
+        Memori.AI
+
+
diff --git a/src/context/visemeContext.tsx b/src/context/visemeContext.tsx
--- a/src/context/visemeContext.tsx
+++ b/src/context/visemeContext.tsx
 interface VisemeContextType {
-  setMeshRef: (mesh: SkinnedMesh | null) => void;
-  addVisemeToQueue: (viseme: AzureViseme) => void;
-  processVisemeQueue: () => ProcessedViseme[];
-  clearVisemes: () => void;
-  isMeshSet: boolean;
-  setEmotion: (emotion: string) => void;
-  emotion: string;
-  getAzureStyleForEmotion: (emotion: string) => string;
+  addViseme: (visemeId: number, audioOffset: number) => void;
+  updateCurrentViseme: (currentTime: number) => Viseme | null;
+  startProcessing: () => void;
+  stopProcessing: () => void;
+  resetVisemeQueue: () => void;
+  isProcessing: boolean;
 }
 
 const VisemeContext = createContext<VisemeContextType | undefined>(undefined);
 
-const VISEME_SMOOTHING = 0.5;
-const DEFAULT_VISEME_DURATION = 0.1;
-const MINIMUM_ELAPSED_TIME = 0.01;
-const VISEME_SPEED_FACTOR = 1.0;
-const AUDIO_PLAYBACK_RATE = 1.0;
-const VISEME_BASE_SPEED = 1.0;
-
 const VISEME_MAP: { [key: number]: string } = {
   0: 'viseme_sil', // silence
   1: 'viseme_PP', // p, b, m
   21: 'viseme_PP', // y (closest match, could be debated)
 };
 
+const DEFAULT_VISEME_DURATION = 0.04 //0; // Reduced from 0.4 for smoother transitions
+const VISEME_OVERLAP = 0.35; // Slightly increased from 0.04 for more overlap
+const SMOOTHING_FACTOR = 0.35 // New constant for weight smoothing
+const TIME_OFFSET = -0.25; // Adjust this value as needed (in seconds)
+const PRELOAD_TIME = 0.525; // Preload visemes 0.5 seconds in advance
 
-export const VisemeProvider: React.FC<{ children: React.ReactNode }> = ({
-  children,
-}) => {
-  const [isMeshSet, setIsMeshSet] = useState(false);
-  const [emotion, setEmotion] = useState('Neutral');
-  const isAnimatingRef = useRef(false);
-  const currentVisemesRef = useRef([]);
-  const visemeQueueRef = useRef([]);
-  const animationFrameRef = useRef(null);
-  const startTimeRef = useRef(null);
-  const currentVisemeWeightRef = useRef<{ [key: string]: number }>({});
-  const meshRef = useRef(null);
 
-  const lerp = (start: number, end: number, alpha: number): number => {
-    return start * (1 - alpha) + end * alpha;
-  };
-
-  const easeInOutCubic = (x: number): number => {
-    return x < 0.5 ? 4 * x * x * x : 1 - Math.pow(-2 * x + 2, 3) / 2;
-  };
+export const VisemeProvider: React.FC<{ children: React.ReactNode }> = ({ children }) => {
+  const visemeQueueRef = useRef<Viseme[]>([]);
+  const startTimeRef = useRef<number | null>(null);
+  const [isProcessing, setIsProcessing] = useState(false);
+  const lastVisemeRef = useRef<Viseme | null>(null);
+
+  const addViseme = useCallback((visemeId: number, audioOffset: number) => {
+    const visemeName = VISEME_MAP[visemeId] || 'viseme_sil';
+    const startTime = audioOffset / 10000000 + TIME_OFFSET;
+    const endTime = startTime + DEFAULT_VISEME_DURATION;
+    const newViseme: Viseme = {
+      name: visemeName,
+      weight: 0,
+      startTime,
+      endTime,
+    };
+    visemeQueueRef.current.push(newViseme);
 
-  const setMeshRef = useCallback(
-    (mesh: SkinnedMesh | null) => {
-      if (mesh && mesh.morphTargetDictionary && mesh.morphTargetInfluences) {
-        meshRef.current = mesh;
-        setIsMeshSet(true);
-        // console.log('Mesh set successfully:', mesh);
-      } else {
-        console.error('Invalid mesh provided:', mesh);
-      }
-    },
-    [meshRef]
-  );
+    if (!isProcessing) {
+      startProcessing();
+    }
+  }, [isProcessing]);
 
-  const addVisemeToQueue = useCallback((viseme: AzureViseme) => {
-    visemeQueueRef.current.push(viseme);
-    // console.log('Viseme added to queue:', viseme);
-  }, []);
+  const updateCurrentViseme = useCallback((currentTime: number): Viseme | null => {
+    if (!isProcessing || startTimeRef.current === null) {
+      console.log('StartTimeRef not set');
+      return null;
+    }
 
-  const getCurrentViseme = useCallback((elapsedTime: number) => {
-    if (elapsedTime < MINIMUM_ELAPSED_TIME) return null;
+    const elapsedTime = currentTime - startTimeRef.current + PRELOAD_TIME;
 
-    return currentVisemesRef.current.find((viseme, index) => {
-      const nextViseme = currentVisemesRef.current[index + 1];
-      return (
-        elapsedTime >= viseme.startTime &&
-        (!nextViseme || elapsedTime < nextViseme.startTime)
-      );
-    });
-  }, []);
+    // Remove expired visemes
+    visemeQueueRef.current = visemeQueueRef.current.filter(v => v.endTime > elapsedTime);
 
-  const getDynamicSpeedFactor = (visemeDuration: number): number => {
-    const baseDuration = 0.1; // Average expected viseme duration
-    return (
-      VISEME_BASE_SPEED * (baseDuration / visemeDuration) * AUDIO_PLAYBACK_RATE
+    const currentViseme = visemeQueueRef.current.find(v =>
+      v.startTime <= elapsedTime && v.endTime > elapsedTime - VISEME_OVERLAP
     );
-  };
-
-  const applyViseme = useCallback(
-    (viseme: ProcessedViseme, elapsedTime: number) => {
-      if (!meshRef.current) {
-        console.error('Mesh not set');
-        return;
-      }
-
-      const visemeProgress = Math.min(
-        (elapsedTime - viseme.startTime) / viseme.duration,
-        1
-      );
-
-      const dynamicSpeedFactor = getDynamicSpeedFactor(viseme.duration);
-      const adjustedProgress = visemeProgress * dynamicSpeedFactor;
-
-      // Use a cubic easing function for smoother transitions
-      const easedProgress = easeInOutCubic(adjustedProgress);
-      const targetWeight = Math.sin(easedProgress * Math.PI) * viseme.weight;
-
-      currentVisemeWeightRef.current[viseme.name] = lerp(
-        currentVisemeWeightRef.current[viseme.name] || 0,
-        targetWeight,
-        VISEME_SMOOTHING
-      );
-
-      const visemeIndex = meshRef.current.morphTargetDictionary?.[viseme.name];
-      if (
-        typeof visemeIndex === 'number' &&
-        meshRef.current.morphTargetInfluences
-      ) {
-        meshRef.current.morphTargetInfluences[visemeIndex] =
-          currentVisemeWeightRef.current[viseme.name];
-        // console.log(`Applied viseme: ${viseme.name}, weight: ${currentVisemeWeightRef.current[viseme.name]}`);
-      } else {
-        console.error(
-          `Viseme not found in morph target dictionary: ${viseme.name}`
-        );
-      }
-    },
-    []
-  );
 
-  const animate = useCallback(
-    (time: number) => {
-      if (startTimeRef.current === null) {
-        startTimeRef.current = time;
-      }
-
-      const elapsedTime =
-        ((time - startTimeRef.current) / 1000) * VISEME_SPEED_FACTOR;
-
-      const currentViseme = getCurrentViseme(elapsedTime);
-      if (currentViseme) {
-        applyViseme(currentViseme, elapsedTime);
-      }
-
-      if (
-        currentVisemesRef.current.length > 0 &&
-        elapsedTime <
-          currentVisemesRef.current[currentVisemesRef.current.length - 1]
-            .startTime +
-            currentVisemesRef.current[currentVisemesRef.current.length - 1]
-              .duration
-      ) {
-        animationFrameRef.current = requestAnimationFrame(animate);
-      } else {
-        clearVisemes();
-      }
-    },
-    [getCurrentViseme, applyViseme]
-  );
+    if (currentViseme) {
+      console.log('CurrentViseme Found!')
+      const visemeProgress = (elapsedTime - currentViseme.startTime) / (currentViseme.endTime - currentViseme.startTime);
+      const targetWeight = Math.sin(Math.PI * Math.min(visemeProgress, 1));
+
+      // Smooth the weight transition
+      const smoothedWeight = lastVisemeRef.current
+        ? lastVisemeRef.current.weight + (targetWeight - lastVisemeRef.current.weight) * SMOOTHING_FACTOR
+        : targetWeight;
+
+      const updatedViseme = { ...currentViseme, weight: smoothedWeight };
+      lastVisemeRef.current = updatedViseme;
+      return updatedViseme;
+    }
 
-  const processVisemeQueue = useCallback(() => {
-    const azureVisemes = [...visemeQueueRef.current];
-    visemeQueueRef.current = [];
-
-    if (azureVisemes.length === 0) {
-      // console.log('No visemes to process');
-      return [];
+    // Gradually reduce weight when no viseme is active
+    if (lastVisemeRef.current) {
+      const reducedWeight = lastVisemeRef.current.weight * (1 - SMOOTHING_FACTOR);
+      if (reducedWeight > 0.01) {
+        lastVisemeRef.current = { ...lastVisemeRef.current, weight: reducedWeight };
+        return lastVisemeRef.current;
+      }
     }
 
-    const processedVisemes: ProcessedViseme[] = azureVisemes.map(
-      (currentViseme, i) => {
-        const nextViseme = azureVisemes[i + 1];
-        const duration = nextViseme
-          ? (nextViseme.audioOffset - currentViseme.audioOffset) / 10000000
-          : DEFAULT_VISEME_DURATION;
-
-        const processedViseme = {
-          name: VISEME_MAP[currentViseme.visemeId] || 'viseme_sil',
-          duration,
-          weight: 1,
-          startTime: currentViseme.audioOffset / 10000000,
-        };
-        //console.log('Processed viseme:', processedViseme);
-        return processedViseme;
-      }
-    );
-
-    currentVisemesRef.current = processedVisemes;
-
-    // Start animation immediately if not already animating
-    if (!isAnimatingRef.current) {
-      isAnimatingRef.current = true;
-      startTimeRef.current = performance.now();
-      // console.log('Starting animation');
-      animationFrameRef.current = requestAnimationFrame(animate);
-    } else {
-      // If already animating, adjust the start time for the new visemes
-      if (startTimeRef.current !== null) {
-        const currentTime = performance.now();
-        const elapsedTime =
-          ((currentTime - startTimeRef.current) / 1000) * VISEME_SPEED_FACTOR;
-        startTimeRef.current =
-          currentTime - (elapsedTime / VISEME_SPEED_FACTOR) * 1000;
+    lastVisemeRef.current = null;
+    return null;
+  }, [isProcessing]);
 
-    return processedVisemes;
-  }, [isMeshSet, animate]);
+  const startProcessing = useCallback(() => {
+    if (isProcessing) return;
+    startTimeRef.current = performance.now() / 1000;
+    setIsProcessing(true);
+  }, [isProcessing]);
 
-  const clearVisemes = useCallback(() => {
-    currentVisemesRef.current = [];
-    visemeQueueRef.current = [];
-
-    if (animationFrameRef.current !== null) {
-      cancelAnimationFrame(animationFrameRef.current);
-      animationFrameRef.current = null;
-    }
-
-    if (
-      meshRef.current?.morphTargetDictionary &&
-      meshRef.current?.morphTargetInfluences
-    ) {
-      Object.values(meshRef.current.morphTargetDictionary).forEach(index => {
-        if (typeof index === 'number') {
-          meshRef.current!.morphTargetInfluences![index] = 0;
-        }
-      });
-    }
+  const stopProcessing = useCallback(() => {
+    setIsProcessing(false);
     startTimeRef.current = null;
+    lastVisemeRef.current = null;
+  }, []);
 
-    currentVisemeWeightRef.current = {};
-    isAnimatingRef.current = false;
-    // console.log('Visemes cleared');
-  }, []);
-
-  // Your existing emotion map
-  const emotionMap: Record<string, Record<string, number>> = {
-    Gioia: { Gioria: 1 },
-    Rabbia: { Rabbia: 1 },
-    Sorpresa: { Sorpresa: 1 },
-    Tristezza: { Tristezza: 1 },
-    Timore: { Timore: 1 },
-  };
-
-  // Mapping from your emotions to Azure styles
-  const emotionToAzureStyleMap: Record<string, string> = {
-    Gioia: 'cheerful',
-    Rabbia: 'angry',
-    Sorpresa: 'excited',
-    Tristezza: 'sad',
-    Timore: 'terrified',
-  };
-
-  // Function to get Azure style from emotion
-  function getAzureStyleForEmotion(emotion: string): string {
-    return emotionToAzureStyleMap[emotion] || 'neutral';
-  }
-
-  useEffect(() => {
-    return () => {
-      if (animationFrameRef.current !== null) {
-        cancelAnimationFrame(animationFrameRef.current);
-      }
-    };
+  const resetVisemeQueue = useCallback(() => {
+    visemeQueueRef.current = [];
+    lastVisemeRef.current = null;
   }, []);
 
-  const contextValue: VisemeContextType = {
-    setMeshRef,
-    addVisemeToQueue,
-    processVisemeQueue,
-    clearVisemes,
-    isMeshSet,
-    setEmotion,
-    emotion,
-    getAzureStyleForEmotion,
+  const contextValue = {
+    addViseme,
+    updateCurrentViseme,
+    startProcessing,
+    stopProcessing,
+    resetVisemeQueue,
+    isProcessing,
   };
 
   return (
 
   );
 };
 
 export const useViseme = (): VisemeContextType => {
   const context = useContext(VisemeContext);
-  if (context === undefined) {
+  if (!context) {
     throw new Error('useViseme must be used within a VisemeProvider');
   }
   return context;
-};
+};
\ No newline at end of file

diff --git a/src/helpers/utils.ts b/src/helpers/utils.ts
index 01f53cb9..99a35432 100644
--- a/src/helpers/utils.ts
+++ b/src/helpers/utils.ts
@@ -184,15 +184,16 @@ export const stripMarkdown = (text: string) => {
 };
 
 export const stripOutputTags = (text: string): string => {
-  let hasTags = text.includes('<output>');
-
-  if (!hasTags) return text;
-
-  let output = text.split('<output>');
-  let textBefore = output[0];
-  let textAfter = output[1].split('</output>')[1];
+  const outputTagRegex = /<output>[\s\S]*?<\/output>/gs;
+
+  if (!outputTagRegex.test(text)) {
+    return text;
+  }
 
-  return stripOutputTags(textBefore + textAfter);
+  const strippedText = text.replace(outputTagRegex, '');
+
+  // Recursively strip nested output tags
+  return stripOutputTags(strippedText);
 };
 
 export const stripHTML = (text: string) => {

diff --git a/src/styles.css b/src/styles.css
index 8df84b2c..bafbabea 100644
--- a/src/styles.css
+++ b/src/styles.css
@@ -52,6 +52,7 @@
 @import url('./components/layouts/totem.css');
 @import url('./components/layouts/website-assistant.css');
 @import url('./components/layouts/chat.css');
+@import url('./components/layouts/zoomed-full-body.css');
 
 @import url('https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/katex.min.css');
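For reference, a quick trace of the new stripOutputTags on sample inputs, assuming the /<output>[\s\S]*?<\/output>/gs pattern shown above (expected results worked out by hand):

    import { stripOutputTags } from './src/helpers/utils'; // path assumed

    // Each pass removes every non-overlapping <output>...</output> span;
    // the recursive call then re-checks until no opening tag matches.
    stripOutputTags('Hi <output>hidden</output> there');
    // -> 'Hi  there'
    stripOutputTags('<output>a</output><output>b</output>done');
    // -> 'done'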