Skip to content

Commit

Permalink
feat: add voice communication using Gemini’s built-in audio recognition capabilities
Browse files Browse the repository at this point in the history
  • Loading branch information
Amery2010 committed Jun 8, 2024
1 parent fdd7bfc commit a54a838
Show file tree
Hide file tree
Showing 25 changed files with 341 additions and 136 deletions.
13 changes: 13 additions & 0 deletions app/globals.css
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,19 @@ html.dark .hljs-warpper ::-webkit-scrollbar-thumb:hover {
border-radius: 4px;
margin-bottom: 8px;
}
/* Darken the backdrop of the image lightbox (yet-another-react-lightbox). */
.yarl__root .yarl__container {
background-color: rgba(0, 0, 0, 0.8);
}
/* Seek-bar thumb for the custom AudioPlayer (Radix slider renders the thumb
   as a span with role="slider"). */
.audio-slider span[role='slider'] {
cursor: pointer;
width: 16px;
height: 16px;
/* Animate the hover growth below instead of snapping. */
transition: all 150ms linear;
}
/* Enlarge the thumb on hover so it is easier to grab. */
.audio-slider span[role='slider']:hover {
width: 20px;
height: 20px;
}

@media (max-width: 768px) {
.hljs-warpper {
Expand Down
100 changes: 31 additions & 69 deletions app/page.tsx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
'use client'
import dynamic from 'next/dynamic'
import { useRef, useState, useMemo, KeyboardEvent, useEffect, useCallback } from 'react'
import { EdgeSpeech, SpeechRecognition, getRecordMineType } from '@xiangfa/polly'
import { EdgeSpeech, getRecordMineType } from '@xiangfa/polly'
import SiriWave from 'siriwave'
import {
MessageCircleHeart,
Expand Down Expand Up @@ -61,7 +61,6 @@ export default function Home() {
const audioStreamRef = useRef<AudioStream>()
const edgeSpeechRef = useRef<EdgeSpeech>()
const audioRecordRef = useRef<AudioRecorder>()
const speechRecognitionRef = useRef<SpeechRecognition>()
const speechQueue = useRef<PromiseQueue>()
const messagesRef = useRef(useMessageStore.getState().messages)
const messageStore = useMessageStore()
Expand All @@ -72,12 +71,10 @@ export default function Home() {
const [content, setContent] = useState<string>('')
const [subtitle, setSubtitle] = useState<string>('')
const [errorMessage, setErrorMessage] = useState<string>('')
// const [isRecording, setIsRecording] = useState<boolean>(false)
// const [recordTimer, setRecordTimer] = useState<NodeJS.Timeout>()
const [recordTime, setRecordTime] = useState<number>(0)
const [settingOpen, setSetingOpen] = useState<boolean>(false)
const [speechSilence, setSpeechSilence] = useState<boolean>(false)
const [disableSpeechRecognition, setDisableSpeechRecognition] = useState<boolean>(false)
const [isRecording, setIsRecording] = useState<boolean>(false)
const [status, setStatus] = useState<'thinkng' | 'silence' | 'talking'>('silence')
const statusText = useMemo(() => {
switch (status) {
Expand All @@ -95,6 +92,9 @@ export default function Home() {
const supportAttachment = useMemo(() => {
return !OldTextModel.includes(settingStore.model as Model)
}, [settingStore.model])
const supportSpeechRecognition = useMemo(() => {
return !OldTextModel.includes(settingStore.model as Model) && !OldVisionModel.includes(settingStore.model as Model)
}, [settingStore.model])
const isUploading = useMemo(() => {
for (const file of attachmentStore.files) {
if (file.status === 'PROCESSING') return true
Expand Down Expand Up @@ -276,6 +276,9 @@ export default function Home() {
}
},
onFinish: async () => {
if (talkMode === 'voice') {
setStatus('silence')
}
scrollToBottom()
saveMessage()
if (maxHistoryLength > 0) {
Expand Down Expand Up @@ -432,47 +435,17 @@ export default function Home() {
}
}, [])

// const startRecordTime = useCallback(() => {
// const intervalTimer = setInterval(() => {
// setRecordTime((time) => time + 1)
// }, 1000)
// setRecordTimer(intervalTimer)
// }, [])

// const endRecordTimer = useCallback(() => {
// clearInterval(recordTimer)
// }, [recordTimer])

// const handleRecorder = useCallback(() => {
// if (!checkAccessStatus()) return false
// if (!audioStreamRef.current) {
// audioStreamRef.current = new AudioStream()
// }
// if (speechRecognitionRef.current) {
// const { talkMode } = useSettingStore.getState()
// if (isRecording) {
// speechRecognitionRef.current.stop()
// endRecordTimer()
// setRecordTime(0)
// if (talkMode === 'voice') {
// handleSubmit(speechRecognitionRef.current.text)
// }
// setIsRecording(false)
// } else {
// speechRecognitionRef.current.start()
// setIsRecording(true)
// startRecordTime()
// }
// }
// }, [checkAccessStatus, handleSubmit, startRecordTime, endRecordTimer, isRecording])

const handleRecorder = useCallback(() => {
if (!checkAccessStatus()) return false
if (!audioStreamRef.current) {
audioStreamRef.current = new AudioStream()
}
if (!audioRecordRef.current) {
if (!audioRecordRef.current || audioRecordRef.current.autoStop !== settingStore.autoStopRecord) {
audioRecordRef.current = new AudioRecorder({
autoStop: settingStore.autoStopRecord,
onStart: () => {
setIsRecording(true)
},
onTimeUpdate: (time) => {
setRecordTime(time)
},
Expand All @@ -481,15 +454,18 @@ export default function Home() {
const file = new File([audioData], `${Date.now()}.${recordType.extension}`, { type: recordType.mineType })
const recordDataURL = await readFileAsDataURL(file)
handleSubmit(recordDataURL)
setIsRecording(false)
},
})
}
if (audioRecordRef.current.isRecording) {
audioRecordRef.current.stop()
} else {
audioRecordRef.current.start()
} else {
if (audioRecordRef.current.isRecording) {
audioRecordRef.current.stop()
} else {
audioRecordRef.current.start()
}
}
}, [checkAccessStatus, handleSubmit])
}, [settingStore.autoStopRecord, checkAccessStatus, handleSubmit])

const handleStopTalking = useCallback(() => {
setSpeechSilence(true)
Expand All @@ -500,14 +476,14 @@ export default function Home() {

const handleKeyDown = useCallback(
(ev: KeyboardEvent<HTMLTextAreaElement>) => {
if (ev.key === 'Enter' && !ev.shiftKey && !audioRecordRef.current?.isRecording) {
if (ev.key === 'Enter' && !ev.shiftKey && !isRecording) {
if (!checkAccessStatus()) return false
// Prevent the default carriage return and line feed behavior
ev.preventDefault()
handleSubmit(content)
}
},
[content, handleSubmit, checkAccessStatus],
[content, handleSubmit, checkAccessStatus, isRecording],
)

const handleFileUpload = useCallback(
Expand Down Expand Up @@ -569,20 +545,6 @@ export default function Home() {
requestAnimationFrame(scrollToBottom)
}, [messagesRef.current.length, scrollToBottom])

useEffect(() => {
try {
speechRecognitionRef.current = new SpeechRecognition({
locale: settingStore.sttLang,
onUpdate: (text) => {
setContent(text)
},
})
} catch (err) {
console.error(err)
setDisableSpeechRecognition(true)
}
}, [settingStore.sttLang])

useEffect(() => {
const setting = useSettingStore.getState()
if (setting.ttsLang !== '') {
Expand Down Expand Up @@ -688,7 +650,7 @@ export default function Home() {
)}
<div ref={scrollAreaBottomRef}></div>
<div className="fixed bottom-0 flex w-full max-w-screen-md items-end gap-2 p-4 pb-8 max-sm:p-2 max-sm:pb-3 landscape:max-md:pb-4">
{!disableSpeechRecognition ? (
{supportSpeechRecognition ? (
<Button title={t('voiceMode')} variant="secondary" size="icon" onClick={() => updateTalkMode('voice')}>
<AudioLines />
</Button>
Expand All @@ -704,7 +666,7 @@ export default function Home() {
autoFocus
className={cn(
'h-auto max-h-[120px] w-full resize-none border-none bg-transparent px-2 text-sm leading-6 transition-[height] focus-visible:outline-none',
disableSpeechRecognition ? 'pr-9' : 'pr-[72px]',
!supportSpeechRecognition ? 'pr-9' : 'pr-[72px]',
)}
style={{ height: textareaHeight > 24 ? `${textareaHeight}px` : 'auto' }}
value={content}
Expand All @@ -731,15 +693,15 @@ export default function Home() {
</Tooltip>
</TooltipProvider>
) : null}
{!disableSpeechRecognition ? (
{supportSpeechRecognition ? (
<TooltipProvider>
<Tooltip open={audioRecordRef.current?.isRecording}>
<Tooltip open={isRecording}>
<TooltipTrigger asChild>
<div
className="box-border flex h-8 w-8 cursor-pointer items-center justify-center rounded-full p-1.5 text-slate-800 hover:bg-secondary/80 dark:text-slate-600"
onClick={() => handleRecorder()}
>
<Mic className={audioRecordRef.current?.isRecording ? 'animate-pulse' : ''} />
<Mic className={isRecording ? 'animate-pulse' : ''} />
</div>
</TooltipTrigger>
<TooltipContent
Expand All @@ -759,7 +721,7 @@ export default function Home() {
title={t('send')}
variant="secondary"
size="icon"
disabled={audioRecordRef.current?.isRecording || isUploading}
disabled={isRecording || isUploading}
onClick={() => handleSubmit(content)}
>
<SendHorizontal />
Expand Down Expand Up @@ -808,7 +770,7 @@ export default function Home() {
disabled={status === 'thinkng'}
onClick={() => handleRecorder()}
>
{audioRecordRef.current?.isRecording ? formatTime(recordTime) : <Mic className="h-8 w-8" />}
{isRecording ? formatTime(recordTime) : <Mic className="h-8 w-8" />}
</Button>
)}
<Button
Expand All @@ -824,7 +786,7 @@ export default function Home() {
</div>
</div>
</div>
<Setting open={settingOpen} hiddenTalkPanel={disableSpeechRecognition} onClose={() => setSetingOpen(false)} />
<Setting open={settingOpen} hiddenTalkPanel={!supportSpeechRecognition} onClose={() => setSetingOpen(false)} />
</main>
)
}
36 changes: 36 additions & 0 deletions components/AudioPlayer.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import { memo } from 'react'
import { Play, Pause } from 'lucide-react'
import { Button } from '@/components/ui/button'
import { Slider } from '@/components/ui/slider'
import useAudio from '@/hooks/useAudio'
import { formatTime } from '@/utils/common'
import { cn } from '@/utils'

type Props = {
className?: string
src: string
}

/**
 * Minimal inline audio player: play/pause toggle, seek slider and a
 * current/total time readout.
 *
 * Playback state and controls come from the `useAudio` hook; this component
 * only renders them.
 *
 * @param src - URL (or data URL) of the audio resource to play.
 * @param className - Optional extra classes merged onto the wrapper via `cn`.
 */
function AudioPlayer({ src, className }: Props) {
  const { playing, duration, current, toggle, onChange } = useAudio(src)

  return (
    <div className={cn('flex w-72 gap-2', className)}>
      <Button className="h-8 w-8 px-1.5 py-1.5" variant="ghost" onClick={() => toggle()}>
        {playing ? <Pause className="h-5 w-5" /> : <Play className="h-5 w-5" />}
      </Button>
      <Slider
        className="audio-slider"
        // Slider positions are whole seconds; Math.ceil keeps value <= max.
        value={[Math.ceil(current)]}
        max={Math.ceil(duration)}
        step={1}
        // Named `value` (not `current`) to avoid shadowing `current` above.
        onValueChange={([value]) => onChange(value)}
      />
      <div className="font-mono text-sm leading-8">
        {formatTime(current)}/{formatTime(duration)}
      </div>
    </div>
  )
}

export default memo(AudioPlayer)
36 changes: 31 additions & 5 deletions components/MessageItem.tsx
Original file line number Diff line number Diff line change
@@ -1,24 +1,29 @@
'use client'
import { useEffect, useState, useCallback, useMemo, memo } from 'react'
import { useTranslation } from 'react-i18next'
import Lightbox from 'yet-another-react-lightbox'
import LightboxFullscreen from 'yet-another-react-lightbox/plugins/fullscreen'
import MarkdownIt from 'markdown-it'
import markdownHighlight from 'markdown-it-highlightjs'
import highlight from 'highlight.js'
import markdownKatex from '@traptitech/markdown-it-katex'
import Clipboard from 'clipboard'
import { useTranslation } from 'react-i18next'
import { User, Bot, RotateCw, Sparkles, Copy, CopyCheck, PencilLine, Eraser, Volume2 } from 'lucide-react'
import { User, Bot, RotateCw, Sparkles, Copy, CopyCheck, PencilLine, Eraser, Volume2, Eye } from 'lucide-react'
import { EdgeSpeech } from '@xiangfa/polly'
import { Avatar, AvatarFallback } from '@/components/ui/avatar'
import BubblesLoading from '@/components/BubblesLoading'
import FileList from '@/components/FileList'
import EditableArea from '@/components/EditableArea'
import AudioPlayer from '@/components/AudioPlayer'
import IconButton from '@/components/IconButton'
import { useMessageStore } from '@/store/chat'
import { useSettingStore } from '@/store/setting'
import AudioStream from '@/utils/AudioStream'
import { sentenceSegmentation } from '@/utils/common'
import { upperFirst, isFunction, find } from 'lodash-es'

import 'yet-another-react-lightbox/styles.css'

interface Props extends Message {
onRegenerate?: (id: string) => void
}
Expand Down Expand Up @@ -68,6 +73,8 @@ function MessageItem({ id, role, parts, attachments, onRegenerate }: Props) {
const [html, setHtml] = useState<string>('')
const [isEditing, setIsEditing] = useState<boolean>(false)
const [isCopyed, setIsCopyed] = useState<boolean>(false)
const [showLightbox, setShowLightbox] = useState<boolean>(false)
const [lightboxIndex, setLightboxIndex] = useState<number>(0)
const fileList = useMemo(() => {
return attachments ? attachments.filter((item) => !item.metadata?.mimeType.startsWith('image/')) : []
}, [attachments])
Expand Down Expand Up @@ -160,6 +167,11 @@ function MessageItem({ id, role, parts, attachments, onRegenerate }: Props) {
}
}, [content])

const openLightbox = useCallback((index: number) => {
setLightboxIndex(index)
setShowLightbox(true)
}, [])

const render = useCallback(
(content: string) => {
const md: MarkdownIt = MarkdownIt({
Expand Down Expand Up @@ -266,15 +278,22 @@ function MessageItem({ id, role, parts, attachments, onRegenerate }: Props) {
{inlineAudioList.length > 0 ? (
<div className="not:last:border-dashed not:last:border-b flex w-full flex-wrap pb-2">
{inlineAudioList.map((audio, idx) => {
return <audio key={idx} className="mb-2" src={audio} controls preload="auto" />
return <AudioPlayer key={idx} className="mb-2" src={audio} />
})}
</div>
) : null}
{inlineImageList.length > 0 ? (
<div className="flex flex-wrap gap-2 pb-2">
{inlineImageList.map((image, idx) => {
// eslint-disable-next-line
return <img key={idx} className="max-h-48 rounded-sm" src={image} alt="inline-image" />
return (
<div key={idx} className="group/image relative cursor-pointer" onClick={() => openLightbox(idx)}>
{
// eslint-disable-next-line
<img className="max-h-48 rounded-sm" src={image} alt="inline-image" />
}
<Eye className="absolute left-1/2 top-1/2 -ml-4 -mt-4 h-8 w-8 text-white/80 opacity-0 group-hover/image:opacity-100" />
</div>
)
})}
</div>
) : null}
Expand Down Expand Up @@ -319,6 +338,13 @@ function MessageItem({ id, role, parts, attachments, onRegenerate }: Props) {
)}
</div>
)}
<Lightbox
open={showLightbox}
close={() => setShowLightbox(false)}
slides={inlineImageList.map((item) => ({ src: item }))}
index={lightboxIndex}
plugins={[LightboxFullscreen]}
/>
</>
)
}
Expand Down
2 changes: 1 addition & 1 deletion components/ResponsiveDialog.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import {
DrawerTitle,
DrawerTrigger,
} from '@/components/ui/drawer'
import { useMediaQuery } from '@/hooks/useMediaQuery'
import useMediaQuery from '@/hooks/useMediaQuery'

type Props = {
open: boolean
Expand Down
Loading

0 comments on commit a54a838

Please sign in to comment.