diff --git a/docker-compose.yml b/docker-compose.yml
index 0f4ac79..62a7013 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -8,13 +8,15 @@ services:
       options:
         max-size: "50m"
     environment:
-      - API_KEY=${API_KEY}
+      - GPTS_API_SERVER=${GPTS_API_SERVER}
+      - API_SECRET=${API_KEY}
       - OPENAI_API_TYPE=${OPENAI_API_TYPE}
       - AZURE_OPENAI_API_VERSION=${AZURE_OPENAI_API_VERSION}
       - AZURE_OPENAI_API_BASE=${AZURE_OPENAI_API_BASE}
       - AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY}
      - OPENAI_API_KEY=${OPENAI_API_KEY}
       - QDRANT_URL=${QDRANT_URL}
+      - QDRANT_KEY=${QDRANT_KEY}
       - DATA_DIR=/data
     volumes:
       - gptservice-volume:/data
@@ -32,12 +34,15 @@ services:
       options:
         max-size: "50m"
     environment:
+      - GPT_SERVICE_ADDRESS=${GPT_SERVICE_ADDRESS}
+      - GPT_SERVICE_TOKEN=${GPT_SERVICE_TOKEN}
       - OPENAI_API_TYPE=${OPENAI_API_TYPE}
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
       - AZURE_OPENAI_API_VERSION=${AZURE_OPENAI_API_VERSION}
       - AZURE_OPENAI_API_BASE=${AZURE_OPENAI_API_BASE}
       - AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY}
-      - OPENAI_API_KEY=${OPENAI_API_KEY}
-      - QDRANT_URL=${QDRANT_URL}
+      - MSAL_TENANTID=${MSAL_TENANTID}
+      - MSAL_APPID=${MSAL_APPID}
       - DATA_DIR=/data
     volumes:
       - gptstudio-volume:/data
diff --git a/libs/session.py b/libs/session.py
new file mode 100644
index 0000000..0eab863
--- /dev/null
+++ b/libs/session.py
@@ -0,0 +1,31 @@
+import streamlit as st
+
+
+class PageSessionState:
+    def __init__(self, prefix):
+        self._prefix = prefix
+
+    def initn_attr(self, key: str, default_value: object):
+        # NOTE: hasattr(self, key) would always be True here because
+        # __getattr__ returns None instead of raising, so check st.session_state directly.
+        if self.newkey(key) not in st.session_state:
+            setattr(self, key, default_value)
+
+    def __getattr__(self, key):
+        if key == "_prefix":
+            return self.__dict__[key]
+        return st.session_state.get(f"{self._prefix}_{key}", None)
+
+    def __setattr__(self, key, value):
+        if key == "_prefix":
+            self.__dict__[key] = value
+        else:
+            st.session_state[f"{self._prefix}_{key}"] = value
+
+    def __delattr__(self, key):
+        if key == "_prefix":
+            raise AttributeError("Cannot delete _prefix attribute")
+        st.session_state.pop(f"{self._prefix}_{key}", None)
+
+    def newkey(self, key):
+        return f"{self._prefix}_{key}"
diff --git a/pages/04_Speech_Transcribe.py b/pages/04_Speech_Transcribe.py
index 3158c01..0bcb774 100644
--- a/pages/04_Speech_Transcribe.py
+++ b/pages/04_Speech_Transcribe.py
@@ -11,38 +11,52 @@
 import os
 from dotenv import load_dotenv
 
+from libs.session import PageSessionState
+
 sys.path.append(os.path.abspath('..'))
 
 load_dotenv()
 
+page_state = PageSessionState("speech_transcribe")
+
 with st.sidebar:
     value = msal_auth()
     if value is None:
         st.stop()
 
+st.sidebar.markdown("# 🎙️语音转录🎤")
 st.markdown("# 🎙️语音转录🎤")
 st.markdown("> 上传文本或者录制语音识别,然后合成新的语音")
 
-if "audio_recode" not in st.session_state:
-    st.session_state.audio_recode = None
-
-if "speech_recode" not in st.session_state:
-    st.session_state.speech_recode = None
-
-if "audio_processing" not in st.session_state:
-    st.session_state.audio_processing = False
+# recorded or uploaded text to be synthesized
+page_state.initn_attr("audio_recode", None)
+# synthesized speech bytes
+page_state.initn_attr("speech_recode", None)
+# whether a synthesis task is still running
+page_state.initn_attr("audio_processing", False)
 
+# directory for temporary files
 data_dir = os.getenv("DATA_DIR", "/tmp/data")
 if not os.path.exists(data_dir):
     os.makedirs(data_dir)
 
-uploaded_file = st.file_uploader("上传文本文件", type=["txt", "md"])
+content_box = st.empty()
+
+uploaded_file = st.sidebar.file_uploader("上传文本文件", type=["txt", "md"])
 if uploaded_file is not None:
     stringio = io.StringIO(uploaded_file.getvalue().decode("utf-8"))
     string_data = stringio.read()
-    st.session_state.audio_recode = string_data
+    page_state.audio_recode = string_data
+
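+# reset state and start a new recording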
+if st.sidebar.button("录制音频"):
+    page_state.audio_recode = None
+    page_state.audio_processing = False
+    page_state.speech_recode = None
+    content_box.empty()
+    st.rerun()
 
-if st.session_state.audio_recode is None:
+if page_state.audio_recode is None:
     with st.spinner('正在识别语音...'):
         wav_audio_recode = audio_recorder("点击录音", icon_size="2x", pause_threshold=3.0)
         if wav_audio_recode is not None:
@@ -56,12 +70,11 @@
                 response_format="json",
                 file=open(filename, "rb"),
             )
-            st.session_state.audio_recode = transcript.text
+            page_state.audio_recode = transcript.text
             st.rerun()
 
-if st.session_state.audio_recode is not None:
-    st.markdown("### 🎤语音合成")
-    st.markdown(st.session_state.audio_recode)
+if page_state.audio_recode is not None:
+    content_box.markdown(page_state.audio_recode)
     sound = st.selectbox("选择音色", ["alloy", "echo", "fable", "onyx", "nova", "shimmer"])
     c1, c2, c3 = st.columns(3)
     if c1.button("合成语音"):
@@ -71,26 +84,20 @@
             response = client.audio.speech.create(
                 model="tts-1",
                 voice=sound,
-                input=st.session_state.audio_recode
+                input=page_state.audio_recode
             )
-            st.session_state.speech_recode = response.read()
+            page_state.speech_recode = response.read()
             st.write(f"🎧{sound}音色")
-            st.audio(st.session_state.speech_recode, format="audio/mp3")
+            st.audio(page_state.speech_recode, format="audio/mp3")
             st.write(f"语音{sound}合成完成")
             status.update(label="语音合成完成!", state="complete")
 
-    if c2.button("重新录制"):
-        st.session_state.audio_recode = None
-        st.session_state.audio_processing = False
-        st.session_state.speech_recode = None
-        st.rerun()
-
-    if st.session_state.speech_recode is not None:
+    if page_state.speech_recode is not None:
         c3.download_button(
             label="下载语音",
-            data=st.session_state.speech_recode,
+            data=page_state.speech_recode,
             file_name='speech.mp3',
         )
 else:
-    if st.session_state.audio_processing:
+    if page_state.audio_processing:
         st.write("还有任务在处理")
diff --git a/pages/05_Video.py b/pages/05_Video.py
index 400fa46..97958cb 100644
--- a/pages/05_Video.py
+++ b/pages/05_Video.py
@@ -1,5 +1,25 @@
-from streamlit_webrtc import webrtc_streamer
+import cv2
+import numpy as np
+import streamlit as st
+from camera_input_live import camera_input_live
 
+"# Streamlit camera input live Demo"
+"## Try holding a qr code in front of your webcam"
 
+image = camera_input_live(debounce=2000, width=360, height=240, show_controls=True)
 
-webrtc_streamer(key="sample")
+if image is not None:
+    st.image(image)
+    bytes_data = image.getvalue()
+    cv2_img = cv2.imdecode(np.frombuffer(bytes_data, np.uint8), cv2.IMREAD_COLOR)
+
+    detector = cv2.QRCodeDetector()
+
+    data, bbox, straight_qrcode = detector.detectAndDecode(cv2_img)
+
+    if data:
+        st.write("# Found QR code")
+        st.write(data)
+        with st.expander("Show details"):
+            st.write("BBox:", bbox)
+            st.write("Straight QR code:", straight_qrcode)
diff --git a/pages/06_Image_Analysis.py b/pages/06_Image_Analysis.py
new file mode 100644
index 0000000..e84d66b
--- /dev/null
+++ b/pages/06_Image_Analysis.py
@@ -0,0 +1,110 @@
+import streamlit as st
+from libs.llms import openai_analyze_image, openai_streaming
+from libs.msal import msal_auth
+from libs.session import PageSessionState
+
+st.set_page_config(page_title="视觉分析", page_icon="🔬")
+
+page_state = PageSessionState("image_analysis")
+
+with st.sidebar:
+    value = msal_auth()
+    if value is None:
+        st.stop()
+
+
+# conversation history
+page_state.initn_attr("messages", [])
+
+# marks whether the last user message has been processed
+page_state.initn_attr("last_user_msg_processed", True)
+
+# stores the image analysis result
+page_state.initn_attr("analysis_result", "")
+
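+# image input source: "camera" or "upload"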
+page_state.initn_attr("input_type", "camera") + +st.sidebar.markdown("# 🔬视觉分析") + +st.title("🔬视觉分析") + +# 图像分析提示输入 +prompt = st.sidebar.text_area("图像分析提示", "识别分析图片内容", height=40) + + +def clear_result(): + page_state.analysis_result = "" + page_state.last_user_msg_processed = True + page_state.messages = [] + if page_state.input_type == "camera" and page_state.camera_image is not None: + with st.spinner("分析中..."): + page_state.analysis_result = openai_analyze_image(prompt, page_state.camera_image) + page_state.messages.append({"role": "assistant", "content": page_state.analysis_result}) + + +# 摄像头输入获取图片 +if st.sidebar.selectbox("选择图片输入方式", ["摄像头", "上传图片"]) == "摄像头": + page_state.input_type = "camera" + image = st.camera_input("点击按钮截图", on_change=clear_result, key="image_analysis_camera_image") +else: + page_state.input_type = "upload" + image = st.sidebar.file_uploader("上传图片", type=["png", "jpg", "jpeg"], + on_change=clear_result, key="image_analysis_camera_image") + +if page_state.camera_image is not None: + if page_state.input_type == "upload": + st.image(page_state.camera_image, caption="上传的图片", use_column_width=True) + c1, c2 = st.columns(2) + if c1.button("分析图像"): + with st.spinner("分析中..."): + page_state.analysis_result = openai_analyze_image(prompt, page_state.camera_image) + page_state.messages.append({"role": "assistant", "content": page_state.analysis_result}) + + if c2.button("清除结果"): + clear_result() + +# 设置对话记录 +for msg in page_state.messages: + with st.chat_message(msg["role"]): + st.write(msg["content"]) + +# 输入用户消息 +if uprompt := st.chat_input("输入你的问题"): + # 用于标记用户消息还没有处理 + page_state.last_user_msg_processed = False + page_state.messages.append({"role": "user", "content": uprompt}) + with st.chat_message("user"): + st.write(uprompt) + +# 用户输入响应,如果上一条消息不是助手的消息,且上一条用户消息还没有处理完毕 +if ((page_state.messages + and page_state.messages[-1]["role"] != "assistant" + and not page_state.last_user_msg_processed) + and page_state.analysis_result not in [""]): + # 处理响应 + with st.chat_message("assistant"): + with st.spinner("Thinking..."): + sysmsg = f"""" + 以下是来自一图片识别获取的内容结果: + ''' + {page_state.analysis_result} + ''' + 我们将围绕这个内容进行深入讨论。 + """ + response = openai_streaming(sysmsg, page_state.messages[-10:]) + # 流式输出 + placeholder = st.empty() + full_response = '' + for item in response: + text = item.content + if text is not None: + full_response += text + placeholder.markdown(full_response) + placeholder.markdown(full_response) + + # 用于标记上一条用户消息已经处理完毕 + page_state.last_user_msg_processed = True + # 追加对话记录 + message = {"role": "assistant", "content": full_response} + page_state.messages.append(message) diff --git a/pages/06_OCR_Vision.py b/pages/06_OCR_Vision.py deleted file mode 100644 index 29e634a..0000000 --- a/pages/06_OCR_Vision.py +++ /dev/null @@ -1,99 +0,0 @@ -import streamlit as st -from libs.llms import openai_analyze_image, openai_streaming -from libs.msal import msal_auth - -with st.sidebar: - value = msal_auth() - if value is None: - st.stop() - -if "ocr_vision_messages" not in st.session_state.keys(): - st.session_state.ocr_vision_messages = [] - -if "ocr_vision_last_user_msg_processed" not in st.session_state: - st.session_state.ocr_vision_last_user_msg_processed = True - -if "ocr_vision_analysis_result" not in st.session_state: - st.session_state.ocr_vision_analysis_result = "" - -st.sidebar.markdown("# 🔬视觉分析") - -st.title("🔬视觉分析") - - -def clear_result(): - st.session_state.ocr_vision_analysis_result = "" - st.session_state.ocr_vision_last_user_msg_processed = True - 
-    st.session_state.ocr_vision_messages = []
-
-
-def save_result():
-    st.session_state.ocr_vision_analysis_result = st.session_state.ocr_vision_analysis_result_temp
-
-
-# Streamlit 应用的主要部分
-col1, col2, = st.columns([3, 6])
-
-# 摄像头输入获取图片
-image = col1.camera_input("点击按钮截图", on_change=clear_result)
-
-# 图像分析提示输入
-prompt = col2.text_input("图像分析提示", "识别分析图片内容")
-
-# 重新获取图像时触发图像分析
-if image is not None and not st.session_state.ocr_vision_analysis_result:
-    with col2:
-        with st.spinner("分析中..."):
-            st.session_state.ocr_vision_analysis_result = openai_analyze_image(prompt, image)
-
-# 使用文本区域组件显示分析结果, 支持手工修改
-if st.session_state.ocr_vision_analysis_result:
-    with col2:
-        st.text_area("识别结果(请手工修正识别错误)",
-                     value=st.session_state.ocr_vision_analysis_result,
-                     key="ocr_vision_analysis_result_temp",
-                     on_change=save_result,
-                     height=170)
-
-
-for ocr_vision_messages in st.session_state.ocr_vision_messages:
-    with st.chat_message(ocr_vision_messages["role"]):
-        st.write(ocr_vision_messages["content"])
-
-if uprompt := st.chat_input("输入你的问题"):
-    # 用于标记用户消息还没有处理
-    st.session_state.ocr_vision_last_user_msg_processed = False
-    st.session_state.ocr_vision_messages.append({"role": "user", "content": uprompt})
-    with st.chat_message("user"):
-        st.write(uprompt)
-
-# 用户输入响应,如果上一条消息不是助手的消息,且上一条用户消息还没有处理完毕
-if ((st.session_state.ocr_vision_messages and
-     st.session_state.ocr_vision_messages[-1]["role"] != "assistant" and
-     not st.session_state.ocr_vision_last_user_msg_processed) and
-        st.session_state.ocr_vision_analysis_result not in [""]):
-    with st.chat_message("assistant"):
-        with st.spinner("Thinking..."):
-            sysmsg = f""""
-            以下是来自一图片识别获取的内容结果:
-            '''
-            {st.session_state.ocr_vision_analysis_result}
-            '''
-            我们将围绕这个内容进行深入讨论。
-            """
-            response = openai_streaming(sysmsg, st.session_state.ocr_vision_messages[-10:])
-            # 流式输出
-            placeholder = st.empty()
-            full_response = ''
-            for item in response:
-                text = item.content
-                if text is not None:
-                    full_response += text
-                    placeholder.markdown(full_response)
-            placeholder.markdown(full_response)
-
-            # 用于标记上一条用户消息已经处理完毕
-            st.session_state.ocr_vision_last_user_msg_processed = True
-            # 追加对话记录
-            message = {"role": "assistant", "content": full_response}
-            st.session_state.ocr_vision_messages.append(message)
diff --git a/pages/Elements.py b/pages/Elements.py
deleted file mode 100644
index 3afad68..0000000
--- a/pages/Elements.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from streamlit_player import st_player
-
-# Embed a youtube video
-st_player("https://youtu.be/CmSKVW1v0xM")
-
-# Embed a music from SoundCloud
-st_player("https://soundcloud.com/imaginedragons/demons")
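For reference, a minimal usage sketch of the `PageSessionState` wrapper added in `libs/session.py`, mirroring how the updated pages use it (the `my_page` prefix and the keys below are illustrative, not part of this diff):

```python
import streamlit as st

from libs.session import PageSessionState

# Attributes are stored in st.session_state under "<prefix>_<key>".
page_state = PageSessionState("my_page")

# Sets a default only when the key is not in st.session_state yet.
page_state.initn_attr("messages", [])

# Attribute reads and writes are proxied to st.session_state.
page_state.messages.append({"role": "user", "content": "hello"})

# newkey() returns the namespaced key, e.g. for widget key= arguments;
# the widget's value is then readable back as page_state.prompt.
st.text_input("Prompt", key=page_state.newkey("prompt"))
```

The same naming convention is what lets `pages/06_Image_Analysis.py` read the camera/file widget back as `page_state.camera_image` through the shared key `image_analysis_camera_image`.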