Skip to content

Commit

Permalink
session state wrap
Browse files Browse the repository at this point in the history
  • Loading branch information
jamiesun committed Dec 7, 2023
1 parent 7dc8f0f commit c643be2
Show file tree
Hide file tree
Showing 7 changed files with 201 additions and 138 deletions.
11 changes: 8 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@ services:
options:
max-size: "50m"
environment:
- API_KEY=${API_KEY}
- GPTS_API_SERVER=${GPTS_API_SERVER}
- API_SECRET=${API_KEY}
- OPENAI_API_TYPE=${OPENAI_API_TYPE}
- AZURE_OPENAI_API_VERSION=${AZURE_OPENAI_API_VERSION}
- AZURE_OPENAI_API_BASE=${AZURE_OPENAI_API_BASE}
- AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY}
- OPENAI_API_KEY=${OPENAI_API_KEY}
- QDRANT_URL=${QDRANT_URL}
- QDRANT_KEY=${QDRANT_KEY}
- DATA_DIR=/data
volumes:
- gptservice-volume:/data
Expand All @@ -32,12 +34,15 @@ services:
options:
max-size: "50m"
environment:
- GPT_SERVICE_ADDRESS=${GPT_SERVICE_ADDRESS}
- GPT_SERVICE_TOKEN=${GPT_SERVICE_TOKEN}
- OPENAI_API_TYPE=${OPENAI_API_TYPE}
- OPENAI_API_KEY=${OPENAI_API_KEY}
- AZURE_OPENAI_API_VERSION=${AZURE_OPENAI_API_VERSION}
- AZURE_OPENAI_API_BASE=${AZURE_OPENAI_API_BASE}
- AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY}
- OPENAI_API_KEY=${OPENAI_API_KEY}
- QDRANT_URL=${QDRANT_URL}
- MSAL_TENANTID=${MSAL_TENANTID}
- MSAL_APPID=${MSAL_APPID}
- DATA_DIR=/data
volumes:
- gptstudio-volume:/data
Expand Down
29 changes: 29 additions & 0 deletions libs/session.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import streamlit as st


class PageSessionState:
    """Namespaced proxy over ``st.session_state``.

    Attribute reads/writes on an instance are forwarded to
    ``st.session_state`` under the key ``"<prefix>_<name>"``, so several
    Streamlit pages can reuse the same attribute names without colliding.
    """

    def __init__(self, prefix):
        # Kept on the instance itself (not in session state): the namespace
        # prepended to every proxied key.
        self._prefix = prefix

    def initn_attr(self, key: str, default_value: object):
        """Store *default_value* under *key* only if it was never set.

        Bug fix: the previous ``if not hasattr(self, key)`` check was always
        false-negative-proof — ``__getattr__`` returned ``None`` for missing
        keys instead of raising ``AttributeError``, so ``hasattr`` was always
        True and the default was never written.  Check the backing store
        directly instead.
        """
        if self.newkey(key) not in st.session_state:
            setattr(self, key, default_value)

    def __getattr__(self, key):
        # Only invoked when normal attribute lookup fails.  ``_prefix`` lives
        # in ``__dict__``; if it is genuinely absent (e.g. during
        # copy/pickle), raise AttributeError as the attribute protocol
        # requires instead of leaking a KeyError.
        if key == "_prefix":
            raise AttributeError(key)
        # Missing session keys read as None rather than raising.
        return st.session_state.get(f"{self._prefix}_{key}", None)

    def __setattr__(self, key, value):
        if key == "_prefix":
            self.__dict__[key] = value
        else:
            st.session_state[f"{self._prefix}_{key}"] = value

    def __delattr__(self, key):
        if key == "_prefix":
            raise AttributeError("Cannot delete _prefix attribute")
        # Deleting a key that was never set is a no-op.
        st.session_state.pop(f"{self._prefix}_{key}", None)

    def newkey(self, key):
        """Return the fully namespaced session-state key for *key*."""
        return f"{self._prefix}_{key}"
60 changes: 33 additions & 27 deletions pages/04_Speech_Transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,38 +11,51 @@
import os
from dotenv import load_dotenv

from libs.session import PageSessionState

sys.path.append(os.path.abspath('..'))
load_dotenv()

page_state = PageSessionState("speech_transcribe")

with st.sidebar:
value = msal_auth()
if value is None:
st.stop()

st.sidebar.markdown("# 🎙️语音转录🎤")

st.markdown("# 🎙️语音转录🎤")
st.markdown("> 上传文本或者录制语音识别,然后合成新的语音")

if "audio_recode" not in st.session_state:
st.session_state.audio_recode = None

if "speech_recode" not in st.session_state:
st.session_state.speech_recode = None

if "audio_processing" not in st.session_state:
st.session_state.audio_processing = False
# Recorded/transcribed text (None until a recording or upload exists).
page_state.initn_attr("audio_recode", None)
# Synthesized speech bytes (None until text-to-speech has run).
page_state.initn_attr("speech_recode", None)
# Flag: a transcription/synthesis task is currently in progress.
page_state.initn_attr("audio_processing", False)

# Directory for temporary audio files; created on first use.
data_dir = os.getenv("DATA_DIR", "/tmp/data")
if not os.path.exists(data_dir):
    os.makedirs(data_dir)

uploaded_file = st.file_uploader("上传文本文件", type=["txt", "md"])
content_box = st.empty()

uploaded_file = st.sidebar.file_uploader("上传文本文件", type=["txt", "md"])
if uploaded_file is not None:
stringio = io.StringIO(uploaded_file.getvalue().decode("utf-8"))
string_data = stringio.read()
st.session_state.audio_recode = string_data
page_state.audio_recode = string_data

if st.sidebar.button("录制音频"):
page_state.audio_recode = None
page_state.audio_processing = False
page_state.speech_recode = None
content_box.empty()
st.rerun()

if st.session_state.audio_recode is None:
if page_state.audio_recode is None:
with st.spinner('正在识别语音...'):
wav_audio_recode = audio_recorder("点击录音", icon_size="2x", pause_threshold=3.0)
if wav_audio_recode is not None:
Expand All @@ -56,12 +69,11 @@
response_format="json",
file=open(filename, "rb"),
)
st.session_state.audio_recode = transcript.text
page_state.audio_recode = transcript.text
st.rerun()

if st.session_state.audio_recode is not None:
st.markdown("### 🎤语音合成")
st.markdown(st.session_state.audio_recode)
if page_state.audio_recode is not None:
content_box.markdown(page_state.audio_recode)
sound = st.selectbox("选择音色", ["alloy", "echo", "fable", "onyx", "nova", "shimmer"])
c1, c2, c3 = st.columns(3)
if c1.button("合成语音"):
Expand All @@ -71,26 +83,20 @@
response = client.audio.speech.create(
model="tts-1",
voice=sound,
input=st.session_state.audio_recode
input=page_state.audio_recode
)
st.session_state.speech_recode = response.read()
page_state.speech_recode = response.read()
st.write(f"🎧{sound}音色")
st.audio(st.session_state.speech_recode, format="audio/mp3")
st.audio(page_state.speech_recode, format="audio/mp3")
st.write(f"语音{sound}合成完成")
status.update(label="语音合成完成!", state="complete")

if c2.button("重新录制"):
st.session_state.audio_recode = None
st.session_state.audio_processing = False
st.session_state.speech_recode = None
st.rerun()

if st.session_state.speech_recode is not None:
if page_state.speech_recode is not None:
c3.download_button(
label="下载语音",
data=st.session_state.speech_recode,
data=page_state.speech_recode,
file_name='speech.mp3',
)
else:
if st.session_state.audio_processing:
if page_state.audio_processing:
st.write("还有任务在处理")
24 changes: 22 additions & 2 deletions pages/05_Video.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,25 @@
from streamlit_webrtc import webrtc_streamer
import cv2
import numpy as np
import streamlit as st
from camera_input_live import camera_input_live

# Bare string literals are rendered as markdown by Streamlit "magic".
"# Streamlit camera input live Demo"
"## Try holding a qr code in front of your webcam"

# Poll the webcam every 2000 ms; yields the latest frame as a file-like
# object exposing getvalue() (used below), or None before the first capture.
image = camera_input_live(debounce=2000, width=360, height=240, show_controls=True)
webrtc_streamer(key="sample")
if image is not None:
    st.image(image)
    bytes_data = image.getvalue()
    # Decode the raw encoded image bytes into a BGR OpenCV matrix.
    cv2_img = cv2.imdecode(np.frombuffer(bytes_data, np.uint8), cv2.IMREAD_COLOR)

    detector = cv2.QRCodeDetector()

    # data: decoded payload ("" when no QR code is found);
    # bbox: corner coordinates of the detected code (or None);
    # straight_qrcode: rectified binary image of the code (or None).
    data, bbox, straight_qrcode = detector.detectAndDecode(cv2_img)

    if data:
        st.write("# Found QR code")
        st.write(data)
        with st.expander("Show details"):
            st.write("BBox:", bbox)
            st.write("Straight QR code:", straight_qrcode)
109 changes: 109 additions & 0 deletions pages/06_Image_Analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import streamlit as st
from libs.llms import openai_analyze_image, openai_streaming
from libs.msal import msal_auth
from libs.session import PageSessionState

# Configure the browser tab; must run before other Streamlit calls.
st.set_page_config(page_title="视觉分析", page_icon="🔬")

# All session attributes below are namespaced under "image_analysis".
page_state = PageSessionState("image_analysis")

# Gate the page behind MSAL sign-in; stop rendering for anonymous visitors.
with st.sidebar:
    value = msal_auth()
    if value is None:
        st.stop()


# Conversation history: list of {"role": ..., "content": ...} dicts.
page_state.initn_attr("messages", [])

# True once the latest user message has received an assistant reply.
page_state.initn_attr("last_user_msg_processed", True)

# Text returned by the image-analysis model ("" until an image is analyzed).
page_state.initn_attr("analysis_result", "")

# Currently selected image source: "camera" or "upload".
page_state.initn_attr("input_type", "camera")

st.sidebar.markdown("# 🔬视觉分析")

st.title("🔬视觉分析")

# Prompt sent along with the image to the analysis model.
prompt = st.sidebar.text_area("图像分析提示", "识别分析图片内容", height=40)


def clear_result():
    """Reset the page's analysis state and conversation history.

    When the camera source is active and a frame is available, immediately
    re-run the image analysis on it and seed the chat with the result.
    """
    # Wipe previous results and the pending-message flag.
    page_state.analysis_result = ""
    page_state.last_user_msg_processed = True
    page_state.messages = []

    has_camera_frame = (
        page_state.input_type == "camera"
        and page_state.camera_image is not None
    )
    if not has_camera_frame:
        return

    with st.spinner("分析中..."):
        result = openai_analyze_image(prompt, page_state.camera_image)
        page_state.analysis_result = result
        page_state.messages.append({"role": "assistant", "content": result})


# Choose the image source: live camera snapshot or an uploaded file.
# Both widgets deliberately share the key "image_analysis_camera_image",
# so the current image is always readable as page_state.camera_image
# (the PageSessionState prefix here is "image_analysis").
if st.sidebar.selectbox("选择图片输入方式", ["摄像头", "上传图片"]) == "摄像头":
    page_state.input_type = "camera"
    image = st.camera_input("点击按钮截图", on_change=clear_result, key="image_analysis_camera_image")
else:
    page_state.input_type = "upload"
    image = st.sidebar.file_uploader("上传图片", type=["png", "jpg", "jpeg"],
                                     on_change=clear_result, key="image_analysis_camera_image")

if page_state.camera_image is not None:
    # The camera widget already shows a preview; only echo uploaded files.
    if page_state.input_type == "upload":
        st.image(page_state.camera_image, caption="上传的图片", use_column_width=True)
    c1, c2 = st.columns(2)
    # Run the vision model on the current image and record the result
    # as the opening assistant message of the conversation.
    if c1.button("分析图像"):
        with st.spinner("分析中..."):
            page_state.analysis_result = openai_analyze_image(prompt, page_state.camera_image)
            page_state.messages.append({"role": "assistant", "content": page_state.analysis_result})

    if c2.button("清除结果"):
        clear_result()

# Replay the stored conversation history on every rerun.
for msg in page_state.messages:
    with st.chat_message(msg["role"]):
        st.write(msg["content"])

# Read a user chat message and queue it for processing on this rerun.
if uprompt := st.chat_input("输入你的问题"):
    # Mark the message as pending until the assistant has answered it.
    page_state.last_user_msg_processed = False
    page_state.messages.append({"role": "user", "content": uprompt})
    with st.chat_message("user"):
        st.write(uprompt)

# Generate a reply only when the newest message is an unprocessed user
# message AND an image-analysis result exists to ground the discussion.
if ((page_state.messages
     and page_state.messages[-1]["role"] != "assistant"
     and not page_state.last_user_msg_processed)
        and page_state.analysis_result not in [""]):
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            # Bug fix: the opening delimiter was f"""" (four quotes), which
            # injected a stray '"' as the first character of the system
            # prompt sent to the model.
            sysmsg = f"""
以下是来自一图片识别获取的内容结果:
'''
{page_state.analysis_result}
'''
我们将围绕这个内容进行深入讨论。
"""
            # Keep the context window bounded to the last 10 messages.
            response = openai_streaming(sysmsg, page_state.messages[-10:])
            # Stream tokens into a placeholder as they arrive.
            placeholder = st.empty()
            full_response = ''
            for item in response:
                text = item.content
                if text is not None:
                    full_response += text
                    placeholder.markdown(full_response)
            placeholder.markdown(full_response)

    # The pending user message has now been answered.
    page_state.last_user_msg_processed = True
    # Persist the assistant reply in the conversation history.
    message = {"role": "assistant", "content": full_response}
    page_state.messages.append(message)
Loading

0 comments on commit c643be2

Please sign in to comment.