From af48897db70d5e4b19b81c0ad3eda0c4e32277bf Mon Sep 17 00:00:00 2001
From: Jett Wang
Date: Thu, 7 Dec 2023 22:02:12 +0800
Subject: [PATCH] clear code

---
 .github/workflows/docker-gptstudio-publish.yml |  46 --------
 Dockerfile.gptstudio                           |  41 -------
 GPTStudio.py                                   |  50 --------
 Makefile                                       |   5 -
 components/__init__.py                         |   0
 .../streamlit_tesseract_scanner/__init__.py    |  79 -------------
 .../frontend/index.html                        |  19 ---
 .../frontend/main.js                           |  84 --------------
 .../frontend/streamlit-component-lib.js        |  35 ------
 .../frontend/test.js                           |   1 -
 config.toml                                    |   3 -
 libs/__init__.py                               |  17 ---
 libs/knowledge.py                              |  37 ------
 libs/llms.py                                   |  52 ---------
 libs/msal.py                                   |  29 -----
 libs/prompts.py                                |  32 -----
 libs/session.py                                |  29 -----
 pages/02_Knowledge_Search.py                   |  57 ---------
 pages/03_Ta365_Chatbot.py                      | 102 ----------------
 pages/04_Speech_Transcribe.py                  | 102 ----------------
 pages/05_Video.py                              |  25 ----
 pages/06_Image_Analysis.py                     | 109 ------------------
 pages/07_Audio.py                              |   6 -
 requirements-gptstudio.txt                     |  19 ---
 24 files changed, 979 deletions(-)
 delete mode 100644 .github/workflows/docker-gptstudio-publish.yml
 delete mode 100644 Dockerfile.gptstudio
 delete mode 100644 GPTStudio.py
 delete mode 100644 components/__init__.py
 delete mode 100644 components/streamlit_tesseract_scanner/__init__.py
 delete mode 100644 components/streamlit_tesseract_scanner/frontend/index.html
 delete mode 100644 components/streamlit_tesseract_scanner/frontend/main.js
 delete mode 100644 components/streamlit_tesseract_scanner/frontend/streamlit-component-lib.js
 delete mode 100644 components/streamlit_tesseract_scanner/frontend/test.js
 delete mode 100644 config.toml
 delete mode 100644 libs/__init__.py
 delete mode 100644 libs/knowledge.py
 delete mode 100644 libs/llms.py
 delete mode 100644 libs/msal.py
 delete mode 100644 libs/prompts.py
 delete mode 100644 libs/session.py
 delete mode 100644 pages/02_Knowledge_Search.py
 delete mode 100644 pages/03_Ta365_Chatbot.py
 delete mode 100644 pages/04_Speech_Transcribe.py
 delete mode 100644 pages/05_Video.py
 delete mode 100644 pages/06_Image_Analysis.py
 delete mode 100644 pages/07_Audio.py
 delete mode 100644 requirements-gptstudio.txt

diff --git a/.github/workflows/docker-gptstudio-publish.yml b/.github/workflows/docker-gptstudio-publish.yml
deleted file mode 100644
index eaa042a..0000000
--- a/.github/workflows/docker-gptstudio-publish.yml
+++ /dev/null
@@ -1,46 +0,0 @@
-name: GPTStudio Build and Publish
-
-on:
-  # run it on push to the default repository branch
-  push:
-    branches: [main]
-  # run it during pull request
-  pull_request:
-
-jobs:
-  # define job to build and publish docker image
-  build-and-push-docker-image:
-    name: Build Docker image and push to repositories
-    # run only when code is compiling and tests are passing
-    runs-on: ubuntu-latest
-
-    # steps to perform in job
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-
-      # set up Docker build action
-      - name: Set up Docker Buildx
-        id: buildx
-        uses: docker/setup-buildx-action@v2
-
-      - name: Login to DockerHub
-        uses: docker/login-action@v2
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_TOKEN }}
-
-      - name: Build image and push to Docker Hub and GitHub Container Registry
-        # the digest step below references this id via steps.docker_build
-        id: docker_build
-        uses: docker/build-push-action@v2
-        with:
-          # relative path to the source directory containing the Dockerfile
-          context: ./
-          file: ./Dockerfile.gptstudio
-          # Note: tags has to be all lower-case
-          tags: |
-            talkincode/gptstudio:latest
-          # build on feature branches, push only on main branch
-          push: ${{ github.ref == 'refs/heads/main' }}
-
-      - name: Image digest
-        run: echo ${{ steps.docker_build.outputs.digest }}
diff --git a/Dockerfile.gptstudio b/Dockerfile.gptstudio
deleted file mode 100644
index 1b1df84..0000000
--- a/Dockerfile.gptstudio
+++ /dev/null
@@ -1,41 +0,0 @@
-# Use the Mambaforge base image
-FROM condaforge/mambaforge:latest
-
-# Use a non-interactive frontend to avoid apt-get prompts
-ENV DEBIAN_FRONTEND=noninteractive
-
-# Set the timezone
-RUN echo "Asia/Shanghai" > /etc/timezone && \
-    ln -fs /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \
-    apt-get update && \
-    apt-get install -y tzdata && \
-    dpkg-reconfigure --frontend noninteractive tzdata
-
-# Install Tesseract-OCR, Graphviz, fonts, and FFmpeg
-RUN apt-get update && \
-    apt-get install -y tesseract-ocr tesseract-ocr-chi-sim graphviz fonts-wqy-microhei fonts-noto ffmpeg && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-# Set the working directory
-WORKDIR /app
-
-# Copy the project files
-COPY ./GPTStudio.py ./GPTStudio.py
-COPY ./pages ./pages
-COPY ./libs ./libs
-COPY ./config.toml ./.streamlit/config.toml
-COPY ./components ./components
-COPY ./requirements-gptstudio.txt ./requirements.txt
-
-# Install project dependencies, including OpenCV
-RUN pip install --no-cache-dir -r requirements.txt
-
-# Expose the default Streamlit port
-EXPOSE 8501
-
-# Unbuffered Python output for real-time logs
-ENV PYTHONUNBUFFERED=1
-
-# Startup command
-CMD ["streamlit", "run", "GPTStudio.py", "--server.port=8501"]
diff --git a/GPTStudio.py b/GPTStudio.py
deleted file mode 100644
index d40d9cc..0000000
--- a/GPTStudio.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import streamlit as st
-from libs.msal import msal_auth
-
-msal_auth()
-
-def sidebar():
-    st.sidebar.markdown("""
-# 🦜GPTStudio
-- [GPTStudio Github](https://github.com/terateams/GPTService)
-- [Streamlit Website](https://streamlit.io)
-    """)
-    if st.sidebar.button('Logout'):
-        st.session_state['authenticated'] = False
-        st.rerun()
-
-
-def show_page():
-    sidebar()
-    st.title("🦜GPTStudio")
-    st.markdown("""
-    GPTStudio is a toolkit built on GPT (Generative Pre-trained Transformer) models,
-    designed to give developers and data scientists powerful, easy-to-use GPT features.
-    It combines knowledge base management, GPT capabilities, and a collection of
-    AI-based tools, making it a good fit for any project involving AI and large models.
-
-    ## Key features
-
-    ### Knowledge base retrieval
-    - Efficient search tools that help users quickly find relevant information in the knowledge base.
-
-    ### GPT capability testing
-    - **Model capability testing**: lets users test the performance of GPT models assisted by the knowledge base.
-    - **Real-time feedback**: provides immediate feedback so users can judge the model's responses and accuracy.
-
-    ### AI tool collection
-    - **A wide range of AI tools**: including, but not limited to, text generation, language understanding, and data analysis.
-    - **Large model support**: integrates with other large AI models to extend the application's capabilities and scope.
-    """)
-
-
-def main():
-    """Main application entry point."""
-    show_page()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/Makefile b/Makefile
index f7cc2d5..a7c4817 100644
--- a/Makefile
+++ b/Makefile
@@ -8,10 +8,5 @@ arm64:
 	talkincode/gptservice:latest-arm64 .
 	docker push talkincode/gptservice:latest-arm64
 
-pubstudio:
-	docker buildx build -f Dockerfile.gptstudio --build-arg GoArch="amd64" --platform=linux/amd64 -t \
-	talkincode/gptstudio:latest .
-	docker push talkincode/gptstudio:latest
-
 .PHONY: clean build
diff --git a/components/__init__.py b/components/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/components/streamlit_tesseract_scanner/__init__.py b/components/streamlit_tesseract_scanner/__init__.py
deleted file mode 100644
index d0509fe..0000000
--- a/components/streamlit_tesseract_scanner/__init__.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import base64
-from pathlib import Path
-from typing import Optional
-import cv2
-import numpy as np
-import pytesseract
-
-import streamlit as st
-import streamlit.components.v1 as components
-
-# Tell Streamlit that there is a component called tesseract_scanner,
-# and that the code to display that component is in the "frontend" folder
-frontend_dir = (Path(__file__).parent / "frontend").absolute()
-_component_func = components.declare_component(
-    "tesseract_scanner", path=str(frontend_dir)
-)
-
-
-def tesseract_scanner(showimg: bool = False,
-                      lang: str = 'eng',
-                      blacklist: str = None,
-                      whitelist: str = None,
-                      psm: str = '3',
-                      hrate: float = 0.2,
-                      key: Optional[str] = None
-                      ) -> Optional[str]:
-    """Capture a camera frame from the frontend component and OCR it with Tesseract.
-
-    Returns the recognized text, or None until the user takes a picture.
-    """
-    b64_data: Optional[str] = _component_func(hrate=hrate, key=key)
-
-    if b64_data is None:
-        return None
-
-    raw_data = b64_data.split(",")[1]  # Strip the "data:" type prefix
-
-    # Decode the PNG data URI, then binarize for OCR
-    image = base64.b64decode(raw_data)
-    image = np.frombuffer(image, dtype=np.uint8)
-    image = cv2.imdecode(image, cv2.IMREAD_COLOR)
-    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
-
-    if showimg:
-        st.image(image)
-
-    # Assemble the Tesseract configuration from the arguments
-    custom_config = f'--oem 3 --psm {psm}'
-    if blacklist:
-        custom_config += f' -c tessedit_char_blacklist={blacklist}'
-    if whitelist:
-        custom_config += f' -c tessedit_char_whitelist={whitelist}'
-
-    text = pytesseract.image_to_string(image, lang=lang, config=custom_config)
-    return text
-
-
-def main():
-    st.write("## Example")
-
-    blacklist = '@*|©_Ⓡ®¢§š'
-    data = tesseract_scanner(showimg=False, lang='vie+eng',
-                             blacklist=blacklist, psm='3')
-
-    if data is not None:
-        st.write(data)
-
-if __name__ == "__main__":
-    main()
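For context, a page would typically drive the scanner component above roughly as in the sketch below. This is illustrative code, not part of the patch; the page, whitelist value, and key are hypothetical.

    import streamlit as st
    from components.streamlit_tesseract_scanner import tesseract_scanner

    st.title("OCR scanner demo")
    # Restrict recognition to digits; psm "7" treats the frame as a single text line
    text = tesseract_scanner(showimg=True, lang="eng",
                             whitelist="0123456789", psm="7", key="ocr_demo")
    if text:
        st.code(text)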
diff --git a/components/streamlit_tesseract_scanner/frontend/index.html b/components/streamlit_tesseract_scanner/frontend/index.html
deleted file mode 100644
index 6215874..0000000
--- a/components/streamlit_tesseract_scanner/frontend/index.html
+++ /dev/null
@@ -1,19 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-
-<head>
-    <meta charset="UTF-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>streamlit-camera-input-live</title>
-    <script src="streamlit-component-lib.js"></script>
-    <script src="main.js"></script>
-</head>
-
-<body>
-    <video id="video" autoplay muted playsinline></video>
-    <input type="range" id="videoheight" />
-    <canvas id="canvas" hidden></canvas>
-
-</body>
-
-</html>
\ No newline at end of file
diff --git a/components/streamlit_tesseract_scanner/frontend/main.js b/components/streamlit_tesseract_scanner/frontend/main.js
deleted file mode 100644
index 3c16ead..0000000
--- a/components/streamlit_tesseract_scanner/frontend/main.js
+++ /dev/null
@@ -1,84 +0,0 @@
-// The `Streamlit` object exists because our html file includes
-// `streamlit-component-lib.js`.
-// If you get an error about "Streamlit" not being defined, that
-// means you're missing that file.
-
-function sendValue(value) {
-  Streamlit.setComponentValue(value)
-}
-
-/**
- * The component's render function. This will be called immediately after
- * the component is initially loaded, and then again every time the
- * component gets new data from Python.
- */
-function onRender(event) {
-  // Only run the render code the first time the component is loaded.
-  if (!window.rendered) {
-    // You most likely want to get the data passed in like this
-    var {hrate} = event.detail.args;
-
-    let video = document.getElementById('video');
-    let videoheight = document.getElementById('videoheight');
-    let canvas = document.getElementById('canvas');
-
-    video.setAttribute('width', '100%');
-
-    var device = "desktop";
-    var width = video.clientWidth;
-    var height = 3 / 4 * width;
-
-    const ua = navigator.userAgent;
-    if (/(tablet|ipad|playbook|silk)|(android(?!.*mobi))/i.test(ua)) {
-      device = "mobile"; // tablets get the portrait aspect ratio too
-      height = 16 / 9 * width;
-    }
-    if (/Mobile|iP(hone|od)|Android|BlackBerry|IEMobile|Kindle|Silk-Accelerated|(hpw|web)OS|Opera M(obi|ini)/.test(ua)) {
-      device = "mobile";
-      height = 16 / 9 * width;
-    }
-
-    Streamlit.setFrameHeight(hrate * height);
-
-    const constraints = { facingMode: 'environment', advanced: [{focusMode: "continuous"}] };
-    navigator.mediaDevices.getUserMedia({ video: constraints })
-      .then(function(stream) {
-        video.srcObject = stream;
-        video.play();
-      })
-      .catch(function(err) {
-        console.log("An error occurred: " + err);
-      });
-
-    function beep() {
-      // Base64-encoded MP3 shutter beep (a SoundJay.com sound effect);
-      // the full data URI is truncated here for readability.
-      var snd = new Audio("data:audio/mpeg;base64,SUQzAwAAAAAAJlRQRTEAAAAcAAAAU291bmRKYXkuY29tIFNvdW5kIEVmZmVjdHMA...");
-      snd.play();
-    }
-
-    function hrateChange() {
-      hrate = videoheight.value / 100;
-      Streamlit.setFrameHeight(hrate * height);
-    }
-
-    function takePicture() {
-      let context = canvas.getContext('2d');
-      canvas.width = width;
-      canvas.height = hrate * height;
-      context.drawImage(video, 0, 0, width, height);
-      var data = canvas.toDataURL('image/png');
-      beep();
-      sendValue(data);
-    }
-
-    videoheight.addEventListener('change', hrateChange);
-    video.addEventListener('click', takePicture);
-    window.rendered = true;
-  }
-}
-
-// Render the component whenever Python sends a "render event"
-Streamlit.events.addEventListener(Streamlit.RENDER_EVENT, onRender)
-// Tell Streamlit that the component is ready to receive events
-Streamlit.setComponentReady()
-// Don't actually need to display anything, so set the height to 0
-Streamlit.setFrameHeight(0)
\ No newline at end of file
diff --git a/components/streamlit_tesseract_scanner/frontend/streamlit-component-lib.js b/components/streamlit_tesseract_scanner/frontend/streamlit-component-lib.js
deleted file mode 100644
index 6b2ec02..0000000
--- a/components/streamlit_tesseract_scanner/frontend/streamlit-component-lib.js
+++ /dev/null
@@ -1,35 +0,0 @@
-// Borrowed minimalistic Streamlit API from Thiago
-// https://discuss.streamlit.io/t/code-snippet-create-components-without-any-frontend-tooling-no-react-babel-webpack-etc/13064
-function sendMessageToStreamlitClient(type, data) {
-  console.log(type, data)
-  const outData = Object.assign({
-    isStreamlitMessage: true,
-    type: type,
-  }, data);
-  window.parent.postMessage(outData, "*");
-}
-
-const Streamlit = {
-  setComponentReady: function() {
-    sendMessageToStreamlitClient("streamlit:componentReady", {apiVersion: 1});
-  },
-  setFrameHeight: function(height) {
-    sendMessageToStreamlitClient("streamlit:setFrameHeight", {height: height});
-  },
-  setComponentValue: function(value) {
-    sendMessageToStreamlitClient("streamlit:setComponentValue", {value: value});
-  },
-  RENDER_EVENT: "streamlit:render",
-  events: {
-    addEventListener: function(type, callback) {
-      window.addEventListener("message", function(event) {
-        if (event.data.type === type) {
-          event.detail = event.data
-          callback(event);
-        }
-      });
-    }
-  }
-}
\ No newline at end of file
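The Python half of this minimal component protocol is the declare_component call already shown in components/streamlit_tesseract_scanner/__init__.py; a condensed sketch of the round trip is below (illustrative, not part of the patch):

    from pathlib import Path
    import streamlit.components.v1 as components

    # Serve the static frontend folder (index.html plus the JS files) in an iframe
    frontend_dir = (Path(__file__).parent / "frontend").absolute()
    _component = components.declare_component("tesseract_scanner", path=str(frontend_dir))

    # Keyword arguments arrive in JS as event.detail.args; whatever the frontend
    # passes to Streamlit.setComponentValue() is returned here on the next rerun.
    b64_data = _component(hrate=0.2, key="scanner", default=None)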
diff --git a/components/streamlit_tesseract_scanner/frontend/test.js b/components/streamlit_tesseract_scanner/frontend/test.js
deleted file mode 100644
index 8b13789..0000000
--- a/components/streamlit_tesseract_scanner/frontend/test.js
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/config.toml b/config.toml
deleted file mode 100644
index c6fb305..0000000
--- a/config.toml
+++ /dev/null
@@ -1,3 +0,0 @@
-[server]
-enableCORS = false
-enableXsrfProtection = false
diff --git a/libs/__init__.py b/libs/__init__.py
deleted file mode 100644
index 77d2940..0000000
--- a/libs/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import streamlit as st
-
-
-def set_session_value(name: str, value):
-    # Only sets the value if the key is not already present (setdefault semantics)
-    if name not in st.session_state:
-        st.session_state[name] = value
-
-
-def get_session_value(name: str):
-    if name not in st.session_state:
-        return None
-    return st.session_state[name]
-
-
-def rmv_session_value(name: str):
-    if name in st.session_state:
-        del st.session_state[name]
diff --git a/libs/knowledge.py b/libs/knowledge.py
deleted file mode 100644
index 6b157ce..0000000
--- a/libs/knowledge.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import requests
-import os
-
-
-# Knowledge base display names and their corresponding collection names
-knowledge_dictionary = {
-    "None selected": "",
-    "Youth Programming": "codeboy",
-    "Logarithm Class": "logbot",
-}
-
-
-def search_knowledge(collection, query):
-    """Query the GPTService knowledge search API and format the hits as Markdown."""
-    gpt_address = os.getenv("GPT_SERVICE_ADDRESS")
-    api_token = os.getenv("GPT_SERVICE_TOKEN")
-    url = f"{gpt_address}/knowledge/search"
-    headers = {
-        "Accept": "application/json",
-        "Content-Type": "application/json",
-        "Authorization": f"Bearer {api_token}"
-    }
-    payload = {
-        "collection": collection,
-        "query": query
-    }
-
-    response = requests.post(url, headers=headers, json=payload)
-    if response.status_code != 200:
-        return f"Error searching knowledge: {response.text}"
-    data = response.json()
-
-    def fmt(v):
-        return f'**Score**: {v["score"]}\n\n{v["content"]}\n\n---\n\n'
-
-    return "\n\n".join([fmt(v) for v in data["result"]["data"]])
-
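search_knowledge expects GPT_SERVICE_ADDRESS and GPT_SERVICE_TOKEN in the environment and posts a {collection, query} payload to the service's /knowledge/search endpoint. A minimal call might look like the following sketch (hypothetical address and token, assuming a running GPTService instance):

    import os
    from libs.knowledge import search_knowledge

    os.environ["GPT_SERVICE_ADDRESS"] = "https://gptservice.example.com"  # hypothetical
    os.environ["GPT_SERVICE_TOKEN"] = "changeme"                          # hypothetical

    # Returns Markdown-formatted hits with a score per match
    print(search_knowledge("codeboy", "How do for loops work?"))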
diff --git a/libs/llms.py b/libs/llms.py
deleted file mode 100644
index 3e0b4c8..0000000
--- a/libs/llms.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import base64
-
-from openai import OpenAI
-import os
-
-
-def openai_streaming(sysmsg, historys: list):
-    """Stream chat deltas from the OpenAI chat completions API."""
-    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-    messages = [
-        {"role": "system", "content": sysmsg},
-    ]
-    for history in historys:
-        messages.append(history)
-    completion = client.chat.completions.create(
-        model="gpt-4-1106-preview",
-        messages=messages,
-        stream=True
-    )
-    for chunk in completion:
-        yield chunk.choices[0].delta
-
-
-# Call the OpenAI GPT-4 Vision API on an image file object
-def openai_analyze_image(prompt_str, imagefs):
-    client = OpenAI()
-    # Encode the image as Base64 for the data URI below
-    base64_string = base64.b64encode(imagefs.getvalue()).decode('utf-8')
-
-    response = client.chat.completions.create(
-        model="gpt-4-vision-preview",
-        messages=[
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": prompt_str or "Analyze the image content"},
-                    {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": "data:image/jpeg;base64," + base64_string,
-                            "detail": "high"
-                        },
-                    },
-                ],
-            }
-        ],
-        max_tokens=300,
-    )
-
-    return response.choices[0].message.content
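openai_streaming yields raw delta objects rather than strings, so callers accumulate delta.content themselves; this is the consumption pattern the chat pages below rely on. A minimal sketch (assuming OPENAI_API_KEY is set and libs is importable):

    import streamlit as st
    from libs.llms import openai_streaming

    placeholder, full_response = st.empty(), ""
    history = [{"role": "user", "content": "Hello!"}]
    for delta in openai_streaming("You are a helpful assistant.", history):
        if delta.content:                    # deltas without text are skipped
            full_response += delta.content
            placeholder.markdown(full_response)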
st.chat_input("输入检索主题"): - st.session_state.knowledge_messages.append({"role": "user", "content": prompt}) - with st.chat_message("user"): - st.write(prompt) - -if st.session_state.knowledge_messages[-1]["role"] != "assistant": - with st.chat_message("assistant"): - with st.spinner("Thinking..."): - response = search_knowledge(collection_value, prompt) - if response is None: - response = "没有找到相关知识" - st.markdown(response) - message = {"role": "assistant", "content": response} - st.session_state.knowledge_messages.append(message) diff --git a/pages/03_Ta365_Chatbot.py b/pages/03_Ta365_Chatbot.py deleted file mode 100644 index 557b185..0000000 --- a/pages/03_Ta365_Chatbot.py +++ /dev/null @@ -1,102 +0,0 @@ -import streamlit as st -import sys -import os -from dotenv import load_dotenv -from libs.knowledge import knowledge_dictionary, search_knowledge -from libs.prompts import get_ta365_sysmsg -from libs.msal import msal_auth -from libs.llms import openai_streaming - -sys.path.append(os.path.abspath('..')) -load_dotenv() - -with st.sidebar: - value = msal_auth() - if value is None: - st.stop() - -st.sidebar.markdown("# 💡Ta365 AI 助手") - -st.title("💡Ta365 AI 助手") -st.markdown("> 一个通用型人工智能助手,可以帮助你解决各种问题, 左侧栏可以选择知识库。") - -if "ta365_messages" not in st.session_state.keys(): - st.session_state.ta365_messages = [{"role": "assistant", "content": "我是 Ta365 AI 助手,欢迎提问"}] - -if "ta365_last_user_msg_processed" not in st.session_state: - st.session_state.ta365_last_user_msg_processed = True - -if "ta365_streaming_end" not in st.session_state: - st.session_state.ta365_streaming_end = True - - -def stop_streaming(): - """当停止按钮被点击时执行,用于修改处理标志""" - st.session_state.ta365_streaming_end = True - st.session_state.ta365_last_user_msg_processed = True - - -collection = st.sidebar.selectbox("选择知识库", knowledge_dictionary.keys()) -collection_value = knowledge_dictionary[collection] - -for ta365_messages in st.session_state.ta365_messages: - with st.chat_message(ta365_messages["role"]): - st.write(ta365_messages["content"]) - - -def clear_chat_history(): - st.session_state.ta365_messages = [{"role": "assistant", "content": "我是 Ta365 AI 助手,欢迎提问"}] - - -st.sidebar.button('清除对话历史', on_click=clear_chat_history) - -# 用户输入 -if prompt := st.chat_input("输入你的问题"): - # 用于标记用户消息还没有处理 - st.session_state.ta365_streaming_end = False - st.session_state.ta365_last_user_msg_processed = False - st.session_state.ta365_messages.append({"role": "user", "content": prompt}) - with st.chat_message("user"): - st.write(prompt) - -stop_action = st.sidebar.empty() - -if not st.session_state.ta365_streaming_end: - stop_action.button('停止输出', on_click=stop_streaming, help="点击此按钮停止流式输出") - - -# 用户输入响应,如果上一条消息不是助手的消息,且上一条用户消息还没有处理完毕 -if (st.session_state.ta365_messages[-1]["role"] != "assistant" - and not st.session_state.ta365_last_user_msg_processed): - with st.chat_message("assistant"): - with st.spinner("Thinking..."): - # 检索知识库 - kmsg = "" - if collection_value not in "": - kmsg = search_knowledge(collection_value, prompt) - if kmsg != "": - st.expander("📚 知识库检索结果", expanded=False).markdown(kmsg) - sysmsg = get_ta365_sysmsg(kmsg) - response = openai_streaming(sysmsg, st.session_state.ta365_messages[-10:]) - # 流式输出 - placeholder = st.empty() - full_response = '' - for item in response: - # 如果用户手动停止了流式输出,就退出循环 - if st.session_state.ta365_streaming_end: - break - text = item.content - if text is not None: - full_response += text - placeholder.markdown(full_response) - placeholder.markdown(full_response) - - - stop_action.empty() - # 用于标记流式输出已经结束 - 
diff --git a/pages/02_Knowledge_Search.py b/pages/02_Knowledge_Search.py
deleted file mode 100644
index 8cfcd1c..0000000
--- a/pages/02_Knowledge_Search.py
+++ /dev/null
@@ -1,57 +0,0 @@
-import streamlit as st
-import os
-import sys
-from dotenv import load_dotenv
-
-sys.path.append(os.path.abspath('..'))
-load_dotenv()
-from libs.knowledge import search_knowledge, knowledge_dictionary
-from libs.msal import msal_auth
-
-# Authenticate the user via msal_auth()
-with st.sidebar:
-    value = msal_auth()
-    if value is None:
-        st.stop()
-
-st.sidebar.markdown("# Knowledge Base Search")
-
-st.title("Knowledge Base Search")
-st.markdown("> Search an existing knowledge base; results include topic-related content and a match score.")
-st.divider()
-
-if "knowledge_messages" not in st.session_state.keys():
-    st.session_state.knowledge_messages = [{"role": "assistant", "content": "Welcome to knowledge base search; please enter a topic"}]
-
-collection = st.sidebar.selectbox("Select a knowledge base", knowledge_dictionary.keys())
-collection_value = knowledge_dictionary[collection]
-
-for knowledge_messages in st.session_state.knowledge_messages:
-    with st.chat_message(knowledge_messages["role"]):
-        st.write(knowledge_messages["content"])
-
-
-def clear_chat_history():
-    st.session_state.knowledge_messages = [{"role": "assistant", "content": "Welcome to knowledge base search; please enter a topic"}]
-
-
-st.sidebar.button('Clear history', on_click=clear_chat_history)
-
-if collection_value == "":
-    st.warning("Please select a knowledge base")
-    st.stop()
-
-if prompt := st.chat_input("Enter a search topic"):
-    st.session_state.knowledge_messages.append({"role": "user", "content": prompt})
-    with st.chat_message("user"):
-        st.write(prompt)
-
-if st.session_state.knowledge_messages[-1]["role"] != "assistant":
-    with st.chat_message("assistant"):
-        with st.spinner("Thinking..."):
-            response = search_knowledge(collection_value, prompt)
-            if response is None:
-                response = "No relevant knowledge found"
-            st.markdown(response)
-            message = {"role": "assistant", "content": response}
-            st.session_state.knowledge_messages.append(message)
diff --git a/pages/03_Ta365_Chatbot.py b/pages/03_Ta365_Chatbot.py
deleted file mode 100644
index 557b185..0000000
--- a/pages/03_Ta365_Chatbot.py
+++ /dev/null
@@ -1,102 +0,0 @@
-import streamlit as st
-import sys
-import os
-from dotenv import load_dotenv
-from libs.knowledge import knowledge_dictionary, search_knowledge
-from libs.prompts import get_ta365_sysmsg
-from libs.msal import msal_auth
-from libs.llms import openai_streaming
-
-sys.path.append(os.path.abspath('..'))
-load_dotenv()
-
-with st.sidebar:
-    value = msal_auth()
-    if value is None:
-        st.stop()
-
-st.sidebar.markdown("# 💡Ta365 AI Assistant")
-
-st.title("💡Ta365 AI Assistant")
-st.markdown("> A general-purpose AI assistant for all kinds of questions; pick a knowledge base in the left sidebar.")
-
-if "ta365_messages" not in st.session_state.keys():
-    st.session_state.ta365_messages = [{"role": "assistant", "content": "I am the Ta365 AI assistant, ask me anything"}]
-
-if "ta365_last_user_msg_processed" not in st.session_state:
-    st.session_state.ta365_last_user_msg_processed = True
-
-if "ta365_streaming_end" not in st.session_state:
-    st.session_state.ta365_streaming_end = True
-
-
-def stop_streaming():
-    """Runs when the stop button is clicked; flips the processing flags."""
-    st.session_state.ta365_streaming_end = True
-    st.session_state.ta365_last_user_msg_processed = True
-
-
-collection = st.sidebar.selectbox("Select a knowledge base", knowledge_dictionary.keys())
-collection_value = knowledge_dictionary[collection]
-
-for ta365_messages in st.session_state.ta365_messages:
-    with st.chat_message(ta365_messages["role"]):
-        st.write(ta365_messages["content"])
-
-
-def clear_chat_history():
-    st.session_state.ta365_messages = [{"role": "assistant", "content": "I am the Ta365 AI assistant, ask me anything"}]
-
-
-st.sidebar.button('Clear chat history', on_click=clear_chat_history)
-
-# User input
-if prompt := st.chat_input("Enter your question"):
-    # Mark the user message as not yet processed
-    st.session_state.ta365_streaming_end = False
-    st.session_state.ta365_last_user_msg_processed = False
-    st.session_state.ta365_messages.append({"role": "user", "content": prompt})
-    with st.chat_message("user"):
-        st.write(prompt)
-
-stop_action = st.sidebar.empty()
-
-if not st.session_state.ta365_streaming_end:
-    stop_action.button('Stop output', on_click=stop_streaming, help="Click this button to stop the streaming output")
-
-
-# Respond only if the last message is not from the assistant
-# and the last user message has not been processed yet
-if (st.session_state.ta365_messages[-1]["role"] != "assistant"
-        and not st.session_state.ta365_last_user_msg_processed):
-    with st.chat_message("assistant"):
-        with st.spinner("Thinking..."):
-            # Search the knowledge base
-            kmsg = ""
-            if collection_value:
-                kmsg = search_knowledge(collection_value, prompt)
-                if kmsg != "":
-                    st.expander("📚 Knowledge base results", expanded=False).markdown(kmsg)
-            sysmsg = get_ta365_sysmsg(kmsg)
-            response = openai_streaming(sysmsg, st.session_state.ta365_messages[-10:])
-            # Stream the output
-            placeholder = st.empty()
-            full_response = ''
-            for item in response:
-                # Break out of the loop if the user manually stopped the stream
-                if st.session_state.ta365_streaming_end:
-                    break
-                text = item.content
-                if text is not None:
-                    full_response += text
-                    placeholder.markdown(full_response)
-            placeholder.markdown(full_response)
-
-            stop_action.empty()
-            # Mark streaming as finished
-            st.session_state.ta365_streaming_end = True
-            # Mark the last user message as processed
-            st.session_state.ta365_last_user_msg_processed = True
-            message = {"role": "assistant", "content": full_response}
-            st.session_state.ta365_messages.append(message)
-
diff --git a/pages/04_Speech_Transcribe.py b/pages/04_Speech_Transcribe.py
deleted file mode 100644
index 0bcb774..0000000
--- a/pages/04_Speech_Transcribe.py
+++ /dev/null
@@ -1,102 +0,0 @@
-import uuid
-
-import streamlit as st
-from st_audiorec import st_audiorec
-from audio_recorder_streamlit import audio_recorder
-from openai import OpenAI
-from pydub import AudioSegment
-from libs.msal import msal_auth
-import io
-import sys
-import os
-from dotenv import load_dotenv
-
-from libs.session import PageSessionState
-
-sys.path.append(os.path.abspath('..'))
-load_dotenv()
-
-page_state = PageSessionState("speech_transcribe")
-
-with st.sidebar:
-    value = msal_auth()
-    if value is None:
-        st.stop()
-
-st.sidebar.markdown("# 🎙️Speech Transcription🎤")
-
-st.markdown("# 🎙️Speech Transcription🎤")
-st.markdown("> Upload text or record speech for recognition, then synthesize new audio")
-
-# Recorded/recognized text content
-page_state.initn_attr("audio_recode", None)
-# Synthesized speech content
-page_state.initn_attr("speech_recode", None)
-# Whether a task is still processing
-page_state.initn_attr("audio_processing", False)
-
-# Directory for temporary files
-data_dir = os.getenv("DATA_DIR", "/tmp/data")
-if not os.path.exists(data_dir):
-    os.makedirs(data_dir)
-
-content_box = st.empty()
-
-uploaded_file = st.sidebar.file_uploader("Upload a text file", type=["txt", "md"])
-if uploaded_file is not None:
-    stringio = io.StringIO(uploaded_file.getvalue().decode("utf-8"))
-    string_data = stringio.read()
-    page_state.audio_recode = string_data
-
-if st.sidebar.button("Record audio"):
-    page_state.audio_recode = None
-    page_state.audio_processing = False
-    page_state.speech_recode = None
-    content_box.empty()
-    st.rerun()
-
-if page_state.audio_recode is None:
-    with st.spinner('Transcribing speech...'):
-        wav_audio_recode = audio_recorder("Click to record", icon_size="2x", pause_threshold=3.0)
-        if wav_audio_recode is not None:
-            st.audio(wav_audio_recode, format="audio/wav")
-            audio_segment = AudioSegment.from_wav(io.BytesIO(wav_audio_recode))
-            filename = os.path.join(data_dir, f"{uuid.uuid4()}.audio.wav")
-            audio_segment.export(filename, format="wav")
-            client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-            transcript = client.audio.transcriptions.create(
-                model="whisper-1",
-                response_format="json",
-                file=open(filename, "rb"),
-            )
-            page_state.audio_recode = transcript.text
-            st.rerun()
-
-if page_state.audio_recode is not None:
-    content_box.markdown(page_state.audio_recode)
-    sound = st.selectbox("Select a voice", ["alloy", "echo", "fable", "onyx", "nova", "shimmer"])
-    c1, c2, c3 = st.columns(3)
-    if c1.button("Synthesize speech"):
-        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-        speech_file_path = os.path.join(data_dir, f"{uuid.uuid4()}.speech.mp3")
-        with st.status("Synthesizing speech", expanded=True) as status:
-            response = client.audio.speech.create(
-                model="tts-1",
-                voice=sound,
-                input=page_state.audio_recode
-            )
-            page_state.speech_recode = response.read()
-            st.write(f"🎧 Voice: {sound}")
-            st.audio(page_state.speech_recode, format="audio/mp3")
-            st.write(f"Speech synthesis with voice {sound} finished")
-            status.update(label="Speech synthesis complete!", state="complete")
-
-    if page_state.speech_recode is not None:
-        c3.download_button(
-            label="Download audio",
-            data=page_state.speech_recode,
-            file_name='speech.mp3',
-        )
-else:
-    if page_state.audio_processing:
-        st.write("A task is still processing")
diff --git a/pages/05_Video.py b/pages/05_Video.py
deleted file mode 100644
index 97958cb..0000000
--- a/pages/05_Video.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import cv2
-import numpy as np
-import streamlit as st
-from camera_input_live import camera_input_live
-
-"# Streamlit camera input live Demo"
-"## Try holding a QR code in front of your webcam"
-
-image = camera_input_live(debounce=2000, width=360, height=240, show_controls=True)
-
-if image is not None:
-    st.image(image)
-    bytes_data = image.getvalue()
-    cv2_img = cv2.imdecode(np.frombuffer(bytes_data, np.uint8), cv2.IMREAD_COLOR)
-
-    detector = cv2.QRCodeDetector()
-
-    data, bbox, straight_qrcode = detector.detectAndDecode(cv2_img)
-
-    if data:
-        st.write("# Found QR code")
-        st.write(data)
-        with st.expander("Show details"):
-            st.write("BBox:", bbox)
-            st.write("Straight QR code:", straight_qrcode)
diff --git a/pages/06_Image_Analysis.py b/pages/06_Image_Analysis.py
deleted file mode 100644
index e84d66b..0000000
--- a/pages/06_Image_Analysis.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import streamlit as st
-from libs.llms import openai_analyze_image, openai_streaming
-from libs.msal import msal_auth
-from libs.session import PageSessionState
-
-st.set_page_config(page_title="Vision Analysis", page_icon="🔬")
-
-page_state = PageSessionState("image_analysis")
-
-with st.sidebar:
-    value = msal_auth()
-    if value is None:
-        st.stop()
-
-
-# Conversation history
-page_state.initn_attr("messages", [])
-
-# Whether the last user message has been processed
-page_state.initn_attr("last_user_msg_processed", True)
-
-# Image analysis result
-page_state.initn_attr("analysis_result", "")
-
-page_state.initn_attr("input_type", "camera")
-
-st.sidebar.markdown("# 🔬Vision Analysis")
-
-st.title("🔬Vision Analysis")
-
-# Prompt for the image analysis
-prompt = st.sidebar.text_area("Image analysis prompt", "Identify and analyze the image content", height=40)
-
-
-def clear_result():
-    page_state.analysis_result = ""
-    page_state.last_user_msg_processed = True
-    page_state.messages = []
-    if page_state.input_type == "camera" and page_state.camera_image is not None:
-        with st.spinner("Analyzing..."):
-            page_state.analysis_result = openai_analyze_image(prompt, page_state.camera_image)
-            page_state.messages.append({"role": "assistant", "content": page_state.analysis_result})
-
-
-# Get the image from the camera or an upload
-if st.sidebar.selectbox("Select image input", ["Camera", "Upload image"]) == "Camera":
-    page_state.input_type = "camera"
-    image = st.camera_input("Click the button to capture", on_change=clear_result, key="image_analysis_camera_image")
-else:
-    page_state.input_type = "upload"
-    image = st.sidebar.file_uploader("Upload an image", type=["png", "jpg", "jpeg"],
-                                     on_change=clear_result, key="image_analysis_camera_image")
-
-if page_state.camera_image is not None:
-    if page_state.input_type == "upload":
-        st.image(page_state.camera_image, caption="Uploaded image", use_column_width=True)
-    c1, c2 = st.columns(2)
-    if c1.button("Analyze image"):
-        with st.spinner("Analyzing..."):
-            page_state.analysis_result = openai_analyze_image(prompt, page_state.camera_image)
-            page_state.messages.append({"role": "assistant", "content": page_state.analysis_result})
-
-    if c2.button("Clear result"):
-        clear_result()
-
-# Render the conversation history
-for msg in page_state.messages:
-    with st.chat_message(msg["role"]):
-        st.write(msg["content"])
-
-# User input
-if uprompt := st.chat_input("Enter your question"):
-    # Mark the user message as not yet processed
-    page_state.last_user_msg_processed = False
-    page_state.messages.append({"role": "user", "content": uprompt})
-    with st.chat_message("user"):
-        st.write(uprompt)
-
-# Respond only if the last message is not from the assistant
-# and the last user message has not been processed yet
-if ((page_state.messages
-     and page_state.messages[-1]["role"] != "assistant"
-     and not page_state.last_user_msg_processed)
-        and page_state.analysis_result != ""):
-    with st.chat_message("assistant"):
-        with st.spinner("Thinking..."):
-            sysmsg = f"""
-The following content was obtained from an image recognition pass:
-'''
-{page_state.analysis_result}
-'''
-We will discuss this content in depth.
-"""
-            response = openai_streaming(sysmsg, page_state.messages[-10:])
-            # Stream the output
-            placeholder = st.empty()
-            full_response = ''
-            for item in response:
-                text = item.content
-                if text is not None:
-                    full_response += text
-                    placeholder.markdown(full_response)
-            placeholder.markdown(full_response)
-
-            # Mark the last user message as processed
-            page_state.last_user_msg_processed = True
-            # Append to the conversation history
-            message = {"role": "assistant", "content": full_response}
-            page_state.messages.append(message)
diff --git a/pages/07_Audio.py b/pages/07_Audio.py
deleted file mode 100644
index 44ac032..0000000
--- a/pages/07_Audio.py
+++ /dev/null
@@ -1,6 +0,0 @@
-import streamlit as st
-from audio_recorder_streamlit import audio_recorder
-
-audio_bytes = audio_recorder("", icon_size="1x", pause_threshold=3.0)
-if audio_bytes:
-    st.audio(audio_bytes, format="audio/wav")
diff --git a/requirements-gptstudio.txt b/requirements-gptstudio.txt
deleted file mode 100644
index af3d18a..0000000
--- a/requirements-gptstudio.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-pydantic
-python-dotenv
-tiktoken
-openai
-PyMuPDF
-langchain
-qdrant-client
-python-multipart
-pytesseract
-pillow
-graphviz
-jinja2
-streamlit
-pydub
-msal_streamlit_authentication
-opencv-python-headless
-streamlit_webrtc
-streamlit-audiorec
-audio-recorder-streamlit