diff --git a/components/__init__.py b/components/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/components/streamlit_tesseract_scanner/__init__.py b/components/streamlit_tesseract_scanner/__init__.py
new file mode 100644
index 0000000..d0509fe
--- /dev/null
+++ b/components/streamlit_tesseract_scanner/__init__.py
@@ -0,0 +1,79 @@
+import base64
+from io import BytesIO
+from pathlib import Path
+from typing import Optional
+import cv2
+import numpy as np
+import pytesseract
+from pytesseract import Output
+
+import streamlit as st
+import streamlit.components.v1 as components
+
+# Tell Streamlit that there is a component called tesseract_scanner,
+# and that the code to display that component is in the "frontend" folder
+frontend_dir = (Path(__file__).parent / "frontend").absolute()
+_component_func = components.declare_component(
+ "tesseract_scanner", path=str(frontend_dir)
+)
+
+
+def tesseract_scanner(showimg: bool =False,
+ lang: str = 'eng',
+ blacklist: str = None,
+ whitelist: str = None,
+ psm: str = '3',
+ hrate: float=0.2,
+ key: Optional[str] = None
+ ) -> Optional[BytesIO]:
+ """
+ Add a descriptive docstring
+ """
+ b64_data: Optional[str] = _component_func(hrate=hrate, key=key)
+
+ if b64_data is None:
+ return None
+
+ raw_data = b64_data.split(",")[1] # Strip the data: type prefix
+
+ component_value = BytesIO(base64.b64decode(raw_data))
+
+ # return component_value
+ # image = cv2.imdecode(np.frombuffer(component_value, np.uint8), cv2.IMREAD_COLOR)
+
+ image = base64.b64decode(raw_data)
+ image = np.fromstring(image, dtype=np.uint8)
+ image = cv2.imdecode(image, cv2.IMREAD_COLOR)
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+ image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
+
+ if showimg:
+ st.image(image)
+
+ # blacklist = '@*|©_Ⓡ®¢§š'
+ if blacklist:
+ custom_config = f'''--oem 3 --psm 11'''
+ else:
+ custom_config = f'''--oem 3 --psm 3'''
+
+ text = pytesseract.image_to_string(image, lang=lang, config=custom_config)
+ # text = text.split('\n')
+ # while("" in text): text.remove("")
+ # while(" " in text): text.remove(" ")
+ # text.remove("\x0c")
+
+ return text
+
+
+def main():
+ st.write("## Example")
+
+ blacklist='@*|©_Ⓡ®¢§š'
+ data = tesseract_scanner(showimg=False, lang='vie+eng',
+ blacklist=blacklist, psm=3)
+
+ if data is not None:
+ st.write(data)
+
+if __name__ == "__main__":
+ main()
diff --git a/components/streamlit_tesseract_scanner/frontend/index.html b/components/streamlit_tesseract_scanner/frontend/index.html
new file mode 100644
index 0000000..6215874
--- /dev/null
+++ b/components/streamlit_tesseract_scanner/frontend/index.html
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+ streamlit-camera-input-live
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/components/streamlit_tesseract_scanner/frontend/main.js b/components/streamlit_tesseract_scanner/frontend/main.js
new file mode 100644
index 0000000..3c16ead
--- /dev/null
+++ b/components/streamlit_tesseract_scanner/frontend/main.js
@@ -0,0 +1,84 @@
+// The `Streamlit` object exists because our html file includes
+// `streamlit-component-lib.js`.
+// If you get an error about "Streamlit" not being defined, that
+// means you're missing that file.
+
+function sendValue(value) {
+ Streamlit.setComponentValue(value)
+ }
+
+ /**
+ * The component's render function. This will be called immediately after
+ * the component is initially loaded, and then again every time the
+ * component gets new data from Python.
+ */
+ function onRender(event) {
+ // Only run the render code the first time the component is loaded.
+ if (!window.rendered) {
+ // You most likely want to get the data passed in like this
+ var {hrate} = event.detail.args;
+
+ let video = document.getElementById('video');
+ let videoheight = document.getElementById('videoheight');
+ let canvas = document.getElementById('canvas');
+
+ video.setAttribute('width', '100%');
+
+ var device = "desktop";
+ width = video.clientWidth;
+ height = 3 / 4 * width;
+
+ const ua = navigator.userAgent;
+ if (/(tablet|ipad|playbook|silk)|(android(?!.*mobi))/i.test(ua)) {
+ device = "mobile"; // return "tablet";
+ height = 16/9 * width;
+ }
+ if (/Mobile|iP(hone|od)|Android|BlackBerry|IEMobile|Kindle|Silk-Accelerated|(hpw|web)OS|Opera M(obi|ini)/.test(ua)) {
+ device = "mobile";
+ height = 16/9 * width;
+ }
+
+ Streamlit.setFrameHeight(hrate * height);
+
+ const constraints = { facingMode: 'environment', advanced : [{focusMode: "continuous"}]};
+ navigator.mediaDevices.getUserMedia({ video: constraints })
+ .then(function(stream) {
+ video.srcObject = stream;
+ video.play();
+ })
+ .catch(function(err) {
+ console.log("An error occurred: " + err);
+ });
+
+ function beep() {
+ var snd = new Audio("data:audio/mpeg;base64,SUQzAwAAAAAAJlRQRTEAAAAcAAAAU291bmRKYXkuY29tIFNvdW5kIEVmZmVjdHMA//uSwAAAAAABLBQAAAL6QWkrN1ADDCBAACAQBAQECQD//2c7OmpoX/btmzIxt4R/7tmdKRqBVldEDICeA2szOT5E0ANLDoERvAwYDvXUwGPgUBhQVAiIAGFQb9toDBQAwSGwMLgECIPAUE/7v4YoAwyHQMSh8BgNl0r//5ofWmt///4swTaBg0CgSAgNoClQMSAwCgBAwiA//t9/GRFBlcXORYXAN8ZQggBgCACBH////4WYFjpmaRcLZcYggswUoBgEEgYPBf////////+VwfOBAwA7llUiIABQAAAgAAAEBgUARBzKEVmNPo26GUFGinz0RnZcAARtaVqlvTwGDx8BvHbgkEQMtcYIQgBjzkgaETYGFhuAEeRQ5m4ZcMEAsmKArYXE7qZFkXGOGkI5L4yqTIqRZNK45ociBkoKE6brSDUgMNi8mkJqHfAwaMBz11/t23+yEgox4FicKWLheWtJMWkAYIGpvvKwpgAQBJxVki+QFZOmhfJkQWCICACENuqdNB1Ba39WSI1wxkIsPSalHkFsZloPyHLBoEwssSa3Xf/7ksBnABz9nUn5qoACZTMov7FQAGsyLZRDwG7X+vJcfAjUzWVJMUz/DadX/DPVVPTwxgAAYggAShABbnnd5DQOPbj70zVpiaxayfheoOiDfgbrAYWXYHf90BlMZAYvDQUAYhKOIfxmTyebVJ71qsPaSBSPnR4NTPoOShOniyMyQEMSAScgXMjmnkkTJ71ob1q2rei1TUOy0Ss5w4QYIA0HbOG3Pf//3+j8i6LMiQ0CAFFXbU9Xf//+/mJHJOsyLwYXJ1mr16/1AJZ4ZlMAACAAADEFHpoLU2ytFsJ1sql3c1hG7r4LivRJ06AgAMwNgSDQUFJMGgAAOAXR8a+/8op8Ln/Z5+X/z+4/yc+vLe5V+QXz/52DO8uxhuYWBWA9SESgTZOJpmtaG2rbR2u29NqluNQrUjU4EoAfZG1SNfVX/928+3ccDzJEmgCCQc41Szj/V9S/r+o29Qn1qrhQY9Wg/rb/9fzku8RCoAABQAABKjQCK1VNcqoJHKmjjRanrzeKUiQHJyu63xb0wtDo+TRcFFkPAS68UpPuY2f+v/4/+///+5LAbIATtdU/7HqNwlm0aD2O0bDv9q3qS1nq12Z9yUSRRMBjQF4wHfMidi6aVlt2PVI7a6n11d7ashxpscCbQWBa2qP1tnq22q7VatDVj01aygAkcI0TXnHr1tX2/W+qrqmQ03rwUBNXnK7dvTeRh2VkYwAAKAAANmkNuUCQrNCopStlXHuCRUS6Xmb1FJdyyQKCxhEZZ3xiBiIE5ZJ45VZj9nK/39d7n/5////b0Sx1MW7zwd/89STW8J+EAoCwJcYM2OAvmjE5VzayGr+nvpash5arY4EJIBQOJrNaZL1tUtS9v9uqd08Zl2RSIaASHQ402MXko1etvr+632qPbKLI3F1YDQRecybarX+3qq+o+upVkRCAAAgAAAZGbDPFHmW35hRX4JfLKULFfuWuey1yVKB0FwsZRmlgZgIFCHdUjlw/BVq9h3Cxnzv4Y5659JYr7ortvLj4fn/eR6xq5K3oC4vgc9EKDIAQdSBMspPTXT3+m/tOp1oR0qQtBCwCiw3RPTpb+qvtV6mbzJqGMtZSBTAMIhsaBxUyNXV0GV0l//uSwJkAFGnXPex2rcKXuuf9jtG4L9f0z2nQFK1JqQAUDM681f7/Zf1e82WAioiGUwAAMAAAKBrafL7Ku+qidGFD4nVyacggTALkCEoYIANAGBgXCWBiVFyBp/PgBhGCEAMFAMVk+dH2TBoYrm9BHTe8nCjIANs3I8ixWIx9JAjDVNA6IXAeEUDDEBoBQCAuBTqPtesy39Nt61bVKrZRgnRMDw
IQGA4EBFC0aIHUG/9/1P/pUBjTdzhgOgBwDBF1qQrb1Nv/v+tfWok07GBcC4En3VljsdIclUMYgIgAAAAAAAAAAAASAeJK1eXElURk3DcGCI9jsylQ8LhANGAxQ48DSKDgORA0gBiAYAwXjYCQG0TUCwHBzEUHUy2WsrkHMi4kpqDJuxmVE5bNC+GOAYPAailFSeFzgYZQCCf1rIiJtAwuASGAkyNqtKt9Zmmo0NE1npbEqCAAZga6aaQ5YDQMiJm+VzQqiugHAgLRxk7b6x6FDBZX75ZUM+BYBydBk7okIKFC+iTM9m1zp8pB4zfVX1uU2H2I2agtPQdZuiWhqv/7ksC6gBV1o0P1iwADaDro+x9gAEEdFvX///mZ/eT/6Dx8wAyYoAUAAAADAEAFAAAAAAPVTzyO6U2P8w8nM8P6bv+PBRjw07pfb/AciANoiwLBCM1LAysBAFCABgMGhMABswkysR0CIHAMAAMBiAo5JOE9XhikQ4LmBQgtKRMlgyJ74xQblBiMCQEEeCOyis1IcTRb/IEKMJ0FbiyRtCUCGmKBskYnP43B0i4xpidRkB2DlmSRsUTE8ZGTl3/juHAOeOaSQzA/ENHPGXE+oqeicUbFExb/5UKhAzhEiIEXIqViCEoQ0i46x2GSTooqeipSRii3//YliLmBPE4RcmSsQQjP//mQ0nLjQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/+5LAvgAcldNN2bqASAAAJYOAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA//uSwP+AAAABLAAAAAAAACWAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAP/7ksD/gAAAASwAAAAAAAAlgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/+5LA/4AAAAEsAAAAAAAAJYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA//uSwP+AAAABLAAAAAAAACWAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/7ksD/gAAAASwAAAAAAAAlgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/+5LA/4AAAAEsAAAAAAAAJYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA//uSwP+AAAABLAAAAAAAACWAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/7ksD/gAAAASwAAAAAAAAlgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAD/+5LA/4AAAAEsAAAAAAAAJYAAAAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgAC
AAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAA");
+ snd.play();
+ }
+
+ function hrateChange() {
+ hrate = videoheight.value / 100;
+ Streamlit.setFrameHeight(hrate * height);
+ }
+
+ function takePicture() {
+ let context = canvas.getContext('2d');
+ canvas.width = width;
+ canvas.height = hrate * height;
+ context.drawImage(video, 0, 0, width, height);
+ var data = canvas.toDataURL('image/png');
+ beep();
+ sendValue(data);
+ }
+ videoheight.addEventListener('change', hrateChange);
+ video.addEventListener('click', takePicture);
+ window.rendered = true
+ }
+ }
+
+ // Render the component whenever python send a "render event"
+ Streamlit.events.addEventListener(Streamlit.RENDER_EVENT, onRender)
+ // Tell Streamlit that the component is ready to receive events
+ Streamlit.setComponentReady()
+ // Don't actually need to display anything, so set the height to 0
+ Streamlit.setFrameHeight(0)
+
\ No newline at end of file
diff --git a/components/streamlit_tesseract_scanner/frontend/streamlit-component-lib.js b/components/streamlit_tesseract_scanner/frontend/streamlit-component-lib.js
new file mode 100644
index 0000000..6b2ec02
--- /dev/null
+++ b/components/streamlit_tesseract_scanner/frontend/streamlit-component-lib.js
@@ -0,0 +1,35 @@
+
+// Borrowed minimalistic Streamlit API from Thiago
+// https://discuss.streamlit.io/t/code-snippet-create-components-without-any-frontend-tooling-no-react-babel-webpack-etc/13064
+function sendMessageToStreamlitClient(type, data) {
+ console.log(type, data)
+ const outData = Object.assign({
+ isStreamlitMessage: true,
+ type: type,
+ }, data);
+ window.parent.postMessage(outData, "*");
+ }
+
+ const Streamlit = {
+ setComponentReady: function() {
+ sendMessageToStreamlitClient("streamlit:componentReady", {apiVersion: 1});
+ },
+ setFrameHeight: function(height) {
+ sendMessageToStreamlitClient("streamlit:setFrameHeight", {height: height});
+ },
+ setComponentValue: function(value) {
+ sendMessageToStreamlitClient("streamlit:setComponentValue", {value: value});
+ },
+ RENDER_EVENT: "streamlit:render",
+ events: {
+ addEventListener: function(type, callback) {
+ window.addEventListener("message", function(event) {
+ if (event.data.type === type) {
+ event.detail = event.data
+ callback(event);
+ }
+ });
+ }
+ }
+ }
+
\ No newline at end of file
diff --git a/components/streamlit_tesseract_scanner/frontend/test.js b/components/streamlit_tesseract_scanner/frontend/test.js
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/components/streamlit_tesseract_scanner/frontend/test.js
@@ -0,0 +1 @@
+
diff --git a/libs/http.py b/libs/http.py
new file mode 100644
index 0000000..881c188
--- /dev/null
+++ b/libs/http.py
@@ -0,0 +1,28 @@
+import requests
+import os
+
+
+def search_knowledge(collection, query):
+ gpt_address = os.getenv("GPT_SERVICE_ADDRESS")
+ api_token = os.getenv("GPT_SERVICE_TOKEN")
+ url = f"{gpt_address}/knowledge/search"
+ headers = {
+ "Accept": "application/json",
+ "Content-Type": "application/json",
+ "Authorization": f"Bearer {api_token}"
+ }
+ payload = {
+ "collection": collection,
+ "query": query
+ }
+
+ response = requests.post(url, headers=headers, json=payload)
+ if response.status_code != 200:
+ return f"Error searching knowledge: {response.text}"
+ data = response.json()
+
+ def fmt(v):
+ return f'**Score**: {v["score"]}\n\n{v["content"]}\n\n---\n\n'
+
+ return "\n\n".join([fmt(v) for v in data["result"]["data"]])
+
diff --git a/libs/msal.py b/libs/msal.py
new file mode 100644
index 0000000..ab5c405
--- /dev/null
+++ b/libs/msal.py
@@ -0,0 +1,24 @@
+from msal_streamlit_authentication import msal_authentication
+import os
+
+
+def msal_auth():
+ tenant_id = os.getenv("MSAL_TENANTID")
+ app_id = os.getenv("MSAL_APPID")
+ return msal_authentication(
+ auth={
+ "clientId": app_id,
+ "authority": f"https://login.microsoftonline.com/{tenant_id}",
+ "redirectUri": "/",
+ "postLogoutRedirectUri": "/"
+ },
+ cache={
+ "cacheLocation": "sessionStorage",
+ "storeAuthStateInCookie": False
+ },
+ login_button_text="Microsoft Account Login",
+ login_request={
+ "scopes": [f"{app_id}/.default"]
+ },
+ key="msal_token"
+ )
diff --git a/pages/02_Knowledge_Search.py b/pages/02_Knowledge_Search.py
index 0dc0414..d4d48b8 100644
--- a/pages/02_Knowledge_Search.py
+++ b/pages/02_Knowledge_Search.py
@@ -1,13 +1,56 @@
import streamlit as st
+import os
+import sys
+from dotenv import load_dotenv
-# 在其他页面
-if 'authenticated' not in st.session_state or not st.session_state['authenticated']:
- st.error("请先登录。")
- st.stop() # 阻止未认证的用户访问页面内容
+sys.path.append(os.path.abspath('..'))
+load_dotenv()
+from libs.http import search_knowledge
+from libs.msal import msal_auth
+
+if os.getenv("DEV_MODE") not in ["true", "1", "on"]:
+ value = msal_auth()
+ if value is None:
+ st.stop()
+
+knowledges = {
+ "青少年编程": "codeboy",
+ "对数课堂": "logbot",
+}
st.sidebar.markdown("# 知识库搜索")
st.title("知识库搜索")
-st.subheader("搜索知识库内容")
st.divider()
+# Seed the chat history with a welcome message on the first run of a session.
+if "messages" not in st.session_state.keys():
+ st.session_state.messages = [{"role": "assistant", "content": "欢迎使用知识库检索, 请输入主题"}]
+
+# Let the user pick a knowledge base and map its label to a collection id.
+collection = st.selectbox("选择知识库", knowledges.keys())
+collection_value = knowledges[collection]
+
+# Replay the stored conversation so it survives Streamlit reruns.
+for message in st.session_state.messages:
+ with st.chat_message(message["role"]):
+ st.write(message["content"])
+
+
+# Sidebar button callback: reset the history to a single welcome message.
+def clear_chat_history():
+ st.session_state.messages = [{"role": "assistant", "content": "欢迎使用知识库检索,请输入主题"}]
+
+
+st.sidebar.button('清除历史', on_click=clear_chat_history)
+
+# Record and echo a newly submitted query.
+if prompt := st.chat_input("输入检索主题"):
+ st.session_state.messages.append({"role": "user", "content": prompt})
+ with st.chat_message("user"):
+ st.write(prompt)
+
+# When the last message is not from the assistant, run the search and
+# append its result (or a "nothing found" notice) as the assistant's reply.
+if st.session_state.messages[-1]["role"] != "assistant":
+ with st.chat_message("assistant"):
+ with st.spinner("Thinking..."):
+ response = search_knowledge(collection_value, prompt)
+ if response is None:
+ response = "没有找到相关知识"
+ st.markdown(response)
+ message = {"role": "assistant", "content": response}
+ st.session_state.messages.append(message)
diff --git a/pages/03_Msal.py b/pages/03_Msal.py
index 31c1ffd..4072357 100644
--- a/pages/03_Msal.py
+++ b/pages/03_Msal.py
@@ -1,33 +1,12 @@
import streamlit as st
-from msal_streamlit_authentication import msal_authentication
+import sys
import os
+from libs.msal import msal_auth
from dotenv import load_dotenv
+sys.path.append(os.path.abspath('..'))
load_dotenv()
-MSAL_TENANTID = os.getenv("MSAL_TENANTID")
-MSAL_APPID = os.getenv("MSAL_APPID")
-
-
-st.session_state
-
-if "token" in st.session_state and st.session_state["token"]:
- st.write("Token", st.session_state["token"])
-else:
- value = msal_authentication(
- auth={
- "clientId": MSAL_APPID,
- "authority": f"https://login.microsoftonline.com/{MSAL_TENANTID}",
- "redirectUri": "/",
- "postLogoutRedirectUri": "/"
- },
- cache={
- "cacheLocation": "sessionStorage",
- "storeAuthStateInCookie": False
- },
- login_request={
- "scopes": [f"{MSAL_APPID}/.default"]
- },
- key=1)
- st.session_state["token"] = value
+value = msal_auth()
+st.write(value)
diff --git a/pages/04_OCR.py b/pages/04_OCR.py
new file mode 100644
index 0000000..3f00287
--- /dev/null
+++ b/pages/04_OCR.py
@@ -0,0 +1,10 @@
+import streamlit as st
+from components.streamlit_tesseract_scanner import tesseract_scanner
+
+img_file_buffer = st.camera_input("Take a picture")
+
+blacklist='@*|©_Ⓡ®¢§š'
+data = tesseract_scanner(showimg=True, lang='chi_sim+eng', psm=11)
+
+if data is not None:
+ st.write(data)
diff --git a/requirements-gptstudio.txt b/requirements-gptstudio.txt
index dfb6628..8f2fde7 100644
--- a/requirements-gptstudio.txt
+++ b/requirements-gptstudio.txt
@@ -12,3 +12,5 @@ graphviz
jinja2
streamlit
msal_streamlit_authentication
+opencv-python-headless
+