Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[大模型]功能整合 #7

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,3 @@ config.json
/output/*.txt/
venv
logs

96 changes: 96 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
name: ocr
channels:
- defaults
dependencies:
- blas=1.0=mkl
- bzip2=1.0.8=h2bbff1b_6
- ca-certificates=2024.12.31=haa95532_0
- cairo=1.16.0=hc68a040_5
- eigen=3.4.0=h59b6b97_0
- expat=2.6.4=h8ddb27b_0
- fontconfig=2.14.1=hb33846d_3
- freetype=2.12.1=ha860e81_0
- glib=2.78.4=hd77b12b_0
- glib-tools=2.78.4=hd77b12b_0
- graphite2=1.3.14=hd77b12b_1
- gst-plugins-base=1.22.3=hfc3ed34_1
- gstreamer=1.22.3=h1779075_1
- harfbuzz=4.3.0=hb646838_2
- hdf5=1.12.1=h51c971a_3
- icc_rt=2022.1.0=h6049295_2
- icu=73.1=h6c2663c_0
- intel-openmp=2021.4.0=haa95532_3556
- jpeg=9e=h827c3e9_3
- krb5=1.20.1=h5b6d351_0
- lerc=4.0.0=h5da7b33_0
- libabseil=20240116.2=cxx17_h5da7b33_0
- libclang=14.0.6=default_hb5a9fac_2
- libclang13=14.0.6=default_h8e68704_2
- libdeflate=1.22=h5bf469e_0
- libffi=3.4.4=hd77b12b_1
- libglib=2.78.4=ha17d25a_0
- libiconv=1.16=h2bbff1b_3
- libogg=1.3.5=h2bbff1b_1
- libpng=1.6.39=h8cc25b3_0
- libpq=17.2=h70ee33d_0
- libprotobuf=4.25.3=hf2fb9eb_0
- libtiff=4.5.1=h44ae7cf_1
- libvorbis=1.3.7=he774522_0
- libwebp-base=1.3.2=h3d04722_1
- libxml2=2.13.5=h24da03e_0
- lz4-c=1.9.4=h2bbff1b_1
- mkl=2021.4.0=haa95532_640
- mkl-service=2.4.0=py38h2bbff1b_0
- mkl_fft=1.3.1=py38h277e83a_0
- mkl_random=1.2.2=py38hf11a4ad_0
- numpy=1.24.3=py38hf95b240_0
- numpy-base=1.24.3=py38h005ec55_0
- opencv=4.10.0=py38hd762f8c_0
- openjpeg=2.5.2=hae555c5_0
- openssl=3.0.15=h827c3e9_0
- pcre2=10.42=h0ff8eda_1
- pip=24.2=py38haa95532_0
- pixman=0.40.0=h2bbff1b_1
- python=3.8.20=h8205438_0
- qt-main=5.15.2=h19c9488_11
- setuptools=75.1.0=py38haa95532_0
- six=1.16.0=pyhd3eb1b0_1
- sqlite=3.45.3=h2bbff1b_0
- vc=14.40=haa95532_2
- vs2015_runtime=14.42.34433=h9531ae6_2
- wheel=0.44.0=py38haa95532_0
- xz=5.4.6=h8cc25b3_1
- zlib=1.2.13=h8cc25b3_1
- zstd=1.5.6=h8880b57_0
- pip:
- annotated-types==0.7.0
- anyio==4.5.2
- certifi==2025.1.31
- charset-normalizer==3.4.1
- colorama==0.4.6
- distro==1.9.0
- exceptiongroup==1.2.2
- h11==0.14.0
- httpcore==1.0.7
- httpx==0.28.1
- idna==3.10
- jiter==0.8.2
- mouseinfo==0.1.3
- mss==9.0.2
- openai==1.61.0
- pillow==10.4.0
- pyautogui==0.9.54
- pydantic==2.10.6
- pydantic-core==2.27.2
- pygetwindow==0.0.9
- pymsgbox==1.0.9
- pyperclip==1.9.0
- pyrect==0.2.0
- pyscreeze==1.0.1
- pytweening==1.2.0
- requests==2.32.3
- sniffio==1.3.1
- tqdm==4.67.1
- typing-extensions==4.12.2
- urllib3==2.2.3
prefix: E:\Anaconda\software\envs\ocr
Binary file added img/test.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
91 changes: 51 additions & 40 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
# -*- coding: utf-8 -*-
import base64
import io
import json
import os
import time
import re
import sys
import logging

from PIL import ImageChops

from PIL import Image, ImageChops
from process.LLM import getResponse
from process.ScreenCapture import ScreenCapture
from process.OCR import OCR
from process.Query import Query
from process.Click import Click
from process.logger import logger
import numpy as np

cc = Click(0, 40)
current_dir = os.path.dirname(os.path.abspath(__file__))
image_path = os.path.join(current_dir, 'tmp', 'test.png')
threshold = 90

END_WORDS_DICT = {
"VICTORY": True
def isSame(imgA, imgB):
    """Return True when two screenshots are similar enough to be the same frame.

    Both images are converted to RGB and compared pixel by pixel. The
    similarity is the percentage of pixels that are identical in every
    channel, and it is compared against the module-level ``threshold``.

    Args:
        imgA: Previous image (PIL image) or None.
        imgB: Current image (PIL image) or None.

    Returns:
        bool: False if either image is None; True if the images are identical
        or their similarity percentage is at least ``threshold``.
    """
    if imgA is None or imgB is None:
        return False

    diff = ImageChops.difference(imgA.convert('RGB'), imgB.convert('RGB'))
    # getbbox() is None when every pixel of the difference image is zero.
    if not diff.getbbox():
        return True

    diff_array = np.array(diff)
    # Count pixels that differ in at least one channel. Summing the raw
    # difference values (as before) mixes magnitudes with counts and can
    # exceed the pixel total, producing a negative "similarity".
    changed_pixels = int(np.count_nonzero(diff_array.any(axis=-1)))
    total_pixels = diff_array.shape[0] * diff_array.shape[1]
    similarity_percentage = ((total_pixels - changed_pixels) / total_pixels) * 100
    print(f"相似度: {similarity_percentage:.2f}%")

    return bool(similarity_percentage >= threshold)


def getOCRConfig():
    """Load and return the parsed contents of ``./config.json``.

    The path is relative to the current working directory and the file is
    read as UTF-8.

    Returns:
        The decoded JSON document.
    """
    with open("./config.json", "r", encoding="utf-8") as config_file:
        raw_text = config_file.read()
    return json.loads(raw_text)

def test_base64(base64_string):
    """Debug helper: decode a base64 image, save it as a JPEG and display it.

    Accepts either a bare base64 payload or a ``data:<mime>;base64,<payload>``
    URL; the URL header is stripped before decoding, because ``b64decode``
    would otherwise silently turn it into garbage bytes.

    Args:
        base64_string: Base64 text (optionally a data URL) of an image.

    Side effects:
        Writes ``restored_image.jpg`` in the current working directory and
        opens the image in the default viewer.
    """
    if isinstance(base64_string, str) and base64_string.startswith("data:"):
        # Keep only the payload after the first comma of the data URL.
        base64_string = base64_string.partition(",")[2]

    # Decode the base64 text into the raw image bytes.
    image_binary = base64.b64decode(base64_string)
    # Wrap the bytes in a file-like object so PIL can parse them.
    buffered = io.BytesIO(image_binary)
    image = Image.open(buffered)

    # JPEG cannot store alpha or palette modes, so convert those to RGB first.
    if image.mode in ("RGB", "L", "CMYK"):
        jpeg_ready = image
    else:
        jpeg_ready = image.convert("RGB")
    # Save the restored image.
    jpeg_ready.save("restored_image.jpg")
    # Show the restored image.
    image.show()

if __name__ == "__main__":
config = getOCRConfig()

sc = ScreenCapture()
ocr = OCR(config["APP_ID"], config["API_KEY"], config["SECRET_KEY"])
query = Query()

quesImg, answImg = None, None
tmpQuesText = ''
Img = None

while True:
tmpQuesImg, tmpAnswImg, appImg = sc.run()

tmpImg = sc.run()
# print(tmpQuesImg)
# print(tmpAnswImg)

if not isSame(quesImg, tmpQuesImg):
quesImg, answImg, appImg = tmpQuesImg, tmpAnswImg, appImg
ques, answ = ocr.run(quesImg, answImg)
if not isSame(Img, tmpImg):
Img = tmpImg
print("new pic")
Imgb64 = sc.base64(image_path)
#test_base64(Imgb64)
answ = getResponse(Imgb64)

# 如果匹配victory|defeat退出程序
if re.search('victory|defeat|defert|自动匹配|排行榜|看广告', "".join(answ), flags=re.I):
if answ==None or re.search('victory|defeat|defert|自动匹配|排行榜|看广告', "".join(answ), flags=re.I):
sys.exit()

if (len(ques) > 0 and (tmpQuesText != ques)):
tmpQuesText = ques

freq, rightAnswer, hint = query.run(ques, answ)

if(rightAnswer is not None):
logger.info("问题: %s", ques)
logger.info("\033[1;47;32m正确答案: %s\033[0m", rightAnswer)
freqText = ''
for index in range(len(freq)):
freqText += (answ[index] + ' :' + str(round(100 * freq[index], 1)) + '% ')
logger.info('概率: %s', freqText)
logger.info('依据: %s', hint)
cc.run(appImg, answ, rightAnswer)
logger.info('-----------------')
logger.info('')

time.sleep(0.1)

logger.info("\033[1;47;32m正确答案: %s\033[0m", answ)
#cc.run(appImg, answ, rightAnswer)
logger.info('-----------------')
logger.info('')
else:
print("no new pic, wait 2 sec...")

time.sleep(2)

Binary file added output/images/test.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
43 changes: 43 additions & 0 deletions process/LLM.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import os
from openai import OpenAI



# Shared OpenAI-compatible client pointed at the SiliconFlow endpoint.
# The API key is read from the API_SILICON_KEY environment variable
# (os.environ.get yields None when it is not set).
client = OpenAI(
    api_key=os.environ.get('API_SILICON_KEY'), # obtain the key from https://cloud.siliconflow.cn/account/ak
    base_url="https://api.siliconflow.cn/v1"
)

def getResponse(base64imgurl):
    """Send an image to the vision model and return its full text reply.

    The request is made with ``stream=True`` through the module-level
    ``client``; the text fragments of every streamed chunk are collected and
    joined into one string.

    Args:
        base64imgurl: Value placed in the ``image_url.url`` field of the
            request (the caller in this project passes a base64 data URL).

    Returns:
        str: The concatenated text of all streamed chunks ("" if none).
    """
    response = client.chat.completions.create(
        model="Qwen/Qwen2-VL-72B-Instruct",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": base64imgurl,
                            "detail": "low"
                        }
                    },
                    {
                        "type": "text",
                        "text": "首先请识别图中文字。如果该图片的内容为问答题,则根据图中问题及所给选项,选择并只输出正确的答案选项,无需输出其他额外信息;否则请输出识别的图中文字即可"
                    }
                ]
            }],
        stream=True
    )

    fragments = []
    for chunk in response:
        # Some streamed chunks carry no choices at all; skip them.
        if not chunk.choices:
            continue
        chunk_message = chunk.choices[0].delta.content
        # delta.content can be None (e.g. role-only or final chunk);
        # concatenating None to a str would raise TypeError.
        if chunk_message:
            fragments.append(chunk_message)

    return "".join(fragments)
58 changes: 48 additions & 10 deletions process/ScreenCapture.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
# -*- coding: utf-8 -*-
from PIL import ImageGrab
import base64
import io
from PIL import ImageGrab,Image
from base64 import b64encode
from io import BytesIO
import os
import pygetwindow as gw
import pyautogui
import time

# mode = 'JIE_TU'
mode = 'MINI_APP'
Expand All @@ -20,6 +26,9 @@ def __init__(self):

# self.bound = (50, 80, 414 + 50, 736 + 80)
self.rpx = self._rpx2px(self.bound[2] - self.bound[0])
self.current_dir = os.path.dirname(os.path.abspath(__file__))
self.image_path = os.path.join(self.current_dir, '..', 'img', 'test.png')
self.image_path2 = os.path.join(self.current_dir, '..', 'tmp', 'test.png')

# rpx转px
def _rpx2px(self, base):
Expand All @@ -31,19 +40,46 @@ def _rpx(rpx):
# 截图
def _getCapture(self):
    """Grab the screen region ``self.bound``, save it and return the image.

    The capture is written to ``output/images/test.png`` (relative to the
    current working directory) before being returned.
    """
    capture = ImageGrab.grab(self.bound)
    capture.save('output/images/test.png')
    return capture

def take_screenshot_of_window(self,window_title):
    """Activate the window matching ``window_title`` and capture it.

    Args:
        window_title: Title passed to ``gw.getWindowsWithTitle``.

    Returns:
        The screenshot image, which is also saved to ``self.image_path2``.
        When no window matches, the string "窗口未找到" is returned instead
        of an image.
    """
    matches = gw.getWindowsWithTitle(window_title)
    if not matches:
        return "窗口未找到"

    # Only the first match is used.
    target = matches[0]
    target.activate()
    # Give the window a moment to come to the foreground.
    time.sleep(0.5)

    # Region is (left, top, width, height) of the window on screen.
    region = (target.left, target.top, target.width, target.height)
    shot = pyautogui.screenshot(region=region)
    # Persist the capture to disk.
    shot.save(self.image_path2)
    return shot

# base64
def base64(self, img):
buffer = BytesIO()
img.save(buffer, format='PNG')
img.close()
def base64(self, image_path):
    """Read the file at ``image_path`` and return it as a PNG data URL.

    Args:
        image_path: Path of the image file to read in binary mode.

    Returns:
        str: ``data:image/png;base64,`` followed by the base64 text of the
        file's bytes.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    base64_encoded_image = base64.b64encode(raw_bytes).decode('utf-8')
    return f"data:image/png;base64,{base64_encoded_image}"

def base64_online(self,img):
    """Encode an in-memory image as the base64 text of its PNG bytes.

    The image is serialized to PNG in memory and encoded exactly once; the
    result has no ``data:`` URL header.

    Args:
        img: Object with a PIL-style ``save(fp, format=...)`` method.

    Returns:
        str: Base64 text of the PNG-encoded image.
    """
    with io.BytesIO() as buffer:
        img.save(buffer, format='PNG')
        # Copy the bytes out while the buffer is still open; getvalue()
        # raises ValueError once the `with` block has closed it.
        png_bytes = buffer.getvalue()

    # Encode once: encoding the already-encoded bytes again would yield text
    # that decodes to base64 instead of to the image.
    return b64encode(png_bytes).decode('utf-8')

# 切割
def _splitCapture(self, img):
if(type == 'FRIEND_PK'):
Expand All @@ -63,6 +99,8 @@ def _splitCapture(self, img):


def run(self):
img = self._getCapture()
return self._splitCapture(img)
img = self.take_screenshot_of_window("微信读书")
#img = self._test()
#return self.base64(self.image_path)
return img

Loading