lllyin · genhh · Feb 3, 2025 · Feb 3, 2025 · Feb 3, 2025 · Feb 3, 2025
diff --git a/.gitignore b/.gitignore
@@ -11,4 +11,3 @@ config.json
 /output/*.txt/
 venv
 logs
-
diff --git a/environment.yml b/environment.yml
@@ -0,0 +1,96 @@
+name: ocr
+channels:
+  - defaults
+dependencies:
+  - blas=1.0=mkl
+  - bzip2=1.0.8=h2bbff1b_6
+  - ca-certificates=2024.12.31=haa95532_0
+  - cairo=1.16.0=hc68a040_5
+  - eigen=3.4.0=h59b6b97_0
+  - expat=2.6.4=h8ddb27b_0
+  - fontconfig=2.14.1=hb33846d_3
+  - freetype=2.12.1=ha860e81_0
+  - glib=2.78.4=hd77b12b_0
+  - glib-tools=2.78.4=hd77b12b_0
+  - graphite2=1.3.14=hd77b12b_1
+  - gst-plugins-base=1.22.3=hfc3ed34_1
+  - gstreamer=1.22.3=h1779075_1
+  - harfbuzz=4.3.0=hb646838_2
+  - hdf5=1.12.1=h51c971a_3
+  - icc_rt=2022.1.0=h6049295_2
+  - icu=73.1=h6c2663c_0
+  - intel-openmp=2021.4.0=haa95532_3556
+  - jpeg=9e=h827c3e9_3
+  - krb5=1.20.1=h5b6d351_0
+  - lerc=4.0.0=h5da7b33_0
+  - libabseil=20240116.2=cxx17_h5da7b33_0
+  - libclang=14.0.6=default_hb5a9fac_2
+  - libclang13=14.0.6=default_h8e68704_2
+  - libdeflate=1.22=h5bf469e_0
+  - libffi=3.4.4=hd77b12b_1
+  - libglib=2.78.4=ha17d25a_0
+  - libiconv=1.16=h2bbff1b_3
+  - libogg=1.3.5=h2bbff1b_1
+  - libpng=1.6.39=h8cc25b3_0
+  - libpq=17.2=h70ee33d_0
+  - libprotobuf=4.25.3=hf2fb9eb_0
+  - libtiff=4.5.1=h44ae7cf_1
+  - libvorbis=1.3.7=he774522_0
+  - libwebp-base=1.3.2=h3d04722_1
+  - libxml2=2.13.5=h24da03e_0
+  - lz4-c=1.9.4=h2bbff1b_1
+  - mkl=2021.4.0=haa95532_640
+  - mkl-service=2.4.0=py38h2bbff1b_0
+  - mkl_fft=1.3.1=py38h277e83a_0
+  - mkl_random=1.2.2=py38hf11a4ad_0
+  - numpy=1.24.3=py38hf95b240_0
+  - numpy-base=1.24.3=py38h005ec55_0
+  - opencv=4.10.0=py38hd762f8c_0
+  - openjpeg=2.5.2=hae555c5_0
+  - openssl=3.0.15=h827c3e9_0
+  - pcre2=10.42=h0ff8eda_1
+  - pip=24.2=py38haa95532_0
+  - pixman=0.40.0=h2bbff1b_1
+  - python=3.8.20=h8205438_0
+  - qt-main=5.15.2=h19c9488_11
+  - setuptools=75.1.0=py38haa95532_0
+  - six=1.16.0=pyhd3eb1b0_1
+  - sqlite=3.45.3=h2bbff1b_0
+  - vc=14.40=haa95532_2
+  - vs2015_runtime=14.42.34433=h9531ae6_2
+  - wheel=0.44.0=py38haa95532_0
+  - xz=5.4.6=h8cc25b3_1
+  - zlib=1.2.13=h8cc25b3_1
+  - zstd=1.5.6=h8880b57_0
+  - pip:
+    - annotated-types==0.7.0
+    - anyio==4.5.2
+    - certifi==2025.1.31
+    - charset-normalizer==3.4.1
+    - colorama==0.4.6
+    - distro==1.9.0
+    - exceptiongroup==1.2.2
+    - h11==0.14.0
+    - httpcore==1.0.7
+    - httpx==0.28.1
+    - idna==3.10
+    - jiter==0.8.2
+    - mouseinfo==0.1.3
+    - mss==9.0.2
+    - openai==1.61.0
+    - pillow==10.4.0
+    - pyautogui==0.9.54
+    - pydantic==2.10.6
+    - pydantic-core==2.27.2
+    - pygetwindow==0.0.9
+    - pymsgbox==1.0.9
+    - pyperclip==1.9.0
+    - pyrect==0.2.0
+    - pyscreeze==1.0.1
+    - pytweening==1.2.0
+    - requests==2.32.3
+    - sniffio==1.3.1
+    - tqdm==4.67.1
+    - typing-extensions==4.12.2
+    - urllib3==2.2.3
+prefix: E:\Anaconda\software\envs\ocr
diff --git a/img/test.png b/img/test.png
diff --git a/main.py b/main.py
@@ -1,19 +1,22 @@
 # -*- coding: utf-8 -*-
+import base64
+import io
 import json
+import os
 import time
 import re
 import sys
 import logging
-
-from PIL import ImageChops
-
+from PIL import Image, ImageChops
+from process.LLM import getResponse
 from process.ScreenCapture import ScreenCapture
-from process.OCR import OCR
-from process.Query import Query
 from process.Click import Click
 from process.logger import logger
+import numpy as np
 
-cc = Click(0, 40)
+current_dir = os.path.dirname(os.path.abspath(__file__))  # directory containing main.py
+image_path = os.path.join(current_dir, 'tmp', 'test.png')  # file passed to sc.base64() in the main loop
+threshold = 90  # similarity percentage at or above which isSame() returns True
 
 END_WORDS_DICT = {
   "VICTORY": True
@@ -23,55 +26,63 @@ def isSame(imgA, imgB):
     if imgA is None or imgB is None:
         return False
     diff = ImageChops.difference(imgA.convert('RGB'), imgB.convert('RGB'))
-    if diff.getbbox():
+    diff_array = np.array(diff)
+    # Count pixels that differ in at least one RGB channel. Summing the raw
+    # channel deltas is not a pixel count and can exceed total_pixels.
+    non_zero_pixels = np.count_nonzero(diff_array.any(axis=-1))
+    # Total number of pixels in the image.
+    total_pixels = imgA.size[0] * imgA.size[1]
+    # Share of unchanged pixels, as a percentage.
+    similarity_percentage = ((total_pixels - non_zero_pixels) / total_pixels) * 100
+    print(f"相似度: {similarity_percentage:.2f}%")
+    if similarity_percentage >= threshold:
+        return True
+    else:
         return False
-    return True
 
 
 def getOCRConfig():
     with open("./config.json", "r", encoding="utf-8") as fp:
         return json.load(fp)
 
+def test_base64(base64_string):
+    """Debug helper: decode a base64 image (raw or data URL), save and show it."""
+    # Drop a leading "data:...;base64," header so data URLs decode correctly.
+    image_binary = base64.b64decode(base64_string.split(",", 1)[-1])
+    # Wrap the bytes in BytesIO so PIL can open them as an image.
+    buffered = io.BytesIO(image_binary)
+    image = Image.open(buffered)
+    # JPEG cannot store alpha, so convert to RGB before saving the restored image.
+    image.convert("RGB").save("restored_image.jpg")
+    # Display the restored image.
+    image.show()
 
 if __name__ == "__main__":
-    config = getOCRConfig()
-
     sc = ScreenCapture()
-    ocr = OCR(config["APP_ID"], config["API_KEY"], config["SECRET_KEY"])
-    query = Query()
-
-    quesImg, answImg = None, None
-    tmpQuesText = ''
+    Img = None
 
     while True:
-        tmpQuesImg, tmpAnswImg, appImg = sc.run()
-
+        tmpImg = sc.run()
         # print(tmpQuesImg)
         # print(tmpAnswImg)
 
-        if not isSame(quesImg, tmpQuesImg):
-            quesImg, answImg, appImg = tmpQuesImg, tmpAnswImg, appImg
-            ques, answ = ocr.run(quesImg, answImg)
+        if not isSame(Img, tmpImg):
+            Img = tmpImg
+            print("new pic")
+            Imgb64 = sc.base64(image_path)
+            # Debugging aid: test_base64(Imgb64) decodes and displays this payload.
+            answ = getResponse(Imgb64)
 
             # 如果匹配victory｜defeat退出程序
-            if re.search('victory|defeat|defert|自动匹配|排行榜|看广告', "".join(answ), flags=re.I):
+            if answ is None or re.search('victory|defeat|defert|自动匹配|排行榜|看广告', "".join(answ), flags=re.I):
                 sys.exit()
-
-            if (len(ques) > 0 and (tmpQuesText != ques)):
-                tmpQuesText = ques
-
-                freq, rightAnswer, hint = query.run(ques, answ)
-
-                if(rightAnswer is not None):
-                    logger.info("问题: %s", ques)
-                    logger.info("\033[1;47;32m正确答案: %s\033[0m", rightAnswer)
-                    freqText = ''
-                    for index in range(len(freq)):
-                        freqText += (answ[index] + ' :' + str(round(100 * freq[index], 1)) + '%    ')
-                    logger.info('概率: %s', freqText)
-                    logger.info('依据: %s', hint)
-                    cc.run(appImg, answ, rightAnswer)
-                    logger.info('-----------------')
-                    logger.info('')
-
-        time.sleep(0.1)
+
+            logger.info("\033[1;47;32m正确答案: %s\033[0m", answ)
+            # Auto-clicking via Click.run is disabled here; the reply is only logged.
+            logger.info('-----------------')
+            logger.info('')
+        else:
+            print("no new pic, wait 2 sec...")
+
+        time.sleep(2)
+
diff --git a/output/images/test.png b/output/images/test.png
diff --git a/process/LLM.py b/process/LLM.py
@@ -0,0 +1,43 @@
+import os
+from openai import OpenAI
+
+
+
+client = OpenAI(
+    api_key=os.environ.get('API_SILICON_KEY'), # read from the API_SILICON_KEY env var; obtain a key at https://cloud.siliconflow.cn/account/ak
+    base_url="https://api.siliconflow.cn/v1"
+)
+
+def getResponse(base64imgurl):
+    """Send the image URL (data URL) to the vision model and return its full reply text."""
+    response = client.chat.completions.create(
+            model="Qwen/Qwen2-VL-72B-Instruct",
+            messages=[
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": base64imgurl,
+                            "detail":"low"
+                        }
+                    },
+                    {
+                        "type": "text",
+                        "text": "首先请识别图中文字。如果该图片的内容为问答题，则根据图中问题及所给选项，选择并只输出正确的答案选项，无需输出其他额外信息；否则请输出识别的图中文字即可"
+
+                    }
+                ]
+            }],
+            stream=True
+    )
+
+    parts = []
+    for chunk in response:
+        # Streamed chunks may have no choices or a None delta.content (e.g. the
+        # final chunk), so only collect real text instead of concatenating None.
+        if chunk.choices and chunk.choices[0].delta.content:
+            parts.append(chunk.choices[0].delta.content)
+
+    return "".join(parts)
diff --git a/process/ScreenCapture.py b/process/ScreenCapture.py
@@ -1,7 +1,13 @@
 # -*- coding: utf-8 -*-
-from PIL import ImageGrab
+import base64
+import io
+from PIL import ImageGrab,Image
 from base64 import b64encode
 from io import BytesIO
+import os
+import pygetwindow as gw
+import pyautogui
+import time
 
 # mode = 'JIE_TU'
 mode = 'MINI_APP'
@@ -20,6 +26,9 @@ def __init__(self):
 
         # self.bound = (50, 80, 414 + 50, 736 + 80)
         self.rpx = self._rpx2px(self.bound[2] - self.bound[0])
+        self.current_dir = os.path.dirname(os.path.abspath(__file__))
+        self.image_path = os.path.join(self.current_dir, '..', 'img', 'test.png')
+        self.image_path2 = os.path.join(self.current_dir, '..', 'tmp', 'test.png')
 
     # rpx转px
     def _rpx2px(self, base):
@@ -31,19 +40,46 @@ def _rpx(rpx):
     # 截图
     def _getCapture(self):
         img = ImageGrab.grab(self.bound)
+        img.save('output/images/test.png')  # also write the grab to disk; relative path, resolved against the current working directory
         return img
 
+    def take_screenshot_of_window(self,window_title):
+        """Activate and screenshot the first window found for window_title; return None if none is found."""
+        window = gw.getWindowsWithTitle(window_title)
+        if not window:
+            return None  # previously a str, which callers then treated as an image
+        window = window[0]  # assume only one window matches
+        window.activate()
+        # Give the window time to come to the foreground.
+        time.sleep(0.5)
+        # Screen region of the window as (left, top, width, height).
+        window_position = window.left, window.top, window.width, window.height
+
+        # Capture only that region of the screen.
+        screenshot = pyautogui.screenshot(region=window_position)
+        os.makedirs(os.path.dirname(self.image_path2), exist_ok=True)  # the tmp directory may not exist yet
+        screenshot.save(self.image_path2)
+        return screenshot
 
     # base64
-    def base64(self, img):
-        buffer = BytesIO()
-        img.save(buffer, format='PNG')
-        img.close()
+    def base64(self, image_path):
+        """Return the file at image_path as a "data:image/png;base64,..." URL string."""
+        with open(image_path, "rb") as fh:
+            payload = base64.b64encode(fh.read()).decode('utf-8')
+        return "data:image/png;base64," + payload
+
+    def base64_online(self,img):
+        """Return img serialized as PNG and base64-encoded once, as a str (no data-URL prefix)."""
+        with io.BytesIO() as buffer:
+            img.save(buffer, format='PNG')
+            img_binary = buffer.getvalue()
+        # img_binary holds the raw PNG bytes; encoding them here as well double-encoded the output.
+        base64_encoded_data = base64.b64encode(img_binary)
+        # Convert the base64 bytes into a str.
+        base64_encoded_image = base64_encoded_data.decode('utf-8')
 
-        b64_str = b64encode(buffer.getvalue())
+        return base64_encoded_image
 
-        return b64_str
-
     # 切割
     def _splitCapture(self, img):
         if(type == 'FRIEND_PK'):
@@ -63,6 +99,8 @@ def _splitCapture(self, img):
 
 
     def run(self):
-        img = self._getCapture()
-        return self._splitCapture(img)
+        img = self.take_screenshot_of_window("微信读书")
+        # The capture is returned as-is (no _splitCapture step); main.py then
+        # base64-encodes the saved file itself through base64(image_path).
+        return img
Original file line number	Diff line number	Diff line change
Expand Up		@@ -11,4 +11,3 @@ config.json
		/output/*.txt/
		venv
		logs