From efe7f73d6bcc8c45d736ed6ad2f4f0212fd35d20 Mon Sep 17 00:00:00 2001 From: ston1th Date: Wed, 17 Jan 2024 21:02:36 +0100 Subject: [PATCH 01/26] implemented text recognition (ocr) I implemented another comparison method based on OCR. This could be a useful addition in cases where modern game rendering and visual effects (clutter) cause difficulties to find good comparison images. It currently depends on pytesseract and Tesseract-OCR but tests with EasyOCR have also been conducted. Both seem to get similarly good recognition results. EasyOCR seems to cause higher CPU load than tesseract. Tesseract on the other hand is an external dependency that needs to be installed separately. The text comparison of the expected and recognized string has two modes: a perfect 1:1 match or the Levenshtein ratio. I also introduced two new file config options: * Rectangle position (only used for text files) * FPS limit per text or image file Please let me know what you think of this feature. --- README.md | 20 ++++++++++++++++++++ scripts/requirements.txt | 3 +++ src/AutoSplit.py | 16 ++++++++++++---- src/AutoSplitImage.py | 36 +++++++++++++++++++++++++++++++++++- src/compare.py | 33 +++++++++++++++++++++++++++++++++ src/split_parser.py | 29 ++++++++++++++++++++++++++++- 6 files changed, 131 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 298e7d7d..15f8f4c6 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,7 @@ This program can be used to automatically start, split, and reset your preferred - Wayland is not currently supported - WSL2/WSLg requires an additional Desktop Environment, external X11 server, and/or systemd - Python 3.10+ (Not required for normal use. Refer to the [build instructions](/docs/build%20instructions.md) if you'd like run the application directly in Python). +- Tesseract-OCR (optional; requierd for text recognition as an alternative comparison method). See https://github.com/UB-Mannheim/tesseract/wiki for installation instructions. 
## OPTIONS @@ -193,6 +194,8 @@ This option is mainly meant to be toggled with the `Toggle auto Reset Image` hot - **Custom thresholds** are place between parenthesis `()` in the filename. This value will override the default threshold. - **Custom pause times** are placed between square brackets `[]` in the filename. This value will override the default pause time. - **Custom delay times** are placed between hash signs `##` in the filename. Note that these are in milliseconds. For example, a 10 second split delay would be `#10000#`. You cannot skip or undo splits during split delays. +- **Custom comparison FPS** is placed between exclamation marks `!!` in the filename. This value will override the default FPS for the given image or text file and does also apply to the reset image. +- **Custom rectangle position** is placed between plus signs `++` in the filename. Note that these will only apply for text files when using text recognition (.txt). The scheme looks like this: `+740-1180-60-150+`. These are the X and Y coordinates in the image to draw a rectangle. They are seperated by the minus sign `-` and follow the form `+X-X-Y-Y+`. The second X and Y values need to be bigger then the first ones. You will need to adjust these values depending on your capture resolution. - A different **comparison method** can be specified with their 0-base index between carets `^^`: - `^0^`: L2 Norm - `^1^`: Histogram @@ -226,6 +229,23 @@ You can have one (and only one) image with the keyword `reset` in its name. Auto The Start Image is similar to the Reset Image. You can only have one Start Image with the keyword `start_auto_splitter`.You can reload the image using the "`Reload Start Image`" button. The pause time is the amount of seconds AutoSplit will wait before starting comparisons of the first split image. Delay times will be used to delay starting your timer after the threshold is met. 
+### Text Recognition (OCR) + +You can use text recognition as an alternative comparison method. +To use this feature you need to place a text file (.txt) in your splits folder instead of an image file. +Place the expected text in the text file that should be looked for. + +An example file name and content could look like this: + +Filename: `001_start_auto_splitter+275-540-70-95+.txt` + +Content: `complete any 2 encounters` + +This will look for the text `complete any 2 encounters` at the capture position `+275-540-70-95+`. + +Note: This method can cause high CPU usage at the standard comparison FPS. You should therefor limit the comparison FPS when you use this method to 1 or 2 FPS using the limit option `!1!` in the file name. +The size of the selected rectangle can also impact the CPU load (bigger = more CPU load). + ### Profiles diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 0357b646..b7ba4e2b 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -4,15 +4,18 @@ # # Dependencies: git+https://github.com/boppreh/keyboard.git#egg=keyboard # Fix install on macos and linux-ci https://github.com/boppreh/keyboard/pull/568 +Levenshtein numpy>=1.26 # Python 3.12 support opencv-python-headless>=4.9.0.80 # Typing fixes packaging +Pillow>=10.0 # Python 3.12 support psutil>=5.9.6 # Python 3.12 fixes # PyAutoGUI # See install.ps1 PyWinCtl>=0.0.42 # py.typed # When needed, dev builds can be found at https://download.qt.io/snapshots/ci/pyside/dev?C=M;O=D PySide6-Essentials>=6.6.0 ; sys_platform == 'win32' # Python 3.12 support PySide6-Essentials<6.5.1 ; sys_platform == 'linux' # Wayland issue on Ubuntu 22.04 https://bugreports.qt.io/browse/QTBUG-114635 +pytessract scipy>=1.11.2 # Python 3.12 support toml typing-extensions>=4.4.0 # @override decorator support diff --git a/src/AutoSplit.py b/src/AutoSplit.py index 67628555..8b45f29e 100644 --- a/src/AutoSplit.py +++ b/src/AutoSplit.py @@ -283,7 +283,8 @@ def __reload_start_image(self, 
started_by_button: bool = False, wait_for_delay: self.timer_start_image.stop() self.current_image_file_label.setText("-") self.start_image_status_value_label.setText("not found") - set_preview_image(self.current_split_image, None) + if self.current_split_image.text == None: + set_preview_image(self.current_split_image, None) if not (validate_before_parsing(self, started_by_button) and parse_and_validate_images(self)): QApplication.processEvents() @@ -307,7 +308,10 @@ def __reload_start_image(self, started_by_button: bool = False, wait_for_delay: self.highest_similarity = 0.0 self.reset_highest_similarity = 0.0 self.split_below_threshold = False - self.timer_start_image.start(int(ONE_SECOND / self.settings_dict["fps_limit"])) + start_image_fps = self.settings_dict["fps_limit"] + if self.start_image.fps != 0: + start_image_fps = self.start_image.fps + self.timer_start_image.start(int(ONE_SECOND / start_image_fps)) QApplication.processEvents() @@ -682,8 +686,12 @@ def __similarity_threshold_loop(self, number_of_split_images: int, dummy_splits_ self.undo_split_button.setEnabled(self.split_image_number != 0) QApplication.processEvents() + fps = self.settings_dict["fps_limit"] + if self.split_image.fps != 0: + fps = self.split_image.fps + # Limit the number of time the comparison runs to reduce cpu usage - frame_interval = 1 / self.settings_dict["fps_limit"] + frame_interval = 1 / fps # Use a time delta to have a consistant check interval wait_delta_ms = int((frame_interval - (time() - start) % frame_interval) * ONE_SECOND) @@ -867,7 +875,7 @@ def __update_split_image(self, specific_image: AutoSplitImage | None = None): # Get split image self.split_image = specific_image or self.split_images_and_loop_number[0 + self.split_image_number][0] - if is_valid_image(self.split_image.byte_array): + if self.split_image.text == None and is_valid_image(self.split_image.byte_array): set_preview_image(self.current_split_image, self.split_image.byte_array) 
self.current_image_file_label.setText(self.split_image.filename) diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py index d4176560..56feaf2e 100644 --- a/src/AutoSplitImage.py +++ b/src/AutoSplitImage.py @@ -37,8 +37,10 @@ class AutoSplitImage: filename: str flags: int loops: int + fps: int image_type: ImageType byte_array: MatLike | None = None + text: str | None = None mask: MatLike | None = None # This value is internal, check for mask instead _has_transparency = False @@ -47,6 +49,10 @@ class AutoSplitImage: __comparison_method: int | None = None __pause_time: float | None = None __similarity_threshold: float | None = None + __x: int + __xx: int + __y: int + __yy: int def get_delay_time(self, default: "AutoSplit | int"): """Get image's delay time or fallback to the default value from spinbox.""" @@ -89,7 +95,12 @@ def __init__(self, path: str): self.__comparison_method = comparison_method_from_filename(self.filename) self.__pause_time = pause_from_filename(self.filename) self.__similarity_threshold = threshold_from_filename(self.filename) - self.__read_image_bytes(path) + if path.endswith("txt"): + self.fps = fps_from_filename(self.filename) + self.__read_text(path) + self.__region(region_from_filename(self.filename)) + else: + self.__read_image_bytes(path) if START_KEYWORD in self.filename: self.image_type = ImageType.START @@ -98,6 +109,20 @@ def __init__(self, path: str): else: self.image_type = ImageType.SPLIT + def __region(self, region: str): + r = region.split("-") + if len(r) != 4: + return + self.__x = int(r[0]) + self.__xx = int(r[1]) + self.__y = int(r[2]) + self.__yy = int(r[3]) + + def __read_text(self, path: str): + f = open(path, "r") + self.text = f.read().lower().strip() + f.close() + def __read_image_bytes(self, path: str): image = cv2.imread(path, cv2.IMREAD_UNCHANGED) if not is_valid_image(image): @@ -140,6 +165,10 @@ def compare_with_capture( default: "AutoSplit | int", capture: MatLike | None, ): + """Extract image text from 
rectangle position and compare it with the expected string.""" + if self.text != None: + return extract_and_compare_text(capture[self.__y:self.__yy, self.__x:self.__xx], self.text) + """Compare image with capture using image's comparison method. Falls back to combobox.""" if not is_valid_image(self.byte_array) or not is_valid_image(capture): return 0.0 @@ -162,4 +191,9 @@ def compare_with_capture( loop_from_filename, pause_from_filename, threshold_from_filename, + region_from_filename, + fps_from_filename, + ) + from compare import ( + extract_and_compare_text, ) diff --git a/src/compare.py b/src/compare.py index 49bec7e6..d6306cb8 100644 --- a/src/compare.py +++ b/src/compare.py @@ -5,6 +5,16 @@ from cv2.typing import MatLike from scipy import fft +from PIL import Image +# TODO: easyocr vs. pytesseract? +# tesseract seems to cause less overall CPU load +#from easyocr import Reader +#reader = Reader(["en"], gpu=False, verbose=False, download_enabled=False) +import pytesseract +# TODO: make me configureable +pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract' +import Levenshtein as levenshtein + from utils import BGRA_CHANNEL_COUNT, MAXBYTE, ColorChannel, ImageShape, is_valid_image MAXRANGE = MAXBYTE + 1 @@ -125,6 +135,29 @@ def compare_phash(source: MatLike, capture: MatLike, mask: MatLike | None = None return 1 - (hash_diff / 64.0) +def extract_and_compare_text(capture: MatLike, text: str): + """ + Compares the extracted text of the given image and returns the similarity between the two texts. + + @param capture: Image of any given shape as a numpy array + @param text: a string to match for + @return: The similarity between the text in the image and the text supplied as a number 0 to 1. + """ + # if the string is found 1:1 in the string extracted from the image a 1 is returned. + # otherwise the levenshtein ratio is calculated between the two strings and gets returned. + ratio = 0.0 + # TODO: easyocr vs. pytesseract? 
+ #image_string = " ".join(reader.readtext(capture, detail=0)).lower().strip() + image_string = pytesseract.image_to_string(Image.fromarray(capture), config='--oem 1 --psm 6').lower().strip() + + if text in image_string: + ratio = 1.0 + else: + ratio = levenshtein.ratio(text, image_string) + # TODO: debug: remove me + if ratio > 0.9: + print(f'text from image ({ratio:,.2f}): {image_string}') + return ratio def __compare_dummy(*_: object): return 0.0 diff --git a/src/split_parser.py b/src/split_parser.py index acdf6dd7..38e50658 100644 --- a/src/split_parser.py +++ b/src/split_parser.py @@ -53,6 +53,33 @@ def threshold_from_filename(filename: str): # Check to make sure if it is a valid threshold return value if 0 <= value <= 1 else None +def region_from_filename(filename: str): + """ + Retrieve the capture region from the filename + + @param filename: String containing the file's name + @return: A region string, if not then None + """ + # Check to make sure there is a valid region string between + # plus signs of the filename + value = __value_from_filename(filename, "++", "") + + # Check to make sure if it is a valid threshold + return value if value != "" else None + +def fps_from_filename(filename: str): + """ + Retrieve the FPS specifix to the split file + + @param filename: String containing the file's name + @return: A FPS int value between 0 and 60. A value of 0 indictaes to use the global FPS value. 
+ """ + # Check to make sure there is a valid number between + # exclamation marks of the filename + value = __value_from_filename(filename, "!!", 0) + + # Check to make sure if it is a valid threshold + return value if value >= 0 and value <= 60 else 0 def pause_from_filename(filename: str): """ @@ -208,7 +235,7 @@ def parse_and_validate_images(autosplit: "AutoSplit"): else: for image in split_images: # Test for image without transparency - if not is_valid_image(image.byte_array): + if image.text == None and not is_valid_image(image.byte_array): def image_validity(filename: str): return lambda: error_messages.image_validity(filename) From a00dd41f81e44eec60a5268d292c2a9257e47f03 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 17 Jan 2024 20:18:58 +0000 Subject: [PATCH 02/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/AutoSplitImage.py | 10 ++++------ src/compare.py | 25 ++++++++++++++----------- src/split_parser.py | 3 +++ 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py index 56feaf2e..3a0e8066 100644 --- a/src/AutoSplitImage.py +++ b/src/AutoSplitImage.py @@ -119,7 +119,7 @@ def __region(self, region: str): self.__yy = int(r[3]) def __read_text(self, path: str): - f = open(path, "r") + f = open(path) self.text = f.read().lower().strip() f.close() @@ -184,16 +184,14 @@ def compare_with_capture( if True: + from compare import extract_and_compare_text from split_parser import ( comparison_method_from_filename, delay_time_from_filename, flags_from_filename, + fps_from_filename, loop_from_filename, pause_from_filename, - threshold_from_filename, region_from_filename, - fps_from_filename, - ) - from compare import ( - extract_and_compare_text, + threshold_from_filename, ) diff --git a/src/compare.py b/src/compare.py index d6306cb8..4b3d6d39 100644 --- a/src/compare.py +++ 
b/src/compare.py @@ -1,21 +1,22 @@ +from utils import BGRA_CHANNEL_COUNT, MAXBYTE, ColorChannel, ImageShape, is_valid_image +import Levenshtein as levenshtein from math import sqrt import cv2 import numpy as np -from cv2.typing import MatLike -from scipy import fft -from PIL import Image # TODO: easyocr vs. pytesseract? # tesseract seems to cause less overall CPU load -#from easyocr import Reader -#reader = Reader(["en"], gpu=False, verbose=False, download_enabled=False) +# from easyocr import Reader +# reader = Reader(["en"], gpu=False, verbose=False, download_enabled=False) import pytesseract +from cv2.typing import MatLike +from PIL import Image +from scipy import fft + # TODO: make me configureable -pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract' -import Levenshtein as levenshtein +pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract" -from utils import BGRA_CHANNEL_COUNT, MAXBYTE, ColorChannel, ImageShape, is_valid_image MAXRANGE = MAXBYTE + 1 CHANNELS = [ColorChannel.Red.value, ColorChannel.Green.value, ColorChannel.Blue.value] @@ -135,6 +136,7 @@ def compare_phash(source: MatLike, capture: MatLike, mask: MatLike | None = None return 1 - (hash_diff / 64.0) + def extract_and_compare_text(capture: MatLike, text: str): """ Compares the extracted text of the given image and returns the similarity between the two texts. @@ -147,8 +149,8 @@ def extract_and_compare_text(capture: MatLike, text: str): # otherwise the levenshtein ratio is calculated between the two strings and gets returned. ratio = 0.0 # TODO: easyocr vs. pytesseract? 
- #image_string = " ".join(reader.readtext(capture, detail=0)).lower().strip() - image_string = pytesseract.image_to_string(Image.fromarray(capture), config='--oem 1 --psm 6').lower().strip() + # image_string = " ".join(reader.readtext(capture, detail=0)).lower().strip() + image_string = pytesseract.image_to_string(Image.fromarray(capture), config="--oem 1 --psm 6").lower().strip() if text in image_string: ratio = 1.0 @@ -156,9 +158,10 @@ def extract_and_compare_text(capture: MatLike, text: str): ratio = levenshtein.ratio(text, image_string) # TODO: debug: remove me if ratio > 0.9: - print(f'text from image ({ratio:,.2f}): {image_string}') + print(f"text from image ({ratio:,.2f}): {image_string}") return ratio + def __compare_dummy(*_: object): return 0.0 diff --git a/src/split_parser.py b/src/split_parser.py index 38e50658..44839fba 100644 --- a/src/split_parser.py +++ b/src/split_parser.py @@ -53,6 +53,7 @@ def threshold_from_filename(filename: str): # Check to make sure if it is a valid threshold return value if 0 <= value <= 1 else None + def region_from_filename(filename: str): """ Retrieve the capture region from the filename @@ -67,6 +68,7 @@ def region_from_filename(filename: str): # Check to make sure if it is a valid threshold return value if value != "" else None + def fps_from_filename(filename: str): """ Retrieve the FPS specifix to the split file @@ -81,6 +83,7 @@ def fps_from_filename(filename: str): # Check to make sure if it is a valid threshold return value if value >= 0 and value <= 60 else 0 + def pause_from_filename(filename: str): """ Retrieve the pause time from the filename, if there is no pause time or the pause time From ead3463f66542180fa7272bd31caf8ba85fc157e Mon Sep 17 00:00:00 2001 From: ston1th Date: Wed, 17 Jan 2024 23:25:46 +0100 Subject: [PATCH 03/26] fixed typo in requirements.txt and make linter happy --- scripts/requirements.txt | 2 +- src/AutoSplit.py | 4 ++-- src/AutoSplitImage.py | 9 ++++----- src/compare.py | 10 +++------- 
src/split_parser.py | 10 +++++----- 5 files changed, 15 insertions(+), 20 deletions(-) diff --git a/scripts/requirements.txt b/scripts/requirements.txt index b7ba4e2b..b30bc7c8 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -15,7 +15,7 @@ PyWinCtl>=0.0.42 # py.typed # When needed, dev builds can be found at https://download.qt.io/snapshots/ci/pyside/dev?C=M;O=D PySide6-Essentials>=6.6.0 ; sys_platform == 'win32' # Python 3.12 support PySide6-Essentials<6.5.1 ; sys_platform == 'linux' # Wayland issue on Ubuntu 22.04 https://bugreports.qt.io/browse/QTBUG-114635 -pytessract +pytesseract scipy>=1.11.2 # Python 3.12 support toml typing-extensions>=4.4.0 # @override decorator support diff --git a/src/AutoSplit.py b/src/AutoSplit.py index 8b45f29e..3ca9bf52 100644 --- a/src/AutoSplit.py +++ b/src/AutoSplit.py @@ -283,7 +283,7 @@ def __reload_start_image(self, started_by_button: bool = False, wait_for_delay: self.timer_start_image.stop() self.current_image_file_label.setText("-") self.start_image_status_value_label.setText("not found") - if self.current_split_image.text == None: + if self.current_split_image.text is None: set_preview_image(self.current_split_image, None) if not (validate_before_parsing(self, started_by_button) and parse_and_validate_images(self)): @@ -875,7 +875,7 @@ def __update_split_image(self, specific_image: AutoSplitImage | None = None): # Get split image self.split_image = specific_image or self.split_images_and_loop_number[0 + self.split_image_number][0] - if self.split_image.text == None and is_valid_image(self.split_image.byte_array): + if self.split_image.text is None and is_valid_image(self.split_image.byte_array): set_preview_image(self.current_split_image, self.split_image.byte_array) self.current_image_file_label.setText(self.split_image.filename) diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py index 3a0e8066..b256877f 100644 --- a/src/AutoSplitImage.py +++ b/src/AutoSplitImage.py @@ -111,7 +111,7 @@ def 
__init__(self, path: str): def __region(self, region: str): r = region.split("-") - if len(r) != 4: + if len(r) != 4: # noqa: PLR2004 return self.__x = int(r[0]) self.__xx = int(r[1]) @@ -119,9 +119,8 @@ def __region(self, region: str): self.__yy = int(r[3]) def __read_text(self, path: str): - f = open(path) - self.text = f.read().lower().strip() - f.close() + with open(path, mode="r", encoding="utf-8") as f: + self.text = f.read().lower().strip() def __read_image_bytes(self, path: str): image = cv2.imread(path, cv2.IMREAD_UNCHANGED) @@ -166,7 +165,7 @@ def compare_with_capture( capture: MatLike | None, ): """Extract image text from rectangle position and compare it with the expected string.""" - if self.text != None: + if self.text is not None: return extract_and_compare_text(capture[self.__y:self.__yy, self.__x:self.__xx], self.text) """Compare image with capture using image's comparison method. Falls back to combobox.""" diff --git a/src/compare.py b/src/compare.py index 4b3d6d39..69a95be3 100644 --- a/src/compare.py +++ b/src/compare.py @@ -1,5 +1,5 @@ from utils import BGRA_CHANNEL_COUNT, MAXBYTE, ColorChannel, ImageShape, is_valid_image -import Levenshtein as levenshtein +import Levenshtein from math import sqrt import cv2 @@ -147,17 +147,13 @@ def extract_and_compare_text(capture: MatLike, text: str): """ # if the string is found 1:1 in the string extracted from the image a 1 is returned. # otherwise the levenshtein ratio is calculated between the two strings and gets returned. - ratio = 0.0 # TODO: easyocr vs. pytesseract? 
# image_string = " ".join(reader.readtext(capture, detail=0)).lower().strip() image_string = pytesseract.image_to_string(Image.fromarray(capture), config="--oem 1 --psm 6").lower().strip() - if text in image_string: - ratio = 1.0 - else: - ratio = levenshtein.ratio(text, image_string) + ratio = 1.0 if text in image_string else Levenshtein.ratio(text, image_string) # TODO: debug: remove me - if ratio > 0.9: + if ratio > 0.9: # noqa: PLR2004 print(f"text from image ({ratio:,.2f}): {image_string}") return ratio diff --git a/src/split_parser.py b/src/split_parser.py index 44839fba..ed0779c0 100644 --- a/src/split_parser.py +++ b/src/split_parser.py @@ -56,7 +56,7 @@ def threshold_from_filename(filename: str): def region_from_filename(filename: str): """ - Retrieve the capture region from the filename + Retrieve the capture region from the filename. @param filename: String containing the file's name @return: A region string, if not then None @@ -66,12 +66,12 @@ def region_from_filename(filename: str): value = __value_from_filename(filename, "++", "") # Check to make sure if it is a valid threshold - return value if value != "" else None + return value if value else None def fps_from_filename(filename: str): """ - Retrieve the FPS specifix to the split file + Retrieve the FPS specifix to the split file. @param filename: String containing the file's name @return: A FPS int value between 0 and 60. A value of 0 indictaes to use the global FPS value. 
@@ -81,7 +81,7 @@ def fps_from_filename(filename: str): value = __value_from_filename(filename, "!!", 0) # Check to make sure if it is a valid threshold - return value if value >= 0 and value <= 60 else 0 + return value if value >= 0 and value <= 60 else 0 # noqa: PLR2004 def pause_from_filename(filename: str): @@ -238,7 +238,7 @@ def parse_and_validate_images(autosplit: "AutoSplit"): else: for image in split_images: # Test for image without transparency - if image.text == None and not is_valid_image(image.byte_array): + if image.text is None and not is_valid_image(image.byte_array): def image_validity(filename: str): return lambda: error_messages.image_validity(filename) From d36e2d4e164407acef82bc7b3c4828dd890af1c9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 17 Jan 2024 21:26:13 +0000 Subject: [PATCH 04/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/AutoSplitImage.py | 2 +- src/compare.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py index b256877f..a948d5fa 100644 --- a/src/AutoSplitImage.py +++ b/src/AutoSplitImage.py @@ -119,7 +119,7 @@ def __region(self, region: str): self.__yy = int(r[3]) def __read_text(self, path: str): - with open(path, mode="r", encoding="utf-8") as f: + with open(path, encoding="utf-8") as f: self.text = f.read().lower().strip() def __read_image_bytes(self, path: str): diff --git a/src/compare.py b/src/compare.py index 69a95be3..c6d8fed8 100644 --- a/src/compare.py +++ b/src/compare.py @@ -1,8 +1,7 @@ -from utils import BGRA_CHANNEL_COUNT, MAXBYTE, ColorChannel, ImageShape, is_valid_image -import Levenshtein from math import sqrt import cv2 +import Levenshtein import numpy as np # TODO: easyocr vs. pytesseract? 
@@ -14,6 +13,8 @@ from PIL import Image from scipy import fft +from utils import BGRA_CHANNEL_COUNT, MAXBYTE, ColorChannel, ImageShape, is_valid_image + # TODO: make me configureable pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract" From c9766bf02bd69d4bfeeba34658c2acf2ef0803c5 Mon Sep 17 00:00:00 2001 From: ston1th Date: Thu, 18 Jan 2024 00:01:48 +0100 Subject: [PATCH 05/26] keep pillow on windows and more linter fixes --- scripts/install.ps1 | 2 +- scripts/requirements.txt | 1 - src/AutoSplit.py | 4 ++-- src/AutoSplitImage.py | 7 ++++++- src/compare.py | 2 +- src/split_parser.py | 5 +---- 6 files changed, 11 insertions(+), 10 deletions(-) diff --git a/scripts/install.ps1 b/scripts/install.ps1 index 6072c5a3..118e5bc9 100644 --- a/scripts/install.ps1 +++ b/scripts/install.ps1 @@ -76,7 +76,7 @@ If ($IsLinux) { # Pillow, pygetwindow, pymsgbox, pytweening, MouseInfo are picked up by PySide6 # (also --exclude from build script, but more consistent with unfrozen run) &"$python" -m pip uninstall pyscreenshot mss pygetwindow pymsgbox pytweening MouseInfo -y -If ($IsWindows) { &"$python" -m pip uninstall pyscreeze Pillow -y } +If ($IsWindows) { &"$python" -m pip uninstall pyscreeze -y } # Don't compile resources on the Build CI job as it'll do so in build script If ($dev) { diff --git a/scripts/requirements.txt b/scripts/requirements.txt index b30bc7c8..fa4fb186 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -8,7 +8,6 @@ Levenshtein numpy>=1.26 # Python 3.12 support opencv-python-headless>=4.9.0.80 # Typing fixes packaging -Pillow>=10.0 # Python 3.12 support psutil>=5.9.6 # Python 3.12 fixes # PyAutoGUI # See install.ps1 PyWinCtl>=0.0.42 # py.typed diff --git a/src/AutoSplit.py b/src/AutoSplit.py index 3ca9bf52..ff5ece5b 100644 --- a/src/AutoSplit.py +++ b/src/AutoSplit.py @@ -283,7 +283,7 @@ def __reload_start_image(self, started_by_button: bool = False, wait_for_delay: self.timer_start_image.stop() 
self.current_image_file_label.setText("-") self.start_image_status_value_label.setText("not found") - if self.current_split_image.text is None: + if not self.current_split_image.text: set_preview_image(self.current_split_image, None) if not (validate_before_parsing(self, started_by_button) and parse_and_validate_images(self)): @@ -875,7 +875,7 @@ def __update_split_image(self, specific_image: AutoSplitImage | None = None): # Get split image self.split_image = specific_image or self.split_images_and_loop_number[0 + self.split_image_number][0] - if self.split_image.text is None and is_valid_image(self.split_image.byte_array): + if not self.split_image.text and is_valid_image(self.split_image.byte_array): set_preview_image(self.current_split_image, self.split_image.byte_array) self.current_image_file_label.setText(self.split_image.filename) diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py index a948d5fa..d0422790 100644 --- a/src/AutoSplitImage.py +++ b/src/AutoSplitImage.py @@ -40,8 +40,8 @@ class AutoSplitImage: fps: int image_type: ImageType byte_array: MatLike | None = None - text: str | None = None mask: MatLike | None = None + text: str # This value is internal, check for mask instead _has_transparency = False # These values should be overriden by some Defaults if None. 
Use getters instead @@ -95,6 +95,11 @@ def __init__(self, path: str): self.__comparison_method = comparison_method_from_filename(self.filename) self.__pause_time = pause_from_filename(self.filename) self.__similarity_threshold = threshold_from_filename(self.filename) + self.__x = 0 + self.__xx = 0 + self.__y = 0 + self.__yy = 0 + self.text = "" if path.endswith("txt"): self.fps = fps_from_filename(self.filename) self.__read_text(path) diff --git a/src/compare.py b/src/compare.py index c6d8fed8..64865e3d 100644 --- a/src/compare.py +++ b/src/compare.py @@ -150,7 +150,7 @@ def extract_and_compare_text(capture: MatLike, text: str): # otherwise the levenshtein ratio is calculated between the two strings and gets returned. # TODO: easyocr vs. pytesseract? # image_string = " ".join(reader.readtext(capture, detail=0)).lower().strip() - image_string = pytesseract.image_to_string(Image.fromarray(capture), config="--oem 1 --psm 6").lower().strip() + image_string = str(pytesseract.image_to_string(Image.fromarray(capture), config="--oem 1 --psm 6")).lower().strip() ratio = 1.0 if text in image_string else Levenshtein.ratio(text, image_string) # TODO: debug: remove me diff --git a/src/split_parser.py b/src/split_parser.py index ed0779c0..624ccfca 100644 --- a/src/split_parser.py +++ b/src/split_parser.py @@ -63,10 +63,7 @@ def region_from_filename(filename: str): """ # Check to make sure there is a valid region string between # plus signs of the filename - value = __value_from_filename(filename, "++", "") - - # Check to make sure if it is a valid threshold - return value if value else None + return __value_from_filename(filename, "++", "") def fps_from_filename(filename: str): From 89983603acaac7c78d8dfb4b765435fe0670cc45 Mon Sep 17 00:00:00 2001 From: ston1th Date: Thu, 18 Jan 2024 00:08:22 +0100 Subject: [PATCH 06/26] use pathlib to read file --- src/AutoSplitImage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/AutoSplitImage.py 
b/src/AutoSplitImage.py index d0422790..fe3ed9f5 100644 --- a/src/AutoSplitImage.py +++ b/src/AutoSplitImage.py @@ -1,6 +1,7 @@ import os from enum import IntEnum, auto from math import sqrt +from pathlib import Path from typing import TYPE_CHECKING import cv2 @@ -124,8 +125,7 @@ def __region(self, region: str): self.__yy = int(r[3]) def __read_text(self, path: str): - with open(path, encoding="utf-8") as f: - self.text = f.read().lower().strip() + self.text = Path(path).read_text(encoding="utf-8").lower().strip() def __read_image_bytes(self, path: str): image = cv2.imread(path, cv2.IMREAD_UNCHANGED) From f25fc7e475d042ce7bcf7dadb549421b1a266f2e Mon Sep 17 00:00:00 2001 From: ston1th Date: Thu, 18 Jan 2024 00:11:48 +0100 Subject: [PATCH 07/26] fix str None comparison --- src/split_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/split_parser.py b/src/split_parser.py index 624ccfca..b9964290 100644 --- a/src/split_parser.py +++ b/src/split_parser.py @@ -235,7 +235,7 @@ def parse_and_validate_images(autosplit: "AutoSplit"): else: for image in split_images: # Test for image without transparency - if image.text is None and not is_valid_image(image.byte_array): + if not image.text and not is_valid_image(image.byte_array): def image_validity(filename: str): return lambda: error_messages.image_validity(filename) From 2f8d298aef1bfae861b8d52222137b6f3be8f3e7 Mon Sep 17 00:00:00 2001 From: ston1th Date: Sat, 3 Feb 2024 16:11:40 +0100 Subject: [PATCH 08/26] rewrite some stuff * rewrite text files to contain the rectangle position * switch to easyocr since there was no way to use pytesseract or tesserocr reliably without PIL * display text that is searched for * set default FPS limit for OCR to 1 * minor fixes --- README.md | 26 ++++++++++++++++------ scripts/install.ps1 | 2 +- scripts/requirements.txt | 4 +++- src/AutoSplit.py | 8 ++++--- src/AutoSplitImage.py | 47 +++++++++++++++++++++------------------- src/compare.py | 31 
++++++++++++-------------- src/split_parser.py | 29 +------------------------ 7 files changed, 68 insertions(+), 79 deletions(-) diff --git a/README.md b/README.md index 15f8f4c6..d52d5829 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,6 @@ This program can be used to automatically start, split, and reset your preferred - Wayland is not currently supported - WSL2/WSLg requires an additional Desktop Environment, external X11 server, and/or systemd - Python 3.10+ (Not required for normal use. Refer to the [build instructions](/docs/build%20instructions.md) if you'd like run the application directly in Python). -- Tesseract-OCR (optional; requierd for text recognition as an alternative comparison method). See https://github.com/UB-Mannheim/tesseract/wiki for installation instructions. ## OPTIONS @@ -194,8 +193,6 @@ This option is mainly meant to be toggled with the `Toggle auto Reset Image` hot - **Custom thresholds** are place between parenthesis `()` in the filename. This value will override the default threshold. - **Custom pause times** are placed between square brackets `[]` in the filename. This value will override the default pause time. - **Custom delay times** are placed between hash signs `##` in the filename. Note that these are in milliseconds. For example, a 10 second split delay would be `#10000#`. You cannot skip or undo splits during split delays. -- **Custom comparison FPS** is placed between exclamation marks `!!` in the filename. This value will override the default FPS for the given image or text file and does also apply to the reset image. -- **Custom rectangle position** is placed between plus signs `++` in the filename. Note that these will only apply for text files when using text recognition (.txt). The scheme looks like this: `+740-1180-60-150+`. These are the X and Y coordinates in the image to draw a rectangle. They are seperated by the minus sign `-` and follow the form `+X-X-Y-Y+`. 
The second X and Y values need to be bigger then the first ones. You will need to adjust these values depending on your capture resolution. - A different **comparison method** can be specified with their 0-base index between carets `^^`: - `^0^`: L2 Norm - `^1^`: Histogram @@ -237,13 +234,28 @@ Place the expected text in the text file that should be looked for. An example file name and content could look like this: -Filename: `001_start_auto_splitter+275-540-70-95+.txt` +Filename: `001_start_auto_splitter.txt` -Content: `complete any 2 encounters` +Content: -This will look for the text `complete any 2 encounters` at the capture position `+275-540-70-95+`. +``` +texts = ["complete any 2 encounters"] +top_left = 275 +top_right = 540 +bottom_left = 70 +bottom_right = 95 +fps_limit = 1 +``` -Note: This method can cause high CPU usage at the standard comparison FPS. You should therefor limit the comparison FPS when you use this method to 1 or 2 FPS using the limit option `!1!` in the file name. +The `texts` field is an array and can take more than one text to look for: + +``` +texts = ["look for me", "or this text"] +``` + +The `top`, `bottom`, `left` and `right` options define a rectangle where the text you are looking for is expected to appear in the image. + +Note: This method can cause high CPU usage at the standard comparison FPS. You should therefor limit the comparison FPS when you use this method to 1 or 2 FPS using the `fps_limit` option. The size of the selected rectangle can also impact the CPU load (bigger = more CPU load). 
### Profiles diff --git a/scripts/install.ps1 b/scripts/install.ps1 index 118e5bc9..6072c5a3 100644 --- a/scripts/install.ps1 +++ b/scripts/install.ps1 @@ -76,7 +76,7 @@ If ($IsLinux) { # Pillow, pygetwindow, pymsgbox, pytweening, MouseInfo are picked up by PySide6 # (also --exclude from build script, but more consistent with unfrozen run) &"$python" -m pip uninstall pyscreenshot mss pygetwindow pymsgbox pytweening MouseInfo -y -If ($IsWindows) { &"$python" -m pip uninstall pyscreeze -y } +If ($IsWindows) { &"$python" -m pip uninstall pyscreeze Pillow -y } # Don't compile resources on the Build CI job as it'll do so in build script If ($dev) { diff --git a/scripts/requirements.txt b/scripts/requirements.txt index fa4fb186..ec77bee5 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -3,6 +3,7 @@ # Read /docs/build%20instructions.md for more information on how to install, run and build the python code. # # Dependencies: +easyocr git+https://github.com/boppreh/keyboard.git#egg=keyboard # Fix install on macos and linux-ci https://github.com/boppreh/keyboard/pull/568 Levenshtein numpy>=1.26 # Python 3.12 support @@ -14,9 +15,10 @@ PyWinCtl>=0.0.42 # py.typed # When needed, dev builds can be found at https://download.qt.io/snapshots/ci/pyside/dev?C=M;O=D PySide6-Essentials>=6.6.0 ; sys_platform == 'win32' # Python 3.12 support PySide6-Essentials<6.5.1 ; sys_platform == 'linux' # Wayland issue on Ubuntu 22.04 https://bugreports.qt.io/browse/QTBUG-114635 -pytesseract scipy>=1.11.2 # Python 3.12 support toml +torch +torchvision typing-extensions>=4.4.0 # @override decorator support # # Build and compile resources diff --git a/src/AutoSplit.py b/src/AutoSplit.py index ff5ece5b..51e16cde 100644 --- a/src/AutoSplit.py +++ b/src/AutoSplit.py @@ -283,8 +283,7 @@ def __reload_start_image(self, started_by_button: bool = False, wait_for_delay: self.timer_start_image.stop() self.current_image_file_label.setText("-") 
self.start_image_status_value_label.setText("not found") - if not self.current_split_image.text: - set_preview_image(self.current_split_image, None) + set_preview_image(self.current_split_image, None) if not (validate_before_parsing(self, started_by_button) and parse_and_validate_images(self)): QApplication.processEvents() @@ -875,7 +874,10 @@ def __update_split_image(self, specific_image: AutoSplitImage | None = None): # Get split image self.split_image = specific_image or self.split_images_and_loop_number[0 + self.split_image_number][0] - if not self.split_image.text and is_valid_image(self.split_image.byte_array): + if self.split_image.ocr: + text = "\nor\n".join(self.split_image.texts) + self.current_split_image.setText(f"Looking for OCR text:\n{text}") + elif is_valid_image(self.split_image.byte_array): set_preview_image(self.current_split_image, self.split_image.byte_array) self.current_image_file_label.setText(self.split_image.filename) diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py index fe3ed9f5..78a21508 100644 --- a/src/AutoSplitImage.py +++ b/src/AutoSplitImage.py @@ -1,4 +1,5 @@ import os +import tomllib from enum import IntEnum, auto from math import sqrt from pathlib import Path @@ -42,7 +43,8 @@ class AutoSplitImage: image_type: ImageType byte_array: MatLike | None = None mask: MatLike | None = None - text: str + texts: [] + ocr: bool # This value is internal, check for mask instead _has_transparency = False # These values should be overriden by some Defaults if None. 
Use getters instead @@ -100,11 +102,12 @@ def __init__(self, path: str): self.__xx = 0 self.__y = 0 self.__yy = 0 - self.text = "" + self.texts = [] + self.fps = 0 + self.ocr = False if path.endswith("txt"): - self.fps = fps_from_filename(self.filename) - self.__read_text(path) - self.__region(region_from_filename(self.filename)) + self.ocr = True + self.__parse_text_file(path) else: self.__read_image_bytes(path) @@ -115,17 +118,17 @@ def __init__(self, path: str): else: self.image_type = ImageType.SPLIT - def __region(self, region: str): - r = region.split("-") - if len(r) != 4: # noqa: PLR2004 - return - self.__x = int(r[0]) - self.__xx = int(r[1]) - self.__y = int(r[2]) - self.__yy = int(r[3]) - - def __read_text(self, path: str): - self.text = Path(path).read_text(encoding="utf-8").lower().strip() + def __parse_text_file(self, path: str): + with open(path, "rb") as f: + data = tomllib.load(f) + self.texts = data["texts"] + self.__x = data["top_left"] + self.__xx = data["top_right"] + self.__y = data["bottom_left"] + self.__yy = data["bottom_right"] + self.fps = 1 + if "fps_limit" in data: + self.fps = data["fps_limit"] def __read_image_bytes(self, path: str): image = cv2.imread(path, cv2.IMREAD_UNCHANGED) @@ -169,11 +172,13 @@ def compare_with_capture( default: "AutoSplit | int", capture: MatLike | None, ): - """Extract image text from rectangle position and compare it with the expected string.""" - if self.text is not None: - return extract_and_compare_text(capture[self.__y:self.__yy, self.__x:self.__xx], self.text) + """ + Compare image with capture using image's comparison method. Falls back to combobox. + For OCR text files: extract image text from rectangle position and compare it with the expected string. + """ + if self.ocr: + return extract_and_compare_text(capture[self.__y:self.__yy, self.__x:self.__xx], self.texts) - """Compare image with capture using image's comparison method. 
Falls back to combobox.""" if not is_valid_image(self.byte_array) or not is_valid_image(capture): return 0.0 resized_capture = cv2.resize(capture, self.byte_array.shape[1::-1]) @@ -193,9 +198,7 @@ def compare_with_capture( comparison_method_from_filename, delay_time_from_filename, flags_from_filename, - fps_from_filename, loop_from_filename, pause_from_filename, - region_from_filename, threshold_from_filename, ) diff --git a/src/compare.py b/src/compare.py index 64865e3d..1d46430f 100644 --- a/src/compare.py +++ b/src/compare.py @@ -4,21 +4,14 @@ import Levenshtein import numpy as np -# TODO: easyocr vs. pytesseract? -# tesseract seems to cause less overall CPU load -# from easyocr import Reader -# reader = Reader(["en"], gpu=False, verbose=False, download_enabled=False) -import pytesseract +from easyocr import Reader +OCR = Reader(["en"], gpu=False, verbose=False, download_enabled=False) + from cv2.typing import MatLike -from PIL import Image from scipy import fft from utils import BGRA_CHANNEL_COUNT, MAXBYTE, ColorChannel, ImageShape, is_valid_image -# TODO: make me configureable -pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract" - - MAXRANGE = MAXBYTE + 1 CHANNELS = [ColorChannel.Red.value, ColorChannel.Green.value, ColorChannel.Blue.value] HISTOGRAM_SIZE = [8, 8, 8] @@ -138,21 +131,25 @@ def compare_phash(source: MatLike, capture: MatLike, mask: MatLike | None = None return 1 - (hash_diff / 64.0) -def extract_and_compare_text(capture: MatLike, text: str): +def extract_and_compare_text(capture: MatLike, texts): """ Compares the extracted text of the given image and returns the similarity between the two texts. + The best match of all texts is returned. @param capture: Image of any given shape as a numpy array - @param text: a string to match for + @param texts: a list of strings to match for @return: The similarity between the text in the image and the text supplied as a number 0 to 1. 
""" # if the string is found 1:1 in the string extracted from the image a 1 is returned. # otherwise the levenshtein ratio is calculated between the two strings and gets returned. - # TODO: easyocr vs. pytesseract? - # image_string = " ".join(reader.readtext(capture, detail=0)).lower().strip() - image_string = str(pytesseract.image_to_string(Image.fromarray(capture), config="--oem 1 --psm 6")).lower().strip() - - ratio = 1.0 if text in image_string else Levenshtein.ratio(text, image_string) + image_string = "".join(OCR.readtext(capture, detail=0)).lower().strip() + + ratio = 0.0 + for text in texts: + if text in image_string: + ratio = 1.0 + break + ratio = max(ratio, Levenshtein.ratio(text, image_string)) # TODO: debug: remove me if ratio > 0.9: # noqa: PLR2004 print(f"text from image ({ratio:,.2f}): {image_string}") diff --git a/src/split_parser.py b/src/split_parser.py index b9964290..9f08c278 100644 --- a/src/split_parser.py +++ b/src/split_parser.py @@ -54,33 +54,6 @@ def threshold_from_filename(filename: str): return value if 0 <= value <= 1 else None -def region_from_filename(filename: str): - """ - Retrieve the capture region from the filename. - - @param filename: String containing the file's name - @return: A region string, if not then None - """ - # Check to make sure there is a valid region string between - # plus signs of the filename - return __value_from_filename(filename, "++", "") - - -def fps_from_filename(filename: str): - """ - Retrieve the FPS specifix to the split file. - - @param filename: String containing the file's name - @return: A FPS int value between 0 and 60. A value of 0 indictaes to use the global FPS value. 
- """ - # Check to make sure there is a valid number between - # exclamation marks of the filename - value = __value_from_filename(filename, "!!", 0) - - # Check to make sure if it is a valid threshold - return value if value >= 0 and value <= 60 else 0 # noqa: PLR2004 - - def pause_from_filename(filename: str): """ Retrieve the pause time from the filename, if there is no pause time or the pause time @@ -235,7 +208,7 @@ def parse_and_validate_images(autosplit: "AutoSplit"): else: for image in split_images: # Test for image without transparency - if not image.text and not is_valid_image(image.byte_array): + if not image.ocr and not is_valid_image(image.byte_array): def image_validity(filename: str): return lambda: error_messages.image_validity(filename) From fddd0aed354e652791a86e6406ff77d8bfe7f7de Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 3 Feb 2024 14:12:06 +0000 Subject: [PATCH 09/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/AutoSplitImage.py | 3 +-- src/compare.py | 9 ++++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py index 78a21508..946180f1 100644 --- a/src/AutoSplitImage.py +++ b/src/AutoSplitImage.py @@ -1,12 +1,11 @@ import os -import tomllib from enum import IntEnum, auto from math import sqrt -from pathlib import Path from typing import TYPE_CHECKING import cv2 import numpy as np +import tomllib from cv2.typing import MatLike import error_messages diff --git a/src/compare.py b/src/compare.py index 1d46430f..e7fe93ff 100644 --- a/src/compare.py +++ b/src/compare.py @@ -1,16 +1,15 @@ +from utils import BGRA_CHANNEL_COUNT, MAXBYTE, ColorChannel, ImageShape, is_valid_image +from scipy import fft +from cv2.typing import MatLike from math import sqrt import cv2 import Levenshtein import numpy as np - from easyocr import Reader -OCR = Reader(["en"], 
gpu=False, verbose=False, download_enabled=False) -from cv2.typing import MatLike -from scipy import fft +OCR = Reader(["en"], gpu=False, verbose=False, download_enabled=False) -from utils import BGRA_CHANNEL_COUNT, MAXBYTE, ColorChannel, ImageShape, is_valid_image MAXRANGE = MAXBYTE + 1 CHANNELS = [ColorChannel.Red.value, ColorChannel.Green.value, ColorChannel.Blue.value] From cd2c212fbbbd5a6032372bb17bf4607b03cbd440 Mon Sep 17 00:00:00 2001 From: ston1th Date: Sat, 3 Feb 2024 19:49:51 +0100 Subject: [PATCH 10/26] switch back to tesseract * switch back to tesseract * ditch all python binding libraries to not include Pillow * call tesseract ourselves --- README.md | 1 + scripts/requirements.txt | 3 --- src/AutoSplitImage.py | 6 +++--- src/compare.py | 46 ++++++++++++++++++++++++++++++++++------ 4 files changed, 43 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index d52d5829..29ac5750 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,7 @@ This program can be used to automatically start, split, and reset your preferred - Wayland is not currently supported - WSL2/WSLg requires an additional Desktop Environment, external X11 server, and/or systemd - Python 3.10+ (Not required for normal use. Refer to the [build instructions](/docs/build%20instructions.md) if you'd like run the application directly in Python). +- Tesseract-OCR (optional; requierd for text recognition as an alternative comparison method). See https://github.com/UB-Mannheim/tesseract/wiki for installation instructions. ## OPTIONS diff --git a/scripts/requirements.txt b/scripts/requirements.txt index ec77bee5..5a3e59e9 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -3,7 +3,6 @@ # Read /docs/build%20instructions.md for more information on how to install, run and build the python code.
# # Dependencies: -easyocr git+https://github.com/boppreh/keyboard.git#egg=keyboard # Fix install on macos and linux-ci https://github.com/boppreh/keyboard/pull/568 Levenshtein numpy>=1.26 # Python 3.12 support @@ -17,8 +16,6 @@ PySide6-Essentials>=6.6.0 ; sys_platform == 'win32' # Python 3.12 support PySide6-Essentials<6.5.1 ; sys_platform == 'linux' # Wayland issue on Ubuntu 22.04 https://bugreports.qt.io/browse/QTBUG-114635 scipy>=1.11.2 # Python 3.12 support toml -torch -torchvision typing-extensions>=4.4.0 # @override decorator support # # Build and compile resources diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py index 946180f1..5f8092a6 100644 --- a/src/AutoSplitImage.py +++ b/src/AutoSplitImage.py @@ -1,11 +1,11 @@ import os +import tomllib from enum import IntEnum, auto from math import sqrt from typing import TYPE_CHECKING import cv2 import numpy as np -import tomllib from cv2.typing import MatLike import error_messages @@ -42,7 +42,7 @@ class AutoSplitImage: image_type: ImageType byte_array: MatLike | None = None mask: MatLike | None = None - texts: [] + texts: list[str] ocr: bool # This value is internal, check for mask instead _has_transparency = False @@ -101,7 +101,7 @@ def __init__(self, path: str): self.__xx = 0 self.__y = 0 self.__yy = 0 - self.texts = [] + self.texts = list[str]() self.fps = 0 self.ocr = False if path.endswith("txt"): diff --git a/src/compare.py b/src/compare.py index e7fe93ff..e0b97724 100644 --- a/src/compare.py +++ b/src/compare.py @@ -1,15 +1,16 @@ -from utils import BGRA_CHANNEL_COUNT, MAXBYTE, ColorChannel, ImageShape, is_valid_image -from scipy import fft -from cv2.typing import MatLike from math import sqrt import cv2 import Levenshtein import numpy as np -from easyocr import Reader -OCR = Reader(["en"], gpu=False, verbose=False, download_enabled=False) +import subprocess +from os import environ +from cv2.typing import MatLike +from scipy import fft + +from utils import BGRA_CHANNEL_COUNT, MAXBYTE, 
ColorChannel, ImageShape, is_valid_image MAXRANGE = MAXBYTE + 1 CHANNELS = [ColorChannel.Red.value, ColorChannel.Green.value, ColorChannel.Blue.value] @@ -17,6 +18,9 @@ RANGES = [0, MAXRANGE, 0, MAXRANGE, 0, MAXRANGE] MASK_SIZE_MULTIPLIER = ColorChannel.Alpha * MAXBYTE * MAXBYTE +# TODO: use PATH variable +TESSERACT_CMD = [r'C:\Program Files\Tesseract-OCR\tesseract', '-', '-', '--oem', '1', '--psm', '6'] +DEFAULT_ENCODING = "utf-8" def compare_histograms(source: MatLike, capture: MatLike, mask: MatLike | None = None): """ @@ -130,7 +134,35 @@ def compare_phash(source: MatLike, capture: MatLike, mask: MatLike | None = None return 1 - (hash_diff / 64.0) -def extract_and_compare_text(capture: MatLike, texts): +# copied from https://github.com/madmaze/pytesseract +def subprocess_args(): + # See https://github.com/pyinstaller/pyinstaller/wiki/Recipe-subprocess + # for reference and comments. + + kwargs = { + 'stdin': subprocess.PIPE, + 'stdout': subprocess.PIPE, + 'stderr': subprocess.DEVNULL, + 'startupinfo': None, + 'env': environ, + } + + if hasattr(subprocess, 'STARTUPINFO'): + kwargs['startupinfo'] = subprocess.STARTUPINFO() + kwargs['startupinfo'].dwFlags |= subprocess.STARTF_USESHOWWINDOW + kwargs['startupinfo'].wShowWindow = subprocess.SW_HIDE + + return kwargs + + +def run_tesseract(capture: MatLike): + png = np.array(cv2.imencode('.png', capture)[1]).tobytes() + p = subprocess.Popen(TESSERACT_CMD, **subprocess_args()) + output = p.communicate(input=png)[0] + return output.decode(DEFAULT_ENCODING) + + +def extract_and_compare_text(capture: MatLike, texts: list[str]): """ Compares the extracted text of the given image and returns the similarity between the two texts. The best match of all texts is returned. @@ -141,7 +173,7 @@ def extract_and_compare_text(capture: MatLike, texts): """ # if the string is found 1:1 in the string extracted from the image a 1 is returned. # otherwise the levenshtein ratio is calculated between the two strings and gets returned. 
- image_string = "".join(OCR.readtext(capture, detail=0)).lower().strip() + image_string = run_tesseract(capture).lower().strip() ratio = 0.0 for text in texts: From 80140cbd407e9512a1d6048f88690be637e8f155 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 3 Feb 2024 17:51:45 +0000 Subject: [PATCH 11/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/AutoSplitImage.py | 2 +- src/compare.py | 29 ++++++++++++++--------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py index 5f8092a6..1fe22c53 100644 --- a/src/AutoSplitImage.py +++ b/src/AutoSplitImage.py @@ -1,11 +1,11 @@ import os -import tomllib from enum import IntEnum, auto from math import sqrt from typing import TYPE_CHECKING import cv2 import numpy as np +import tomllib from cv2.typing import MatLike import error_messages diff --git a/src/compare.py b/src/compare.py index e0b97724..b83a7290 100644 --- a/src/compare.py +++ b/src/compare.py @@ -1,12 +1,10 @@ +import subprocess from math import sqrt +from os import environ import cv2 import Levenshtein import numpy as np - -import subprocess -from os import environ - from cv2.typing import MatLike from scipy import fft @@ -19,9 +17,10 @@ MASK_SIZE_MULTIPLIER = ColorChannel.Alpha * MAXBYTE * MAXBYTE # TODO: use PATH variable -TESSERACT_CMD = [r'C:\Program Files\Tesseract-OCR\tesseract', '-', '-', '--oem', '1', '--psm', '6'] +TESSERACT_CMD = [r"C:\Program Files\Tesseract-OCR\tesseract", "-", "-", "--oem", "1", "--psm", "6"] DEFAULT_ENCODING = "utf-8" + def compare_histograms(source: MatLike, capture: MatLike, mask: MatLike | None = None): """ Compares two images by calculating their histograms, normalizing @@ -140,23 +139,23 @@ def subprocess_args(): # for reference and comments. 
kwargs = { - 'stdin': subprocess.PIPE, - 'stdout': subprocess.PIPE, - 'stderr': subprocess.DEVNULL, - 'startupinfo': None, - 'env': environ, + "stdin": subprocess.PIPE, + "stdout": subprocess.PIPE, + "stderr": subprocess.DEVNULL, + "startupinfo": None, + "env": environ, } - if hasattr(subprocess, 'STARTUPINFO'): - kwargs['startupinfo'] = subprocess.STARTUPINFO() - kwargs['startupinfo'].dwFlags |= subprocess.STARTF_USESHOWWINDOW - kwargs['startupinfo'].wShowWindow = subprocess.SW_HIDE + if hasattr(subprocess, "STARTUPINFO"): + kwargs["startupinfo"] = subprocess.STARTUPINFO() + kwargs["startupinfo"].dwFlags |= subprocess.STARTF_USESHOWWINDOW + kwargs["startupinfo"].wShowWindow = subprocess.SW_HIDE return kwargs def run_tesseract(capture: MatLike): - png = np.array(cv2.imencode('.png', capture)[1]).tobytes() + png = np.array(cv2.imencode(".png", capture)[1]).tobytes() p = subprocess.Popen(TESSERACT_CMD, **subprocess_args()) output = p.communicate(input=png)[0] return output.decode(DEFAULT_ENCODING) From feeb58ead4ccaacc4d25f7beb3cc9917fdc49523 Mon Sep 17 00:00:00 2001 From: ston1th Date: Sun, 4 Feb 2024 11:51:35 +0100 Subject: [PATCH 12/26] internal logic changes * moved some code around * implemented fps_limit getter * switch to PATH variable use * minor fixes --- README.md | 13 +++++++++++- src/AutoSplit.py | 15 +++++--------- src/AutoSplitImage.py | 43 +++++++++++++++++++++++++-------------- src/compare.py | 47 +++++++++---------------------------------- src/split_parser.py | 2 +- src/utils.py | 40 ++++++++++++++++++++++++++++++++++++ 6 files changed, 95 insertions(+), 65 deletions(-) diff --git a/README.md b/README.md index 29ac5750..a40c728a 100644 --- a/README.md +++ b/README.md @@ -230,8 +230,19 @@ The Start Image is similar to the Reset Image. You can only have one Start Image ### Text Recognition (OCR) You can use text recognition as an alternative comparison method. +First you need to install tesseract and include it in your PATH variable. 
See [Compatibility](#Compatibility) above. + +To include tesseract in your PATH variable you can use this powershell snippet. + +Note: change the `$tesseract_path` variable to the location where tesseract is installed. + +``` +$path = [System.Environment]::GetEnvironmentVariable("Path", "User") +$tesseract_path = "C:\Program Files\Tesseract-OCR" +[System.Environment]::SetEnvironmentVariable("Path", "$path;$tesseract_path", "User") +``` + To use this feature you need to place a text file (.txt) in your splits folder instead of an image file. -Place the expected text in the text file that should be looked for. An example file name and content could look like this: diff --git a/src/AutoSplit.py b/src/AutoSplit.py index be8e1a89..f223dced 100644 --- a/src/AutoSplit.py +++ b/src/AutoSplit.py @@ -307,10 +307,8 @@ def __reload_start_image(self, started_by_button: bool = False, wait_for_delay: self.highest_similarity = 0.0 self.reset_highest_similarity = 0.0 self.split_below_threshold = False - start_image_fps = self.settings_dict["fps_limit"] - if self.start_image.fps != 0: - start_image_fps = self.start_image.fps - self.timer_start_image.start(int(ONE_SECOND / start_image_fps)) + + self.timer_start_image.start(int(ONE_SECOND / self.start_image.get_fps_limit(self))) QApplication.processEvents() @@ -685,12 +683,8 @@ def __similarity_threshold_loop(self, number_of_split_images: int, dummy_splits_ self.undo_split_button.setEnabled(self.split_image_number != 0) QApplication.processEvents() - fps = self.settings_dict["fps_limit"] - if self.split_image.fps != 0: - fps = self.split_image.fps - # Limit the number of time the comparison runs to reduce cpu usage - frame_interval = 1 / fps + frame_interval = 1 / self.split_image.get_fps_limit(self) # Use a time delta to have a consistant check interval wait_delta_ms = int((frame_interval - (time() - start) % frame_interval) * ONE_SECOND) @@ -874,7 +868,8 @@ def __update_split_image(self, specific_image: AutoSplitImage | None = 
None): # Get split image self.split_image = specific_image or self.split_images_and_loop_number[0 + self.split_image_number][0] - if self.split_image.ocr: + if self.split_image.is_ocr: + # TODO: test if setText clears a set image text = "\nor\n".join(self.split_image.texts) self.current_split_image.setText(f"Looking for OCR text:\n{text}") elif is_valid_image(self.split_image.byte_array): diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py index 1fe22c53..88d43c91 100644 --- a/src/AutoSplitImage.py +++ b/src/AutoSplitImage.py @@ -5,11 +5,11 @@ import cv2 import numpy as np -import tomllib +import toml from cv2.typing import MatLike import error_messages -from compare import check_if_image_has_transparency, get_comparison_method_by_index +from compare import check_if_image_has_transparency, extract_and_compare_text, get_comparison_method_by_index from utils import BGR_CHANNEL_COUNT, MAXBYTE, ColorChannel, ImageShape, is_valid_image if TYPE_CHECKING: @@ -38,12 +38,10 @@ class AutoSplitImage: filename: str flags: int loops: int - fps: int image_type: ImageType byte_array: MatLike | None = None mask: MatLike | None = None texts: list[str] - ocr: bool # This value is internal, check for mask instead _has_transparency = False # These values should be overriden by some Defaults if None. Use getters instead @@ -55,6 +53,15 @@ class AutoSplitImage: __xx: int __y: int __yy: int + __fps_limit: int + + @property + def is_ocr(self): + """ + Whether a "split image" is actually for Optical Text Recognition + based on whether there's any text strings to search for. 
+ """ + return bool(self.texts) def get_delay_time(self, default: "AutoSplit | int"): """Get image's delay time or fallback to the default value from spinbox.""" @@ -88,11 +95,18 @@ def get_similarity_threshold(self, default: "AutoSplit | float"): return default return default.settings_dict["default_similarity_threshold"] + def get_fps_limit(self, default: "AutoSplit"): + """Get image's fps limit or fallback to the default value from spinbox.""" + if self.__fps_limit != 0: + return self.__fps_limit + return default.settings_dict["fps_limit"] + def __init__(self, path: str): self.path = path self.filename = os.path.split(path)[-1].lower() self.flags = flags_from_filename(self.filename) self.loops = loop_from_filename(self.filename) + self.texts = list[str]() self.__delay_time = delay_time_from_filename(self.filename) self.__comparison_method = comparison_method_from_filename(self.filename) self.__pause_time = pause_from_filename(self.filename) @@ -101,11 +115,8 @@ def __init__(self, path: str): self.__xx = 0 self.__y = 0 self.__yy = 0 - self.texts = list[str]() - self.fps = 0 - self.ocr = False + self.__fps_limit = 0 if path.endswith("txt"): - self.ocr = True self.__parse_text_file(path) else: self.__read_image_bytes(path) @@ -118,16 +129,16 @@ def __init__(self, path: str): self.image_type = ImageType.SPLIT def __parse_text_file(self, path: str): - with open(path, "rb") as f: - data = tomllib.load(f) + with open(path, "r") as f: + data = toml.load(f) self.texts = data["texts"] self.__x = data["top_left"] self.__xx = data["top_right"] self.__y = data["bottom_left"] self.__yy = data["bottom_right"] - self.fps = 1 + self.__fps_limit = 1 if "fps_limit" in data: - self.fps = data["fps_limit"] + self.__fps_limit = data["fps_limit"] def __read_image_bytes(self, path: str): image = cv2.imread(path, cv2.IMREAD_UNCHANGED) @@ -175,10 +186,13 @@ def compare_with_capture( Compare image with capture using image's comparison method. Falls back to combobox.
For OCR text files: extract image text from rectangle position and compare it with the expected string. """ - if self.ocr: + if not is_valid_image(capture): + return 0.0 + + if self.is_ocr: return extract_and_compare_text(capture[self.__y:self.__yy, self.__x:self.__xx], self.texts) - if not is_valid_image(self.byte_array) or not is_valid_image(capture): + if not is_valid_image(self.byte_array): return 0.0 resized_capture = cv2.resize(capture, self.byte_array.shape[1::-1]) @@ -192,7 +206,6 @@ def compare_with_capture( if True: - from compare import extract_and_compare_text from split_parser import ( comparison_method_from_filename, delay_time_from_filename, diff --git a/src/compare.py b/src/compare.py index b83a7290..d9f6de1a 100644 --- a/src/compare.py +++ b/src/compare.py @@ -1,6 +1,4 @@ -import subprocess from math import sqrt -from os import environ import cv2 import Levenshtein @@ -8,7 +6,7 @@ from cv2.typing import MatLike from scipy import fft -from utils import BGRA_CHANNEL_COUNT, MAXBYTE, ColorChannel, ImageShape, is_valid_image +from utils import BGRA_CHANNEL_COUNT, MAXBYTE, ColorChannel, ImageShape, is_valid_image, run_tesseract MAXRANGE = MAXBYTE + 1 CHANNELS = [ColorChannel.Red.value, ColorChannel.Green.value, ColorChannel.Blue.value] @@ -16,10 +14,6 @@ RANGES = [0, MAXRANGE, 0, MAXRANGE, 0, MAXRANGE] MASK_SIZE_MULTIPLIER = ColorChannel.Alpha * MAXBYTE * MAXBYTE -# TODO: use PATH variable -TESSERACT_CMD = [r"C:\Program Files\Tesseract-OCR\tesseract", "-", "-", "--oem", "1", "--psm", "6"] -DEFAULT_ENCODING = "utf-8" - def compare_histograms(source: MatLike, capture: MatLike, mask: MatLike | None = None): """ @@ -133,34 +127,6 @@ def compare_phash(source: MatLike, capture: MatLike, mask: MatLike | None = None return 1 - (hash_diff / 64.0) -# copied from https://github.com/madmaze/pytesseract -def subprocess_args(): - # See https://github.com/pyinstaller/pyinstaller/wiki/Recipe-subprocess - # for reference and comments. 
- - kwargs = { - "stdin": subprocess.PIPE, - "stdout": subprocess.PIPE, - "stderr": subprocess.DEVNULL, - "startupinfo": None, - "env": environ, - } - - if hasattr(subprocess, "STARTUPINFO"): - kwargs["startupinfo"] = subprocess.STARTUPINFO() - kwargs["startupinfo"].dwFlags |= subprocess.STARTF_USESHOWWINDOW - kwargs["startupinfo"].wShowWindow = subprocess.SW_HIDE - - return kwargs - - -def run_tesseract(capture: MatLike): - png = np.array(cv2.imencode(".png", capture)[1]).tobytes() - p = subprocess.Popen(TESSERACT_CMD, **subprocess_args()) - output = p.communicate(input=png)[0] - return output.decode(DEFAULT_ENCODING) - - def extract_and_compare_text(capture: MatLike, texts: list[str]): """ Compares the extracted text of the given image and returns the similarity between the two texts. @@ -170,12 +136,17 @@ def extract_and_compare_text(capture: MatLike, texts: list[str]): @param texts: a list of strings to match for @return: The similarity between the text in the image and the text supplied as a number 0 to 1. """ - # if the string is found 1:1 in the string extracted from the image a 1 is returned. - # otherwise the levenshtein ratio is calculated between the two strings and gets returned. - image_string = run_tesseract(capture).lower().strip() + png = np.array(cv2.imencode(".png", capture)[1]).tobytes() + # If the string is found 1:1 in the string extracted from the image a 1 is returned. + # Otherwise the levenshtein ratio is calculated between the two strings and gets returned. + # Especially with stylised characters, OCR could conceivably get the right letter, but mix up the casing (m/M, o/O, t/T, etc.) + image_string = run_tesseract(png).lower().strip() ratio = 0.0 for text in texts: + # TODO: this 1:1 matching could lead to false positives + # maybe remove it and only rely on fuzzy matching? 
+ # discussion: https://github.com/Toufool/AutoSplit/pull/272#discussion_r1477120477 if text in image_string: ratio = 1.0 break diff --git a/src/split_parser.py b/src/split_parser.py index b72b752b..38a202d8 100644 --- a/src/split_parser.py +++ b/src/split_parser.py @@ -209,7 +209,7 @@ def parse_and_validate_images(autosplit: "AutoSplit"): else: for image in split_images: # Test for image without transparency - if not image.ocr and not is_valid_image(image.byte_array): + if not image.is_ocr and not is_valid_image(image.byte_array): error_message = partial(error_messages.image_validity, image.filename) break diff --git a/src/utils.py b/src/utils.py index f8b492ac..420e47f7 100644 --- a/src/utils.py +++ b/src/utils.py @@ -5,6 +5,7 @@ from enum import IntEnum from functools import partial from itertools import chain +from os import environ from platform import version from threading import Thread from typing import TYPE_CHECKING, Any, TypeGuard, TypeVar @@ -37,6 +38,9 @@ T = TypeVar("T") +TESSERACT_CMD = ["tesseract", "-", "-", "--oem", "1", "--psm", "6"] +DEFAULT_ENCODING = "utf-8" + DWMWA_EXTENDED_FRAME_BOUNDS = 9 MAXBYTE = 255 ONE_SECOND = 1000 @@ -209,6 +213,42 @@ def flatten(nested_iterable: Iterable[Iterable[T]]) -> chain[T]: return chain.from_iterable(nested_iterable) +def subprocess_args(): + """ + See https://github.com/pyinstaller/pyinstaller/wiki/Recipe-subprocess + for reference and comments. 
+ + This code snippet was copied from https://github.com/madmaze/pytesseract + """ + + kwargs = { + "stdin": subprocess.PIPE, + "stdout": subprocess.PIPE, + "stderr": subprocess.DEVNULL, + "startupinfo": None, + "env": environ, + } + + if hasattr(subprocess, "STARTUPINFO"): + kwargs["startupinfo"] = subprocess.STARTUPINFO() + kwargs["startupinfo"].dwFlags |= subprocess.STARTF_USESHOWWINDOW + kwargs["startupinfo"].wShowWindow = subprocess.SW_HIDE + + return kwargs + + +def run_tesseract(png: bytes): + """ + Executes the tesseract CLI and pipes a PNG encoded image to it. + + @param capture: PNG encoded image + @return: The recognized output string from tesseract + """ + p = subprocess.Popen(TESSERACT_CMD, **subprocess_args()) # noqa: S603 + output = p.communicate(input=png)[0] + return output.decode(DEFAULT_ENCODING) + + # Environment specifics WINDOWS_BUILD_NUMBER = int(version().split(".")[-1]) if sys.platform == "win32" else -1 FIRST_WIN_11_BUILD = 22000 From fb8ed6f4180a76710545fc16c81fb89daa663ac0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 4 Feb 2024 09:52:14 +0000 Subject: [PATCH 13/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/AutoSplitImage.py | 2 +- src/compare.py | 3 ++- src/utils.py | 3 +-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py index 88d43c91..cb86d31a 100644 --- a/src/AutoSplitImage.py +++ b/src/AutoSplitImage.py @@ -129,7 +129,7 @@ def __init__(self, path: str): self.image_type = ImageType.SPLIT def __parse_text_file(self, path: str): - with open(path, "r") as f: + with open(path) as f: data = toml.load(f) self.texts = data["texts"] self.__x = data["top_left"] diff --git a/src/compare.py b/src/compare.py index d9f6de1a..027be0c7 100644 --- a/src/compare.py +++ b/src/compare.py @@ -139,7 +139,8 @@ def extract_and_compare_text(capture: MatLike, 
texts: list[str]): png = np.array(cv2.imencode(".png", capture)[1]).tobytes() # If the string is found 1:1 in the string extracted from the image a 1 is returned. # Otherwise the levenshtein ratio is calculated between the two strings and gets returned. - # Especially with stylised characters, OCR could conceivably get the right letter, but mix up the casing (m/M, o/O, t/T, etc.) + # Especially with stylised characters, OCR could conceivably get the right + # letter, but mix up the casing (m/M, o/O, t/T, etc.) image_string = run_tesseract(png).lower().strip() ratio = 0.0 diff --git a/src/utils.py b/src/utils.py index 420e47f7..5c215da7 100644 --- a/src/utils.py +++ b/src/utils.py @@ -220,7 +220,6 @@ def subprocess_args(): This code snippet was copied from https://github.com/madmaze/pytesseract """ - kwargs = { "stdin": subprocess.PIPE, "stdout": subprocess.PIPE, @@ -244,7 +243,7 @@ def run_tesseract(png: bytes): @param capture: PNG encoded image @return: The recognized output string from tesseract """ - p = subprocess.Popen(TESSERACT_CMD, **subprocess_args()) # noqa: S603 + p = subprocess.Popen(TESSERACT_CMD, **subprocess_args()) output = p.communicate(input=png)[0] return output.decode(DEFAULT_ENCODING) From da830c6edffbe2256f80727f55af97d48854330c Mon Sep 17 00:00:00 2001 From: ston1th Date: Sun, 4 Feb 2024 12:03:03 +0100 Subject: [PATCH 14/26] import subprocess globally --- src/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/utils.py b/src/utils.py index 5c215da7..7db9ddfc 100644 --- a/src/utils.py +++ b/src/utils.py @@ -1,5 +1,6 @@ import asyncio import os +import subprocess # noqa: PLC0415, S404 import sys from collections.abc import Callable, Iterable from enum import IntEnum @@ -130,8 +131,6 @@ def open_file(file_path: str | bytes | os.PathLike[str] | os.PathLike[bytes]): if sys.platform == "win32": os.startfile(file_path) # noqa: S606 else: - import subprocess # noqa: PLC0415, S404 - opener = "xdg-open" if sys.platform == 
"linux" else "open" subprocess.call([opener, file_path]) # noqa: S603 From 56740888b7315e6e04097081bcecae322302dbd5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 4 Feb 2024 10:03:27 +0000 Subject: [PATCH 15/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.py b/src/utils.py index 7db9ddfc..7ed31d35 100644 --- a/src/utils.py +++ b/src/utils.py @@ -1,6 +1,6 @@ import asyncio import os -import subprocess # noqa: PLC0415, S404 +import subprocess # noqa: S404 import sys from collections.abc import Callable, Iterable from enum import IntEnum From b5f6639627232679beafdab41fd4797b92f5ac27 Mon Sep 17 00:00:00 2001 From: ston1th Date: Sun, 4 Feb 2024 12:07:49 +0100 Subject: [PATCH 16/26] make linter happy --- src/AutoSplitImage.py | 2 +- src/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py index cb86d31a..5a5fc298 100644 --- a/src/AutoSplitImage.py +++ b/src/AutoSplitImage.py @@ -129,7 +129,7 @@ def __init__(self, path: str): self.image_type = ImageType.SPLIT def __parse_text_file(self, path: str): - with open(path) as f: + with open(path, encoding="utf-8") as f: data = toml.load(f) self.texts = data["texts"] self.__x = data["top_left"] diff --git a/src/utils.py b/src/utils.py index 7ed31d35..684cec56 100644 --- a/src/utils.py +++ b/src/utils.py @@ -242,7 +242,7 @@ def run_tesseract(png: bytes): @param capture: PNG encoded image @return: The recognized output string from tesseract """ - p = subprocess.Popen(TESSERACT_CMD, **subprocess_args()) + p = subprocess.Popen(TESSERACT_CMD, **subprocess_args()) # noqa: S603 output = p.communicate(input=png)[0] return output.decode(DEFAULT_ENCODING) From f1ba4108a91aa23ace77f002b279b9f985979924 Mon Sep 17 00:00:00 2001 From: ston1th Date: Sun, 4 
Feb 2024 12:24:48 +0100 Subject: [PATCH 17/26] fixed typo in docstring --- src/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.py b/src/utils.py index 684cec56..5a26affa 100644 --- a/src/utils.py +++ b/src/utils.py @@ -239,7 +239,7 @@ def run_tesseract(png: bytes): """ Executes the tesseract CLI and pipes a PNG encoded image to it. - @param capture: PNG encoded image + @param png: PNG encoded image as byte array @return: The recognized output string from tesseract """ p = subprocess.Popen(TESSERACT_CMD, **subprocess_args()) # noqa: S603 From d1dfff05bd16f4056e462c8a3aaa90cd388f258e Mon Sep 17 00:00:00 2001 From: ston1th Date: Sat, 10 Feb 2024 21:45:42 +0100 Subject: [PATCH 18/26] input validation and comparison methods * added input validation for OCR text file settings * added different OCR comparison methods * fixed the requested changes --- README.md | 32 ++++++++++------ src/AutoSplitImage.py | 47 +++++++++++++---------- src/compare.py | 43 ++++++++++++++++----- src/error_messages.py | 16 ++++++++ src/utils.py | 87 ++++++++++++++++++++++++++++++------------- 5 files changed, 158 insertions(+), 67 deletions(-) diff --git a/README.md b/README.md index a40c728a..7e0f53f9 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ This program can be used to automatically start, split, and reset your preferred - Download the [latest version](/../../releases/latest) - You can also check out the [latest dev builds](/../../actions/workflows/lint-and-build.yml?query=event%3Apush+is%3Asuccess) (requires a GitHub account) (If you don't have a GitHub account, you can try [nightly.link](https://nightly.link/Toufool/AutoSplit/workflows/lint-and-build/dev)) +- Tesseract-OCR (optional; required for text recognition as an alternative comparison method). See [Tesseract install](#tesseract-install) below for installation instructions. - Linux users must ensure they are in the `tty` and `input` groups and have write access to `/dev/uinput`. 
You can run the following commands to do so: @@ -51,7 +52,6 @@ This program can be used to automatically start, split, and reset your preferred - Wayland is not currently supported - WSL2/WSLg requires an additional Desktop Environment, external X11 server, and/or systemd - Python 3.10+ (Not required for normal use. Refer to the [build instructions](/docs/build%20instructions.md) if you'd like run the application directly in Python). -- Tesseract-OCR (optional; requierd for text recognition as an alternative comparison method). See https://github.com/UB-Mannheim/tesseract/wiki for installation instructions. ## OPTIONS @@ -230,17 +230,16 @@ The Start Image is similar to the Reset Image. You can only have one Start Image ### Text Recognition (OCR) You can use text recognition as an alternative comparison method. -First you need to install tesseract and include it in your PATH variable. See [Compatibility](#Compatibility) above. -To include tesseract in your PATH variable you can use this powershell snippet. +#### Tesseract install -Note: change the `$tesseract_path` variable to the location where tesseract is installed. +First you need to install tesseract and include it in your system or user environment variables. +- See for installation instruction on all platforms. +- For Windows: + 1. You can go directly to to find the installer. + 2. If you change the "Destination Folder" during install, then you'll also need to add it to your `PATH` environment variable. -``` -$path = [System.Environment]::GetEnvironmentVariable("Path", "User") -$tesseract_path = "C:\Program Files\Tesseract-OCR" -[System.Environment]::SetEnvironmentVariable("Path", "$path;$tesseract_path", "User") -``` +#### Usage To use this feature you need to place a text file (.txt) in your splits folder instead of an image file. 
@@ -250,22 +249,31 @@ Filename: `001_start_auto_splitter.txt` Content: -``` +```toml texts = ["complete any 2 encounters"] top_left = 275 top_right = 540 bottom_left = 70 bottom_right = 95 +method = 0 fps_limit = 1 ``` The `texts` field is an array and can take more than one text to look for: -``` +```toml texts = ["look for me", "or this text"] ``` -The `top`, `bottom`, `left` and `right` options define a rectangle where the text you are looking for is expected to appear in the image. +Note: for now we only use lowercase letters in the comparison. All uppercase letters are converted to lowercase before the comparison. + +The `top_left` and `top_right` (both X-axis) and `bottom_left` and `bottom_right` (both Y-axis) options define a rectangle where the text you are looking for is expected to appear in the image. + +Currently there are three comparison methods: + +* `0` - uses the Levenshtein distance (the default) +* `1` - checks if the OCR text contains the searched text +* `2` - looks for a perfect 1:1 match Note: This method can cause high CPU usage at the standard comparison FPS. You should therefor limit the comparison FPS when you use this method to 1 or 2 FPS using the `fps_limit` option. The size of the selected rectangle can also impact the CPU load (bigger = more CPU load). 
diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py index 5a5fc298..d9c7f211 100644 --- a/src/AutoSplitImage.py +++ b/src/AutoSplitImage.py @@ -10,7 +10,7 @@ import error_messages from compare import check_if_image_has_transparency, extract_and_compare_text, get_comparison_method_by_index -from utils import BGR_CHANNEL_COUNT, MAXBYTE, ColorChannel, ImageShape, is_valid_image +from utils import BGR_CHANNEL_COUNT, MAXBYTE, TESSERACT_PATH, ColorChannel, ImageShape, is_valid_image if TYPE_CHECKING: from AutoSplit import AutoSplit @@ -41,7 +41,7 @@ class AutoSplitImage: image_type: ImageType byte_array: MatLike | None = None mask: MatLike | None = None - texts: list[str] + texts: list[str] = [] # This value is internal, check for mask instead _has_transparency = False # These values should be overriden by some Defaults if None. Use getters instead @@ -49,11 +49,12 @@ class AutoSplitImage: __comparison_method: int | None = None __pause_time: float | None = None __similarity_threshold: float | None = None - __x: int - __xx: int - __y: int - __yy: int - __fps_limit: int + __x: int = 0 + __xx: int = 0 + __y: int = 0 + __yy: int = 0 + __ocr_comparison_method: int = 0 + __fps_limit: int = 0 @property def is_ocr(self): @@ -106,16 +107,10 @@ def __init__(self, path: str): self.filename = os.path.split(path)[-1].lower() self.flags = flags_from_filename(self.filename) self.loops = loop_from_filename(self.filename) - self.texts = list[str]() self.__delay_time = delay_time_from_filename(self.filename) self.__comparison_method = comparison_method_from_filename(self.filename) self.__pause_time = pause_from_filename(self.filename) self.__similarity_threshold = threshold_from_filename(self.filename) - self.__x = 0 - self.__xx = 0 - self.__y = 0 - self.__yy = 0 - self.__fps_limit = 0 if path.endswith("txt"): self.__parse_text_file(path) else: @@ -129,16 +124,26 @@ def __init__(self, path: str): self.image_type = ImageType.SPLIT def __parse_text_file(self, path: str): + if not 
TESSERACT_PATH: + error_messages.tesseract_missing(path) + return + with open(path, encoding="utf-8") as f: data = toml.load(f) - self.texts = data["texts"] - self.__x = data["top_left"] - self.__xx = data["top_right"] - self.__y = data["bottom_left"] - self.__yy = data["bottom_right"] + self.texts = [text.lower().strip() for text in data["texts"]] + self.__x = abs(data["top_left"]) + self.__xx = abs(data["top_right"]) + self.__y = abs(data["bottom_left"]) + self.__yy = abs(data["bottom_right"]) + if "method" in data: + self.__ocr_comparison_method = abs(data["method"]) self.__fps_limit = 1 if "fps_limit" in data: - self.fps_limit = data["fps_limit"] + self.__fps_limit = abs(data["fps_limit"]) + + if self.__xx <= self.__x or self.__yy <= self.__y: + error_messages.wrong_ocr_coordinates(path) + return def __read_image_bytes(self, path: str): image = cv2.imread(path, cv2.IMREAD_UNCHANGED) @@ -190,7 +195,9 @@ def compare_with_capture( return 0.0 if self.is_ocr: - return extract_and_compare_text(capture[self.__y:self.__yy, self.__x:self.__xx], self.texts) + return extract_and_compare_text( + capture[self.__y:self.__yy, self.__x:self.__xx], self.texts, self.__ocr_comparison_method, + ) if not is_valid_image(self.byte_array): return 0.0 diff --git a/src/compare.py b/src/compare.py index 027be0c7..edb01fa9 100644 --- a/src/compare.py +++ b/src/compare.py @@ -13,6 +13,7 @@ HISTOGRAM_SIZE = [8, 8, 8] RANGES = [0, MAXRANGE, 0, MAXRANGE, 0, MAXRANGE] MASK_SIZE_MULTIPLIER = ColorChannel.Alpha * MAXBYTE * MAXBYTE +MAX_VALUE = 1.0 def compare_histograms(source: MatLike, capture: MatLike, mask: MatLike | None = None): @@ -127,41 +128,65 @@ def compare_phash(source: MatLike, capture: MatLike, mask: MatLike | None = None return 1 - (hash_diff / 64.0) -def extract_and_compare_text(capture: MatLike, texts: list[str]): +def extract_and_compare_text(capture: MatLike, texts: list[str], method_index: int): """ Compares the extracted text of the given image and returns the similarity 
between the two texts. The best match of all texts is returned. @param capture: Image of any given shape as a numpy array @param texts: a list of strings to match for + @param method_index: the comparison method index to use @return: The similarity between the text in the image and the text supplied as a number 0 to 1. """ + method = get_ocr_comparison_method_by_index(method_index) png = np.array(cv2.imencode(".png", capture)[1]).tobytes() - # If the string is found 1:1 in the string extracted from the image a 1 is returned. - # Otherwise the levenshtein ratio is calculated between the two strings and gets returned. # Especially with stylised characters, OCR could conceivably get the right # letter, but mix up the casing (m/M, o/O, t/T, etc.) image_string = run_tesseract(png).lower().strip() ratio = 0.0 for text in texts: - # TODO: this 1:1 matching could lead to false positives - # maybe remove it and only rely on fuzzy matching? - # discussion: https://github.com/Toufool/AutoSplit/pull/272#discussion_r1477120477 - if text in image_string: - ratio = 1.0 + ratio = max(ratio, method(text, image_string)) + if ratio == MAX_VALUE: break - ratio = max(ratio, Levenshtein.ratio(text, image_string)) # TODO: debug: remove me if ratio > 0.9: # noqa: PLR2004 print(f"text from image ({ratio:,.2f}): {image_string}") return ratio +def compare_levenshtein(a: str, b: str): + return Levenshtein.ratio(a, b) # pyright: ignore [reportUnknownMemberType] + + +def compare_submatch(a: str, b: str): + if a in b: + return MAX_VALUE + return 0.0 + + +def compare_one_to_one(a: str, b: str): + if a == b: + return MAX_VALUE + return 0.0 + + def __compare_dummy(*_: object): return 0.0 +def get_ocr_comparison_method_by_index(comparison_method_index: int): + match comparison_method_index: + case 0: + return compare_levenshtein + case 1: + return compare_submatch + case 2: + return compare_one_to_one + case _: + return __compare_dummy + + def get_comparison_method_by_index(comparison_method_index: 
int): match comparison_method_index: case 0: diff --git a/src/error_messages.py b/src/error_messages.py index 6cf64805..74563548 100644 --- a/src/error_messages.py +++ b/src/error_messages.py @@ -228,3 +228,19 @@ def handle_top_level_exceptions(exception: Exception) -> NoReturn: else: traceback.print_exception(type(exception), exception, exception.__traceback__) sys.exit(1) + + +def tesseract_missing(ocr_split_file_path: str): + set_text_message( + f"{ocr_split_file_path!r} is an Optical Character Recognition split file but tesseract couldn't be found." + + f'\nPlease read ' + + f"github.com/{GITHUB_REPOSITORY}#install-tesseract for installation instructions.", + ) + + +def wrong_ocr_coordinates(ocr_split_file_path: str): + set_text_message( + f"{ocr_split_file_path!r} has invalid coordinates." + + "\nPlease make sure that the 'top_right' and 'bottom_right' coordinates are not euqal to or lower then the " + + "'top_left' and 'bottom_left' coordinates.", + ) diff --git a/src/utils.py b/src/utils.py index 5a26affa..3b34e023 100644 --- a/src/utils.py +++ b/src/utils.py @@ -1,15 +1,15 @@ import asyncio import os +import shutil import subprocess # noqa: S404 import sys from collections.abc import Callable, Iterable from enum import IntEnum from functools import partial from itertools import chain -from os import environ from platform import version from threading import Thread -from typing import TYPE_CHECKING, Any, TypeGuard, TypeVar +from typing import TYPE_CHECKING, Any, TypedDict, TypeGuard, TypeVar from cv2.typing import MatLike @@ -39,7 +39,17 @@ T = TypeVar("T") -TESSERACT_CMD = ["tesseract", "-", "-", "--oem", "1", "--psm", "6"] + +def find_tesseract_path(): + search_path = str(os.environ.get("PATH")) + if sys.platform == "win32": + search_path += r";C:\Program Files\Tesseract-OCR;C:\Program Files (x86)\Tesseract-OCR" + return shutil.which(TESSERACT_EXE, path=search_path) + + +TESSERACT_EXE = "tesseract" +TESSERACT_PATH = find_tesseract_path() +TESSERACT_CMD = 
(TESSERACT_PATH or TESSERACT_EXE, "-", "-", "--oem", "1", "--psm", "6") DEFAULT_ENCODING = "utf-8" DWMWA_EXTENDED_FRAME_BOUNDS = 9 @@ -65,6 +75,14 @@ class ColorChannel(IntEnum): Alpha = 3 +class SubprocessKWArgs(TypedDict): + stdin: int + stdout: int + stderr: int + startupinfo: subprocess.STARTUPINFO | None + env: os._Environ[str] | None # pyright: ignore[reportPrivateUsage] + + def decimal(value: float): # Using ljust instead of :2f because of python float rounding errors return f"{int(value * 100) / 100}".ljust(4, "0") @@ -212,39 +230,56 @@ def flatten(nested_iterable: Iterable[Iterable[T]]) -> chain[T]: return chain.from_iterable(nested_iterable) -def subprocess_args(): +def subprocess_kwargs(): """ - See https://github.com/pyinstaller/pyinstaller/wiki/Recipe-subprocess - for reference and comments. - - This code snippet was copied from https://github.com/madmaze/pytesseract + Create a set of arguments which make a ``subprocess.Popen`` (and + variants) call work with or without Pyinstaller, ``--noconsole`` or + not, on Windows and Linux. + Typical use: + subprocess.call(['program_to_run', 'arg_1'], **subprocess_args()) + --- + Originally found in https://github.com/madmaze/pytesseract/blob/master/pytesseract/pytesseract.py + Recipe from https://github.com/pyinstaller/pyinstaller/wiki/Recipe-subprocess + which itself is taken from https://github.com/bjones1/enki/blob/master/enki/lib/get_console_output.py """ - kwargs = { - "stdin": subprocess.PIPE, - "stdout": subprocess.PIPE, - "stderr": subprocess.DEVNULL, - "startupinfo": None, - "env": environ, - } - + # The following is true only on Windows. if hasattr(subprocess, "STARTUPINFO"): - kwargs["startupinfo"] = subprocess.STARTUPINFO() - kwargs["startupinfo"].dwFlags |= subprocess.STARTF_USESHOWWINDOW - kwargs["startupinfo"].wShowWindow = subprocess.SW_HIDE - - return kwargs + # On Windows, subprocess calls will pop up a command window by default when run from + # Pyinstaller with the ``--noconsole`` option. 
Avoid this distraction. + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + # https://github.com/madmaze/pytesseract/blob/88839f03590578a10e806a5244704437c9d477da/pytesseract/pytesseract.py#L236 + startupinfo.wShowWindow = subprocess.SW_HIDE + # Windows doesn't search the path by default. Pass it an environment so it will. + env = os.environ + else: + startupinfo = None + env = None + # On Windows, running this from the binary produced by Pyinstaller + # with the ``--noconsole`` option requires redirecting everything + # (stdin, stdout, stderr) to avoid an OSError exception + # "[Error 6] the handle is invalid." + return SubprocessKWArgs( + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + startupinfo=startupinfo, + env=env, + ) def run_tesseract(png: bytes): """ Executes the tesseract CLI and pipes a PNG encoded image to it. - @param png: PNG encoded image as byte array - @return: The recognized output string from tesseract + @return: The recognized output string from tesseract. 
""" - p = subprocess.Popen(TESSERACT_CMD, **subprocess_args()) # noqa: S603 - output = p.communicate(input=png)[0] - return output.decode(DEFAULT_ENCODING) + return ( + subprocess + .Popen(TESSERACT_CMD, **subprocess_kwargs()) # noqa: S603 # Only using known literal strings + .communicate(input=png)[0] + .decode() + ) # Environment specifics From f3c0e3eac3883d0616017d3b8b1182280edecac5 Mon Sep 17 00:00:00 2001 From: ston1th Date: Sat, 10 Feb 2024 21:57:09 +0100 Subject: [PATCH 19/26] fix linter --- src/AutoSplitImage.py | 4 ++-- src/utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py index d9c7f211..570c2e99 100644 --- a/src/AutoSplitImage.py +++ b/src/AutoSplitImage.py @@ -1,7 +1,7 @@ import os from enum import IntEnum, auto from math import sqrt -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, ClassVar import cv2 import numpy as np @@ -41,7 +41,7 @@ class AutoSplitImage: image_type: ImageType byte_array: MatLike | None = None mask: MatLike | None = None - texts: list[str] = [] + texts: ClassVar[list[str]] = [] # This value is internal, check for mask instead _has_transparency = False # These values should be overriden by some Defaults if None. Use getters instead diff --git a/src/utils.py b/src/utils.py index 3b34e023..c1a9e9e5 100644 --- a/src/utils.py +++ b/src/utils.py @@ -241,7 +241,7 @@ def subprocess_kwargs(): Originally found in https://github.com/madmaze/pytesseract/blob/master/pytesseract/pytesseract.py Recipe from https://github.com/pyinstaller/pyinstaller/wiki/Recipe-subprocess which itself is taken from https://github.com/bjones1/enki/blob/master/enki/lib/get_console_output.py - """ + """ # noqa: D415,D400 # The following is true only on Windows. 
if hasattr(subprocess, "STARTUPINFO"): # On Windows, subprocess calls will pop up a command window by default when run from From 75cd0e9296f7aa727259597d7233741c94463e7b Mon Sep 17 00:00:00 2001 From: ston1th Date: Fri, 29 Mar 2024 14:51:07 +0100 Subject: [PATCH 20/26] improvements to coordinates and matching methods this commit improves the handling of the rectangle coordinates. the new scheme uses the top_left and bottom_right (X/Y) coordinates. the migration from the old scheme works as follows: ``` top_left = [, ] bottom_right = [, ] old: top_left = 275 top_right = 540 bottom_left = 70 bottom_right = 95 new: top_left = [275, 70] bottom_right = [540, 95] ``` you can now specify multiple matching methods and look for the best `text : method` match: ``` old: method = 0 new: methods = [0] or: methods = [2, 1, 0] ``` --- docs/tutorial.md | 64 ++++++++++++++++++++++++++++++++++++++++ scripts/requirements.txt | 2 +- src/AutoSplitImage.py | 56 ++++++++++++++++++++++------------- src/compare.py | 28 +++++++----------- src/error_messages.py | 8 ++--- src/utils.py | 10 ++++--- 6 files changed, 120 insertions(+), 48 deletions(-) diff --git a/docs/tutorial.md b/docs/tutorial.md index 3167d377..032b727d 100644 --- a/docs/tutorial.md +++ b/docs/tutorial.md @@ -174,6 +174,70 @@ You can have one (and only one) image with the keyword `reset` in its name. Auto The Start Image is similar to the Reset Image. You can only have one Start Image with the keyword `start_auto_splitter`.You can reload the image using the "`Reload Start Image`" button. The pause time is the amount of seconds AutoSplit will wait before starting comparisons of the first split image. Delay times will be used to delay starting your timer after the threshold is met. +### Text Recognition (OCR) + +You can use text recognition as an alternative comparison method. + +#### Tesseract install + +First you need to install tesseract and include it in your system or user environment variables. 
+- See for installation instructions on all platforms. +- For Windows: + 1. You can go directly to to find the installer. + 2. If you change the "Destination Folder" during install, then you'll also need to add it to your `PATH` environment variable. + +#### Usage + +To use this feature you need to place a text file (.txt) in your splits folder instead of an image file. + +An example file name and content could look like this: + +Filename: `001_start_auto_splitter.txt` + +Content: + +```toml +texts = ["complete any 2 encounters"] +top_left = [275, 70] +bottom_right = [540, 95] +methods = [0] +fps_limit = 1 +``` + +The `texts` field is an array and can take more than one text to look for: + +```toml +texts = ["look for me", "or this text"] +``` + +Note: for now we only use lowercase letters in the comparison. All uppercase letters are converted to lowercase before the comparison. + +The rectangle coordinates where the text you are looking for is expected to appear in the image are configured as follows: + +```toml +top_left = [X, Y] +bottom_right = [X, Y] +``` + +`top_left` is the top left and `bottom_right` is the bottom right corner of the rectangle. + +Currently there are three comparison methods: + +* `0` - uses the Levenshtein distance (the default) +* `1` - checks if the OCR text contains the searched text +* `2` - looks for a perfect 1:1 match + +You can also chain multiple comparison methods using the array notation: + +```toml +methods = [1, 0] +``` + +The methods are then checked in the order you defined and the best match upon them wins. + +Note: This method can cause high CPU usage at the standard comparison FPS. You should therefore limit the comparison FPS when you use this method to 1 or 2 FPS using the `fps_limit` option. +The size of the selected rectangle can also impact the CPU load (bigger = more CPU load). 
+ ### Profiles diff --git a/scripts/requirements.txt b/scripts/requirements.txt index f686ae2e..93a6a541 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -4,7 +4,7 @@ # # Dependencies: git+https://github.com/boppreh/keyboard.git#egg=keyboard # Fix install on macos and linux-ci https://github.com/boppreh/keyboard/pull/568 -Levenshtein +Levenshtein>=0.25 numpy>=1.26 # Python 3.12 support opencv-python-headless>=4.9.0.80 # Typing fixes packaging diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py index 570c2e99..a7a88360 100644 --- a/src/AutoSplitImage.py +++ b/src/AutoSplitImage.py @@ -1,7 +1,7 @@ import os from enum import IntEnum, auto from math import sqrt -from typing import TYPE_CHECKING, ClassVar +from typing import TYPE_CHECKING import cv2 import numpy as np @@ -41,7 +41,7 @@ class AutoSplitImage: image_type: ImageType byte_array: MatLike | None = None mask: MatLike | None = None - texts: ClassVar[list[str]] = [] + texts: list[str] # This value is internal, check for mask instead _has_transparency = False # These values should be overriden by some Defaults if None. 
Use getters instead @@ -49,11 +49,8 @@ class AutoSplitImage: __comparison_method: int | None = None __pause_time: float | None = None __similarity_threshold: float | None = None - __x: int = 0 - __xx: int = 0 - __y: int = 0 - __yy: int = 0 - __ocr_comparison_method: int = 0 + __rect: list[int] + __ocr_comparison_methods: list[int] __fps_limit: int = 0 @property @@ -111,6 +108,7 @@ def __init__(self, path: str): self.__comparison_method = comparison_method_from_filename(self.filename) self.__pause_time = pause_from_filename(self.filename) self.__similarity_threshold = threshold_from_filename(self.filename) + self.texts = [] if path.endswith("txt"): self.__parse_text_file(path) else: @@ -130,21 +128,32 @@ def __parse_text_file(self, path: str): with open(path, encoding="utf-8") as f: data = toml.load(f) - self.texts = [text.lower().strip() for text in data["texts"]] - self.__x = abs(data["top_left"]) - self.__xx = abs(data["top_right"]) - self.__y = abs(data["bottom_left"]) - self.__yy = abs(data["bottom_right"]) - if "method" in data: - self.__ocr_comparison_method = abs(data["method"]) - self.__fps_limit = 1 - if "fps_limit" in data: - self.__fps_limit = abs(data["fps_limit"]) - - if self.__xx <= self.__x or self.__yy <= self.__y: - error_messages.wrong_ocr_coordinates(path) + + self.texts = [text.lower().strip() for text in data["texts"]] + self.__rect = [ + data["top_left"][0], + data["bottom_right"][0], + data["top_left"][1], + data["bottom_right"][1], + ] + self.__ocr_comparison_methods = data.get("methods", [0]) + self.__fps_limit = data.get("fps_limit", 0) + + if self.__validate_ocr(): + error_messages.wrong_ocr_values(path) return + def __validate_ocr(self): + values = self.__rect + self.__ocr_comparison_methods + values.append(self.__fps_limit) + return ( + any( # Check for invalid negative values + value < 0 for value in values + ) + or self.__rect[1] <= self.__rect[0] + or self.__rect[3] <= self.__rect[2] + ) + def __read_image_bytes(self, path: str): 
image = cv2.imread(path, cv2.IMREAD_UNCHANGED) if not is_valid_image(image): @@ -196,7 +205,12 @@ def compare_with_capture( if self.is_ocr: return extract_and_compare_text( - capture[self.__y:self.__yy, self.__x:self.__xx], self.texts, self.__ocr_comparison_method, + capture[ + self.__rect[2]:self.__rect[3], + self.__rect[0]:self.__rect[1], + ], + self.texts, + self.__ocr_comparison_methods, ) if not is_valid_image(self.byte_array): diff --git a/src/compare.py b/src/compare.py index 499fe8da..211d9b6b 100644 --- a/src/compare.py +++ b/src/compare.py @@ -128,17 +128,17 @@ def compare_phash(source: MatLike, capture: MatLike, mask: MatLike | None = None return 1 - (hash_diff / 64.0) -def extract_and_compare_text(capture: MatLike, texts: list[str], method_index: int): +def extract_and_compare_text(capture: MatLike, texts: list[str], methods_index: list[int]): """ Compares the extracted text of the given image and returns the similarity between the two texts. - The best match of all texts is returned. + The best match of all texts and methods is returned. @param capture: Image of any given shape as a numpy array @param texts: a list of strings to match for - @param method_index: the comparison method index to use + @param methods_index: a list of comparison methods to use in order @return: The similarity between the text in the image and the text supplied as a number 0 to 1. """ - method = get_ocr_comparison_method_by_index(method_index) + methods = [get_ocr_comparison_method_by_index(i) for i in methods_index] png = np.array(cv2.imencode(".png", capture)[1]).tobytes() # Especially with stylised characters, OCR could conceivably get the right # letter, but mix up the casing (m/M, o/O, t/T, etc.) 
@@ -146,23 +146,15 @@ def extract_and_compare_text(capture: MatLike, texts: list[str], method_index: i ratio = 0.0 for text in texts: - ratio = max(ratio, method(text, image_string)) - if ratio == MAX_VALUE: - break - # TODO: debug: remove me - if ratio > 0.9: # noqa: PLR2004 - print(f"text from image ({ratio:,.2f}): {image_string}") + for method in methods: + ratio = max(ratio, method(text, image_string)) + if ratio == MAX_VALUE: + return ratio # we found the best match; try to return early return ratio -def compare_levenshtein(a: str, b: str): - return Levenshtein.ratio(a, b) # pyright: ignore [reportUnknownMemberType] - - def compare_submatch(a: str, b: str): - if a in b: - return MAX_VALUE - return 0.0 + return float(a in b) def compare_one_to_one(a: str, b: str): @@ -178,7 +170,7 @@ def __compare_dummy(*_: object): def get_ocr_comparison_method_by_index(comparison_method_index: int): match comparison_method_index: case 0: - return compare_levenshtein + return Levenshtein.ratio case 1: return compare_submatch case 2: diff --git a/src/error_messages.py b/src/error_messages.py index d448ac8e..e1270a2a 100644 --- a/src/error_messages.py +++ b/src/error_messages.py @@ -238,9 +238,9 @@ def tesseract_missing(ocr_split_file_path: str): ) -def wrong_ocr_coordinates(ocr_split_file_path: str): +def wrong_ocr_values(ocr_split_file_path: str): set_text_message( - f"{ocr_split_file_path!r} has invalid coordinates." - + "\nPlease make sure that the 'top_right' and 'bottom_right' coordinates are not euqal to or lower then the " - + "'top_left' and 'bottom_left' coordinates.", + f"{ocr_split_file_path!r} has invalid values." + + "\nPlease make sure that the X and Y coordinates of 'bottom_right' are not euqal to or lower then the " + + "X and Y coordinates of 'top_left'. 
Also check for negative values in the 'methods' or 'fps_limit' settings", ) diff --git a/src/utils.py b/src/utils.py index c1a9e9e5..fbee5ba8 100644 --- a/src/utils.py +++ b/src/utils.py @@ -41,7 +41,7 @@ def find_tesseract_path(): - search_path = str(os.environ.get("PATH")) + search_path = os.environ.get("PATH", os.defpath) if sys.platform == "win32": search_path += r";C:\Program Files\Tesseract-OCR;C:\Program Files (x86)\Tesseract-OCR" return shutil.which(TESSERACT_EXE, path=search_path) @@ -49,8 +49,8 @@ def find_tesseract_path(): TESSERACT_EXE = "tesseract" TESSERACT_PATH = find_tesseract_path() +"""The path to execute tesseract. `None` if it can't be found.""" TESSERACT_CMD = (TESSERACT_PATH or TESSERACT_EXE, "-", "-", "--oem", "1", "--psm", "6") -DEFAULT_ENCODING = "utf-8" DWMWA_EXTENDED_FRAME_BOUNDS = 9 MAXBYTE = 255 @@ -236,12 +236,14 @@ def subprocess_kwargs(): variants) call work with or without Pyinstaller, ``--noconsole`` or not, on Windows and Linux. Typical use: - subprocess.call(['program_to_run', 'arg_1'], **subprocess_args()) + ```python + subprocess.call(['program_to_run', 'arg_1'], **subprocess_args()) + ``` --- Originally found in https://github.com/madmaze/pytesseract/blob/master/pytesseract/pytesseract.py Recipe from https://github.com/pyinstaller/pyinstaller/wiki/Recipe-subprocess which itself is taken from https://github.com/bjones1/enki/blob/master/enki/lib/get_console_output.py - """ # noqa: D415,D400 + """ # The following is true only on Windows. 
if hasattr(subprocess, "STARTUPINFO"): # On Windows, subprocess calls will pop up a command window by default when run from From 7be9a0e033495ba25f42335158ce2d9e63829499 Mon Sep 17 00:00:00 2001 From: ston1th Date: Fri, 29 Mar 2024 14:59:23 +0100 Subject: [PATCH 21/26] fix ruff linter --- src/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utils.py b/src/utils.py index fbee5ba8..981b1741 100644 --- a/src/utils.py +++ b/src/utils.py @@ -235,6 +235,7 @@ def subprocess_kwargs(): Create a set of arguments which make a ``subprocess.Popen`` (and variants) call work with or without Pyinstaller, ``--noconsole`` or not, on Windows and Linux. + Typical use: ```python subprocess.call(['program_to_run', 'arg_1'], **subprocess_args()) From 043b6b50bc74805285e8a056b47a14fbdf2ceb2a Mon Sep 17 00:00:00 2001 From: Avasam Date: Sat, 15 Jun 2024 17:07:56 -0400 Subject: [PATCH 22/26] Address my own PR comments and fix linting --- .github/workflows/lint-and-build.yml | 1 + docs/tutorial.md | 26 +++++++++------ scripts/lint.ps1 | 12 ++++--- src/AutoSplit.py | 9 ++--- src/AutoSplitImage.py | 33 ++++++------------- .../Screenshot using QT attempt.py | 8 +++-- src/capture_method/__init__.py | 27 +++++++-------- src/compare.py | 11 ++----- src/error_messages.py | 6 ++-- src/menu_bar.py | 5 ++- 10 files changed, 64 insertions(+), 74 deletions(-) diff --git a/.github/workflows/lint-and-build.yml b/.github/workflows/lint-and-build.yml index 85f116ab..1089c845 100644 --- a/.github/workflows/lint-and-build.yml +++ b/.github/workflows/lint-and-build.yml @@ -89,6 +89,7 @@ jobs: - name: Analysing the code with Pyright uses: jakebailey/pyright-action@v1 with: + version: "1.1.364" working-directory: src/ python-version: ${{ matrix.python-version }} Build: diff --git a/docs/tutorial.md b/docs/tutorial.md index 032b727d..7db9c44a 100644 --- a/docs/tutorial.md +++ b/docs/tutorial.md @@ -181,6 +181,7 @@ You can use text recognition as an alternative comparison method. 
#### Tesseract install First you need to install tesseract and include it in your system or user environment variables. + - See for installation instruction on all platforms. - For Windows: 1. You can go directly to to find the installer. @@ -198,8 +199,10 @@ Content: ```toml texts = ["complete any 2 encounters"] -top_left = [275, 70] -bottom_right = [540, 95] +left = 275 +right = 540 +top = 70 +bottom = 95 methods = [0] fps_limit = 1 ``` @@ -215,17 +218,20 @@ Note: for now we only use lowercase letters in the comparison. All uppercase let The rectangle coordinates where the text you are looking for is expected to appear in the image are configured as follows: ```toml -top_left = [X, Y] -bottom_right = [X, Y] +left = 275 +right = 540 +top = 70 +bottom = 95 ``` -`top_left` is the top left and `bottom_right` is the bottom right corner of the rectangle. +If you're used to working in corner coordinates, you can think of `top_left = [left, top]` and `bottom_right = [right, bottom]`. + +Currently there are two comparison methods: -Currently there are three comparison methods: +- `0` - uses the Levenshtein distance (the default) +- `1` - checks if the OCR text contains the searched text (`0.0` or `1.0`) -* `0` - uses the Levenshtein distance (the default) -* `1` - checks if the OCR text contains the searched text -* `2` - looks for a perfect 1:1 match +If you only want a perfect full match, use "Levenshtein" with a threshold of `(1.0)` on your file name. You can also chain multiple comparison methods using the array notation: @@ -233,7 +239,7 @@ You can also chain multiple comparison methods using the array notation: methods = [1, 0] ``` -The methods are then checked in the order you defined and the best match apon them wins. +The methods are then checked in the order you defined and the best match upon them wins. Note: This method can cause high CPU usage at the standard comparison FPS. 
You should therefor limit the comparison FPS when you use this method to 1 or 2 FPS using the `fps_limit` option. The size of the selected rectangle can also impact the CPU load (bigger = more CPU load). diff --git a/scripts/lint.ps1 b/scripts/lint.ps1 index 84dc6f8b..3e0eacb1 100644 --- a/scripts/lint.ps1 +++ b/scripts/lint.ps1 @@ -6,7 +6,7 @@ Write-Host "`nRunning formatting..." autopep8 src/ --recursive --in-place add-trailing-comma $(git ls-files '**.py*') -Write-Host "`nRunning Ruff..." +Write-Host "`nRunning Ruff ..." ruff check . --fix $exitCodes += $LastExitCode if ($LastExitCode -gt 0) { @@ -16,12 +16,16 @@ else { Write-Host "`Ruff passed" -ForegroundColor Green } -Write-Host "`nRunning Pyright..." -$Env:PYRIGHT_PYTHON_FORCE_VERSION = 'latest' -npx pyright@latest src/ +$pyrightVersion = '1.1.364' # Change this if latest has issues +Write-Host "`nRunning Pyright $pyrightVersion ..." +$Env:PYRIGHT_PYTHON_FORCE_VERSION = $pyrightVersion +npx -y pyright@$pyrightVersion src/ $exitCodes += $LastExitCode if ($LastExitCode -gt 0) { Write-Host "`Pyright failed ($LastExitCode)" -ForegroundColor Red + if ($pyrightVersion -eq 'latest') { + npx pyright@latest --version + } } else { Write-Host "`Pyright passed" -ForegroundColor Green diff --git a/src/AutoSplit.py b/src/AutoSplit.py index c553ace7..464c83f8 100644 --- a/src/AutoSplit.py +++ b/src/AutoSplit.py @@ -324,8 +324,7 @@ def __compare_capture_for_auto_start(self): start_image_similarity = self.start_image.compare_with_capture(self, capture) # If the similarity becomes higher than highest similarity, set it as such. 
- if start_image_similarity > self.highest_similarity: - self.highest_similarity = start_image_similarity + self.highest_similarity = max(start_image_similarity, self.highest_similarity) self.table_current_image_live_label.setText(decimal(start_image_similarity)) self.table_current_image_highest_label.setText(decimal(self.highest_similarity)) @@ -673,8 +672,7 @@ def __similarity_threshold_loop(self, number_of_split_images: int, dummy_splits_ self.table_current_image_live_label.setText(decimal(similarity)) # if the similarity becomes higher than highest similarity, set it as such. - if similarity > self.highest_similarity: - self.highest_similarity = similarity + self.highest_similarity = max(similarity, self.highest_similarity) # show live highest similarity if the checkbox is checked self.table_current_image_highest_label.setText(decimal(self.highest_similarity)) @@ -842,8 +840,7 @@ def __reset_if_should(self, capture: MatLike | None): self.table_reset_image_live_label.setText("paused") else: should_reset = similarity >= threshold - if similarity > self.reset_highest_similarity: - self.reset_highest_similarity = similarity + self.reset_highest_similarity = max(similarity, self.reset_highest_similarity) self.table_reset_image_highest_label.setText(decimal(self.reset_highest_similarity)) self.table_reset_image_live_label.setText(decimal(similarity)) diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py index a7a88360..052edd1e 100644 --- a/src/AutoSplitImage.py +++ b/src/AutoSplitImage.py @@ -34,24 +34,18 @@ class ImageType(IntEnum): class AutoSplitImage: - path: str - filename: str - flags: int - loops: int image_type: ImageType byte_array: MatLike | None = None mask: MatLike | None = None - texts: list[str] # This value is internal, check for mask instead _has_transparency = False - # These values should be overriden by some Defaults if None. Use getters instead + # These values should be overridden by some Defaults if None. 
Use getters instead __delay_time: float | None = None __comparison_method: int | None = None __pause_time: float | None = None __similarity_threshold: float | None = None - __rect: list[int] - __ocr_comparison_methods: list[int] - __fps_limit: int = 0 + __rect = (0, 0, 1, 1) + __fps_limit = 0 @property def is_ocr(self): @@ -108,7 +102,8 @@ def __init__(self, path: str): self.__comparison_method = comparison_method_from_filename(self.filename) self.__pause_time = pause_from_filename(self.filename) self.__similarity_threshold = threshold_from_filename(self.filename) - self.texts = [] + self.texts: list[str] = [] + self. __ocr_comparison_methods: list[int] = [] if path.endswith("txt"): self.__parse_text_file(path) else: @@ -130,12 +125,7 @@ def __parse_text_file(self, path: str): data = toml.load(f) self.texts = [text.lower().strip() for text in data["texts"]] - self.__rect = [ - data["top_left"][0], - data["bottom_right"][0], - data["top_left"][1], - data["bottom_right"][1], - ] + self.__rect = (data["left"], data["right"], data["top"], data["bottom"]) self.__ocr_comparison_methods = data.get("methods", [0]) self.__fps_limit = data.get("fps_limit", 0) @@ -144,14 +134,11 @@ def __parse_text_file(self, path: str): return def __validate_ocr(self): - values = self.__rect + self.__ocr_comparison_methods - values.append(self.__fps_limit) + values = [*self.__rect, *self.__ocr_comparison_methods, self.__fps_limit] return ( - any( # Check for invalid negative values - value < 0 for value in values - ) - or self.__rect[1] <= self.__rect[0] - or self.__rect[3] <= self.__rect[2] + all(value >= 0 for value in values) # Check for invalid negative values + and self.__rect[1] > self.__rect[0] + and self.__rect[3] > self.__rect[2] ) def __read_image_bytes(self, path: str): diff --git a/src/capture_method/Screenshot using QT attempt.py b/src/capture_method/Screenshot using QT attempt.py index fa55e8d5..427bceb1 100644 --- a/src/capture_method/Screenshot using QT attempt.py +++ 
b/src/capture_method/Screenshot using QT attempt.py @@ -1,17 +1,19 @@ -# flake8: noqa +# ruff: noqa: RET504 + import sys if sys.platform != "linux": - raise OSError() + raise OSError from typing import cast import numpy as np from cv2.typing import MatLike from PySide6.QtCore import QBuffer, QIODeviceBase from PySide6.QtGui import QGuiApplication -from capture_method.CaptureMethodBase import CaptureMethodBase from typing_extensions import override +from capture_method.CaptureMethodBase import CaptureMethodBase + class QtCaptureMethod(CaptureMethodBase): _render_full_content = False diff --git a/src/capture_method/__init__.py b/src/capture_method/__init__.py index 3c5c28b1..429171c9 100644 --- a/src/capture_method/__init__.py +++ b/src/capture_method/__init__.py @@ -1,4 +1,3 @@ -import asyncio import os import sys from collections import OrderedDict @@ -15,6 +14,7 @@ if sys.platform == "win32": from _ctypes import COMError # noqa: PLC2701 + from pygrabber.dshow_graph import FilterGraph from capture_method.BitBltCaptureMethod import BitBltCaptureMethod @@ -76,7 +76,12 @@ def __hash__(self): @override @staticmethod - def _generate_next_value_(name: "str | CaptureMethodEnum", *_): + def _generate_next_value_( + name: "str | CaptureMethodEnum", + start: int, + count: int, + last_values: list["str | CaptureMethodEnum"], + ): return name NONE = "" @@ -112,10 +117,11 @@ def get_method_by_index(self, index: int): # Disallow unsafe get w/o breaking it at runtime @override def __getitem__( # type:ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] - self, - __key: Never, + self, + key: Never, + /, ) -> type[CaptureMethodBase]: - return super().__getitem__(__key) + return super().__getitem__(key) @override def get(self, key: CaptureMethodEnum, default: object = None, /): @@ -215,10 +221,10 @@ def get_input_device_resolution(index: int) -> tuple[int, int] | None: return resolution -async def get_all_video_capture_devices(): +def get_all_video_capture_devices(): 
named_video_inputs = get_input_devices() - async def get_camera_info(index: int, device_name: str): + def get_camera_info(index: int, device_name: str): backend = "" # Probing freezes some devices (like GV-USB2 and AverMedia) if already in use. See #169 # FIXME: Maybe offer the option to the user to obtain more info about their devices? @@ -245,9 +251,4 @@ async def get_camera_info(index: int, device_name: str): else None ) - return [ - camera_info - for camera_info - in await asyncio.gather(*starmap(get_camera_info, enumerate(named_video_inputs))) - if camera_info is not None - ] + return list(filter(None, starmap(get_camera_info, enumerate(named_video_inputs)))) diff --git a/src/compare.py b/src/compare.py index 211d9b6b..fefc2056 100644 --- a/src/compare.py +++ b/src/compare.py @@ -1,3 +1,4 @@ +from collections.abc import Iterable from math import sqrt import cv2 @@ -128,7 +129,7 @@ def compare_phash(source: MatLike, capture: MatLike, mask: MatLike | None = None return 1 - (hash_diff / 64.0) -def extract_and_compare_text(capture: MatLike, texts: list[str], methods_index: list[int]): +def extract_and_compare_text(capture: MatLike, texts: Iterable[str], methods_index: Iterable[int]): """ Compares the extracted text of the given image and returns the similarity between the two texts. The best match of all texts and methods is returned. 
@@ -157,12 +158,6 @@ def compare_submatch(a: str, b: str): return float(a in b) -def compare_one_to_one(a: str, b: str): - if a == b: - return MAX_VALUE - return 0.0 - - def __compare_dummy(*_: object): return 0.0 @@ -173,8 +168,6 @@ def get_ocr_comparison_method_by_index(comparison_method_index: int): return Levenshtein.ratio case 1: return compare_submatch - case 2: - return compare_one_to_one case _: return __compare_dummy diff --git a/src/error_messages.py b/src/error_messages.py index e1270a2a..ceb79df8 100644 --- a/src/error_messages.py +++ b/src/error_messages.py @@ -240,7 +240,7 @@ def tesseract_missing(ocr_split_file_path: str): def wrong_ocr_values(ocr_split_file_path: str): set_text_message( - f"{ocr_split_file_path!r} has invalid values." - + "\nPlease make sure that the X and Y coordinates of 'bottom_right' are not euqal to or lower then the " - + "X and Y coordinates of 'top_left'. Also check for negative values in the 'methods' or 'fps_limit' settings", + f"{ocr_split_file_path!r} has invalid values." + + "\nPlease make sure that `left < right` and `top < bottom`. 
" + + "Also check for negative values in the 'methods' or 'fps_limit' settings", ) diff --git a/src/menu_bar.py b/src/menu_bar.py index c47e880d..a01fe031 100644 --- a/src/menu_bar.py +++ b/src/menu_bar.py @@ -1,4 +1,3 @@ -import asyncio import json import sys import webbrowser @@ -135,7 +134,7 @@ def __init__(self, autosplit: "AutoSplit"): self.__video_capture_devices: list[CameraInfo] = [] """ Used to temporarily store the existing cameras, - we don't want to call `get_all_video_capture_devices` agains and possibly have a different result + we don't want to call `get_all_video_capture_devices` again and possibly have a different result """ self.setupUi(self) @@ -246,7 +245,7 @@ def __fps_limit_changed(self, value: int): @fire_and_forget def __set_all_capture_devices(self): - self.__video_capture_devices = asyncio.run(get_all_video_capture_devices()) + self.__video_capture_devices = get_all_video_capture_devices() if len(self.__video_capture_devices) > 0: for i in range(self.capture_device_combobox.count()): self.capture_device_combobox.removeItem(i) From c0b2920e5b997a7c5dd9ffd7c010a7e6e995f110 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 15 Jun 2024 21:08:06 +0000 Subject: [PATCH 23/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/capture_method/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/capture_method/__init__.py b/src/capture_method/__init__.py index 429171c9..7c9a7293 100644 --- a/src/capture_method/__init__.py +++ b/src/capture_method/__init__.py @@ -14,7 +14,6 @@ if sys.platform == "win32": from _ctypes import COMError # noqa: PLC2701 - from pygrabber.dshow_graph import FilterGraph from capture_method.BitBltCaptureMethod import BitBltCaptureMethod From 9cd0c2b7686f7224684570499cbf6d465be25a85 Mon Sep 17 00:00:00 2001 From: Avasam Date: Sun, 16 Jun 2024 00:46:16 -0400 Subject: [PATCH 24/26] STARTUPINFO 
doesn't exist on Linux --- src/utils.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/utils.py b/src/utils.py index 981b1741..aa7ab165 100644 --- a/src/utils.py +++ b/src/utils.py @@ -9,7 +9,7 @@ from itertools import chain from platform import version from threading import Thread -from typing import TYPE_CHECKING, Any, TypedDict, TypeGuard, TypeVar +from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict, TypeGuard, TypeVar from cv2.typing import MatLike @@ -36,6 +36,12 @@ if TYPE_CHECKING: # Source does not exist, keep this under TYPE_CHECKING from _win32typing import PyCDC # pyright: ignore[reportMissingModuleSource] + if sys.platform == "win32": + STARTUPINFO: TypeAlias = subprocess.STARTUPINFO + else: + STARTUPINFO: TypeAlias = None +else: + STARTUPINFO = getattr(subprocess, "STARTUPINFO", None) T = TypeVar("T") @@ -79,7 +85,7 @@ class SubprocessKWArgs(TypedDict): stdin: int stdout: int stderr: int - startupinfo: subprocess.STARTUPINFO | None + startupinfo: STARTUPINFO | None env: os._Environ[str] | None # pyright: ignore[reportPrivateUsage] @@ -246,10 +252,10 @@ def subprocess_kwargs(): which itself is taken from https://github.com/bjones1/enki/blob/master/enki/lib/get_console_output.py """ # The following is true only on Windows. - if hasattr(subprocess, "STARTUPINFO"): + if STARTUPINFO: # On Windows, subprocess calls will pop up a command window by default when run from # Pyinstaller with the ``--noconsole`` option. Avoid this distraction. 
- startupinfo = subprocess.STARTUPINFO() + startupinfo = STARTUPINFO() startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW # https://github.com/madmaze/pytesseract/blob/88839f03590578a10e806a5244704437c9d477da/pytesseract/pytesseract.py#L236 startupinfo.wShowWindow = subprocess.SW_HIDE From 2f03a90b9aa3b91f8249222b95750e52da1222f5 Mon Sep 17 00:00:00 2001 From: Avasam Date: Sun, 16 Jun 2024 00:52:03 -0400 Subject: [PATCH 25/26] More explicit platform check --- src/utils.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/utils.py b/src/utils.py index aa7ab165..a1d90990 100644 --- a/src/utils.py +++ b/src/utils.py @@ -23,6 +23,9 @@ import win32ui from winsdk.windows.ai.machinelearning import LearningModelDevice, LearningModelDeviceKind from winsdk.windows.media.capture import MediaCapture + STARTUPINFO: TypeAlias = subprocess.STARTUPINFO +else: + STARTUPINFO: TypeAlias = None if sys.platform == "linux": import fcntl @@ -36,12 +39,6 @@ if TYPE_CHECKING: # Source does not exist, keep this under TYPE_CHECKING from _win32typing import PyCDC # pyright: ignore[reportMissingModuleSource] - if sys.platform == "win32": - STARTUPINFO: TypeAlias = subprocess.STARTUPINFO - else: - STARTUPINFO: TypeAlias = None -else: - STARTUPINFO = getattr(subprocess, "STARTUPINFO", None) T = TypeVar("T") @@ -252,7 +249,7 @@ def subprocess_kwargs(): which itself is taken from https://github.com/bjones1/enki/blob/master/enki/lib/get_console_output.py """ # The following is true only on Windows. - if STARTUPINFO: + if sys.platform == "win32": # On Windows, subprocess calls will pop up a command window by default when run from # Pyinstaller with the ``--noconsole`` option. Avoid this distraction. 
startupinfo = STARTUPINFO() From 797492f782362d66a3e32f252bad80c6ffb2a1dd Mon Sep 17 00:00:00 2001 From: Avasam Date: Sun, 16 Jun 2024 13:23:42 -0400 Subject: [PATCH 26/26] Fix circular imports, mak OCR as clearly experimental --- docs/tutorial.md | 8 +++--- .../VideoCaptureDeviceCaptureMethod.py | 3 +-- src/capture_method/__init__.py | 27 ++----------------- src/utils.py | 27 +++++++++++++++++++ 4 files changed, 34 insertions(+), 31 deletions(-) diff --git a/docs/tutorial.md b/docs/tutorial.md index 7db9c44a..c1722a51 100644 --- a/docs/tutorial.md +++ b/docs/tutorial.md @@ -174,7 +174,7 @@ You can have one (and only one) image with the keyword `reset` in its name. Auto The Start Image is similar to the Reset Image. You can only have one Start Image with the keyword `start_auto_splitter`.You can reload the image using the "`Reload Start Image`" button. The pause time is the amount of seconds AutoSplit will wait before starting comparisons of the first split image. Delay times will be used to delay starting your timer after the threshold is met. -### Text Recognition (OCR) +### Text Recognition / Optical Character Recognition (OCR) ⚠️EXPERIMENTAL⚠️ You can use text recognition as an alternative comparison method. @@ -189,7 +189,7 @@ First you need to install tesseract and include it in your system or user enviro #### Usage -To use this feature you need to place a text file (.txt) in your splits folder instead of an image file. +To use this feature you need to place a text file (`.txt`) in your splits folder instead of an image file. 
An example file name and content could look like this: @@ -229,7 +229,7 @@ If you're used to working in corner coordinates, you can think of `top_left = [l Currently there are two comparison methods: - `0` - uses the Levenshtein distance (the default) -- `1` - checks if the OCR text contains the searched text (`0.0` or `1.0`) +- `1` - checks if the OCR text contains the searched text (results in matches of either `0.0` or `1.0`) If you only want a perfect full match, use "Levenshtein" with a threshold of `(1.0)` on your file name. @@ -241,7 +241,7 @@ methods = [1, 0] The methods are then checked in the order you defined and the best match upon them wins. -Note: This method can cause high CPU usage at the standard comparison FPS. You should therefor limit the comparison FPS when you use this method to 1 or 2 FPS using the `fps_limit` option. +Note: This method can cause high CPU usage at the standard comparison FPS. You should therefor limit the comparison FPS when you use this method to 1 or 2 FPS using the `fps_limit` option. The size of the selected rectangle can also impact the CPU load (bigger = more CPU load). 
### Profiles diff --git a/src/capture_method/VideoCaptureDeviceCaptureMethod.py b/src/capture_method/VideoCaptureDeviceCaptureMethod.py index 29606f95..e87a19ca 100644 --- a/src/capture_method/VideoCaptureDeviceCaptureMethod.py +++ b/src/capture_method/VideoCaptureDeviceCaptureMethod.py @@ -7,10 +7,9 @@ from cv2.typing import MatLike from typing_extensions import override -from capture_method import get_input_device_resolution from capture_method.CaptureMethodBase import CaptureMethodBase from error_messages import CREATE_NEW_ISSUE_MESSAGE, exception_traceback -from utils import ImageShape, is_valid_image +from utils import ImageShape, get_input_device_resolution, is_valid_image if TYPE_CHECKING: from AutoSplit import AutoSplit diff --git a/src/capture_method/__init__.py b/src/capture_method/__init__.py index 37fdb502..9e5ea0b5 100644 --- a/src/capture_method/__init__.py +++ b/src/capture_method/__init__.py @@ -10,10 +10,10 @@ from capture_method.CaptureMethodBase import CaptureMethodBase from capture_method.VideoCaptureDeviceCaptureMethod import VideoCaptureDeviceCaptureMethod -from utils import WGC_MIN_BUILD, WINDOWS_BUILD_NUMBER, first, try_get_direct3d_device +from utils import WGC_MIN_BUILD, WINDOWS_BUILD_NUMBER, first, get_input_device_resolution, try_get_direct3d_device if sys.platform == "win32": - from _ctypes import COMError # noqa: PLC2701 + from _ctypes import COMError # noqa: PLC2701 # comtypes is untyped from pygrabber.dshow_graph import FilterGraph @@ -205,29 +205,6 @@ def get_input_devices(): return cameras -def get_input_device_resolution(index: int) -> tuple[int, int] | None: - if sys.platform != "win32": - return (0, 0) - filter_graph = FilterGraph() - try: - filter_graph.add_video_input_device(index) - # This can happen with virtual cameras throwing errors. 
- # For example since OBS 29.1 updated FFMPEG breaking VirtualCam 3.0 - # https://github.com/Toufool/AutoSplit/issues/238 - except COMError: - return None - - try: - resolution = filter_graph.get_input_device().get_current_format() - # For unknown reasons, some devices can raise "ValueError: NULL pointer access". - # For instance, Oh_DeeR's AVerMedia HD Capture C985 Bus 12 - except ValueError: - return None - finally: - filter_graph.remove_filters() - return resolution - - def get_all_video_capture_devices(): named_video_inputs = get_input_devices() diff --git a/src/utils.py b/src/utils.py index a1d90990..50d3f47f 100644 --- a/src/utils.py +++ b/src/utils.py @@ -18,11 +18,14 @@ if sys.platform == "win32": import ctypes import ctypes.wintypes + from _ctypes import COMError # noqa: PLC2701 # comtypes is untyped import win32gui import win32ui + from pygrabber.dshow_graph import FilterGraph from winsdk.windows.ai.machinelearning import LearningModelDevice, LearningModelDeviceKind from winsdk.windows.media.capture import MediaCapture + STARTUPINFO: TypeAlias = subprocess.STARTUPINFO else: STARTUPINFO: TypeAlias = None @@ -148,6 +151,30 @@ def get_window_bounds(hwnd: int) -> tuple[int, int, int, int]: return window_left_bounds, window_top_bounds, window_width, window_height +# Note: maybe reorganize capture_method module to have different helper modules and a methods submodule +def get_input_device_resolution(index: int) -> tuple[int, int] | None: + if sys.platform != "win32": + return (0, 0) + filter_graph = FilterGraph() + try: + filter_graph.add_video_input_device(index) + # This can happen with virtual cameras throwing errors. + # For example since OBS 29.1 updated FFMPEG breaking VirtualCam 3.0 + # https://github.com/Toufool/AutoSplit/issues/238 + except COMError: + return None + + try: + resolution = filter_graph.get_input_device().get_current_format() + # For unknown reasons, some devices can raise "ValueError: NULL pointer access". 
+ # For instance, Oh_DeeR's AVerMedia HD Capture C985 Bus 12 + except ValueError: + return None + finally: + filter_graph.remove_filters() + return resolution + + def open_file(file_path: str | bytes | os.PathLike[str] | os.PathLike[bytes]): if sys.platform == "win32": os.startfile(file_path) # noqa: S606