From 2b5b6805d05ef9a495135f9384a7445aff52a046 Mon Sep 17 00:00:00 2001 From: Alexandre Menezes Date: Wed, 6 Dec 2023 18:30:52 -0300 Subject: [PATCH] Updated mypy config --- aiopytesseract/base_command.py | 47 ++++++++------- aiopytesseract/commands.py | 96 +++++++++++++++---------------- aiopytesseract/exceptions.py | 16 +++--- aiopytesseract/models/__init__.py | 2 + aiopytesseract/returncode.py | 4 +- setup.cfg | 13 ++++- 6 files changed, 99 insertions(+), 79 deletions(-) diff --git a/aiopytesseract/base_command.py b/aiopytesseract/base_command.py index 7054f93..7036400 100644 --- a/aiopytesseract/base_command.py +++ b/aiopytesseract/base_command.py @@ -1,9 +1,10 @@ import asyncio import shlex +from asyncio.subprocess import Process from collections import deque from functools import singledispatch from pathlib import Path -from typing import Any, List, Optional, Tuple +from typing import Any, List, Tuple, Union from ._logger import logger from .constants import ( @@ -17,7 +18,9 @@ from .validators import file_exists, language_is_valid, oem_is_valid, psm_is_valid -async def execute_cmd(cmd_args: str, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT): +async def execute_cmd( + cmd_args: str, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT +) -> Process: logger.debug( f"aiopytesseract command: '{TESSERACT_CMD} {shlex.join(shlex.split(cmd_args))}'" ) @@ -31,6 +34,8 @@ async def execute_cmd(cmd_args: str, timeout: float = AIOPYTESSERACT_DEFAULT_TIM ), timeout=timeout, ) + if proc is None: + raise TesseractRuntimeError() return proc @@ -42,10 +47,10 @@ async def execute( psm: int, oem: int, timeout: float, - lang: Optional[str] = None, - user_words: Optional[str] = None, - user_patterns: Optional[str] = None, - tessdata_dir: Optional[str] = None, + lang: Union[None, str] = None, + user_words: Union[None, str] = None, + user_patterns: Union[None, str] = None, + tessdata_dir: Union[None, str] = None, ) -> bytes: raise NotImplementedError @@ -58,10 +63,10 @@ async def _( psm: int, oem: int, timeout: float, - lang: Optional[str] = None, - user_words: Optional[str] = None, - user_patterns: Optional[str] = None, - tessdata_dir: Optional[str] = None, + lang: Union[None, str] = None, + user_words: Union[None, str] = None, + user_patterns: Union[None, str] = None, + tessdata_dir: Union[None, str] = None, ) -> bytes: await file_exists(image) response: bytes = await execute( @@ -84,13 +89,13 @@ async def _( image: bytes, output_format: str, dpi: int, - lang: Optional[str], + lang: Union[None, str], psm: int, oem: int, timeout: float, - user_words: Optional[str] = None, - user_patterns: Optional[str] = None, - tessdata_dir: Optional[str] = None, + user_words: Union[None, str] = None, + user_patterns: Union[None, str] = None, + tessdata_dir: Union[None, str] = None, encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING, ) -> bytes: cmd_args = await _build_cmd_args( @@ -134,9 +139,9 @@ async def execute_multi_output_cmd( psm: int, oem: int, timeout: float, - user_words: Optional[str] = None, - user_patterns: Optional[str] = None, - tessdata_dir: Optional[str] = None, + user_words: Union[None, str] = None, + user_patterns: Union[None, str] = None, + tessdata_dir: Union[None, str] = None, encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING, ) -> Tuple[str, ...]: cmd_args = await _build_cmd_args( @@ -177,10 +182,10 @@ async def _build_cmd_args( dpi: int, psm: int, oem: int, - user_words: Optional[str] = None, - user_patterns: Optional[str] = None, - tessdata_dir: Optional[str] = None, - lang: Optional[str] = None, + user_words: Union[None, str] = None, + user_patterns: Union[None, str] = None, + tessdata_dir: Union[None, str] = None, + lang: Union[None, str] = None, output: str = "stdout", ) -> List[str]: await asyncio.gather(psm_is_valid(psm), oem_is_valid(oem)) diff --git a/aiopytesseract/commands.py b/aiopytesseract/commands.py index cbf9ed0..8f1ebee 100644 --- a/aiopytesseract/commands.py +++ b/aiopytesseract/commands.py @@ -3,7 +3,7 @@ from contextlib import asynccontextmanager from functools import singledispatch from pathlib import Path -from typing import Any, AsyncGenerator, List, Optional, Tuple +from typing import Any, AsyncGenerator, List, Tuple, Union import cattr from aiofiles import tempfile @@ -35,7 +35,7 @@ async def languages( :param encoding: decode bytes to string. (default: utf-8) """ proc = await execute_cmd(f"--list-langs {config}") - data = await proc.stdout.read() + data = await proc.stdout.read() # type: ignore langs = [] for line in data.decode(encoding).split(): lang = line.strip() @@ -62,7 +62,7 @@ async def tesseract_version(encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING) -> :param encoding: decode bytes to string. (default: utf-8) """ proc = await execute_cmd("--version") - data: bytes = await proc.stdout.readuntil() + data: bytes = await proc.stdout.readuntil() # type: ignore return data.decode(encoding).split()[1] @@ -80,7 +80,7 @@ async def confidence( dpi: int = AIOPYTESSERACT_DEFAULT_DPI, lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE, oem: int = AIOPYTESSERACT_DEFAULT_OEM, - tessdata_dir: Optional[str] = None, + tessdata_dir: Union[None, str] = None, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT, encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING, ) -> float: @@ -121,7 +121,7 @@ async def deskew( dpi: int = AIOPYTESSERACT_DEFAULT_DPI, lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE, oem: int = AIOPYTESSERACT_DEFAULT_OEM, - tessdata_dir: Optional[str] = None, + tessdata_dir: Union[None, str] = None, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT, encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING, ) -> float: @@ -140,7 +140,7 @@ async def deskew( cmdline = f"--tessdata-dir {tessdata_dir} {cmdline}" try: proc = await execute_cmd(cmdline) - data = await asyncio.wait_for(proc.stderr.read(), timeout=timeout) + data = await asyncio.wait_for(proc.stderr.read(), timeout=timeout) # type: ignore deskew_value = float( re.search( # type: ignore r"(Deskew.angle:.)(\d{1,10}.\d{1,10}$)", @@ -165,7 +165,7 @@ async def tesseract_parameters( :param encoding: decode bytes to string. (default: utf-8) """ proc = await execute_cmd("--print-parameters") - raw_data: bytes = await proc.stdout.read() + raw_data: bytes = await proc.stdout.read() # type: ignore data = raw_data.decode(encoding) params = [] for line in data.split("\n"): @@ -182,9 +182,9 @@ async def tesseract_parameters( @singledispatch async def image_to_string( image: Any, - user_words: Optional[str] = None, - user_patterns: Optional[str] = None, - tessdata_dir: Optional[str] = None, + user_words: Union[None, str] = None, + user_patterns: Union[None, str] = None, + tessdata_dir: Union[None, str] = None, dpi: int = AIOPYTESSERACT_DEFAULT_DPI, lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE, psm: int = AIOPYTESSERACT_DEFAULT_PSM, @@ -214,9 +214,9 @@ async def _( psm: int = AIOPYTESSERACT_DEFAULT_PSM, oem: int = AIOPYTESSERACT_DEFAULT_OEM, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT, - user_words: Optional[str] = None, - user_patterns: Optional[str] = None, - tessdata_dir: Optional[str] = None, + user_words: Union[None, str] = None, + user_patterns: Union[None, str] = None, + tessdata_dir: Union[None, str] = None, encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING, ) -> str: image_text: bytes = await execute( @@ -242,9 +242,9 @@ async def _( psm: int = AIOPYTESSERACT_DEFAULT_PSM, oem: int = AIOPYTESSERACT_DEFAULT_OEM, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT, - user_words: Optional[str] = None, - user_patterns: Optional[str] = None, - tessdata_dir: Optional[str] = None, + user_words: Union[None, str] = None, + user_patterns: Union[None, str] = None, + tessdata_dir: Union[None, str] = None, encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING, ) -> str: image_text: bytes = await execute( @@ -265,9 +265,9 @@ async def _( @singledispatch async def image_to_hocr( image: Any, - user_words: Optional[str] = None, - user_patterns: Optional[str] = None, - tessdata_dir: Optional[str] = None, + user_words: Union[None, str] = None, + user_patterns: Union[None, str] = None, + tessdata_dir: Union[None, str] = None, dpi: int = AIOPYTESSERACT_DEFAULT_DPI, lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE, psm: int = AIOPYTESSERACT_DEFAULT_PSM, @@ -292,9 +292,9 @@ async def image_to_hocr( @image_to_hocr.register(str) async def _( image: str, - user_words: Optional[str] = None, - user_patterns: Optional[str] = None, - tessdata_dir: Optional[str] = None, + user_words: Union[None, str] = None, + user_patterns: Union[None, str] = None, + tessdata_dir: Union[None, str] = None, dpi: int = AIOPYTESSERACT_DEFAULT_DPI, lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE, psm: int = AIOPYTESSERACT_DEFAULT_PSM, @@ -325,9 +325,9 @@ async def _( psm: int = AIOPYTESSERACT_DEFAULT_PSM, oem: int = AIOPYTESSERACT_DEFAULT_OEM, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT, - user_words: Optional[str] = None, - user_patterns: Optional[str] = None, - tessdata_dir: Optional[str] = None, + user_words: Union[None, str] = None, + user_patterns: Union[None, str] = None, + tessdata_dir: Union[None, str] = None, encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING, ) -> str: output: bytes = await execute( @@ -353,9 +353,9 @@ async def image_to_pdf( psm: int = AIOPYTESSERACT_DEFAULT_PSM, oem: int = AIOPYTESSERACT_DEFAULT_OEM, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT, - user_words: Optional[str] = None, - user_patterns: Optional[str] = None, - tessdata_dir: Optional[str] = None, + user_words: Union[None, str] = None, + user_patterns: Union[None, str] = None, + tessdata_dir: Union[None, str] = None, ) -> bytes: """Generate a searchable PDF from an image. @@ -380,9 +380,9 @@ async def _( psm: int = AIOPYTESSERACT_DEFAULT_PSM, oem: int = AIOPYTESSERACT_DEFAULT_OEM, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT, - user_words: Optional[str] = None, - user_patterns: Optional[str] = None, - tessdata_dir: Optional[str] = None, + user_words: Union[None, str] = None, + user_patterns: Union[None, str] = None, + tessdata_dir: Union[None, str] = None, ) -> bytes: output: bytes = await execute( image, @@ -407,9 +407,9 @@ async def _( psm: int = AIOPYTESSERACT_DEFAULT_PSM, oem: int = AIOPYTESSERACT_DEFAULT_OEM, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT, - user_words: Optional[str] = None, - user_patterns: Optional[str] = None, - tessdata_dir: Optional[str] = None, + user_words: Union[None, str] = None, + user_patterns: Union[None, str] = None, + tessdata_dir: Union[None, str] = None, ) -> bytes: output: bytes = await execute( image, @@ -430,7 +430,7 @@ async def _( async def image_to_boxes( image: Any, lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE, - tessdata_dir: Optional[str] = None, + tessdata_dir: Union[None, str] = None, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT, encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING, ) -> List[Box]: @@ -449,7 +449,7 @@ async def image_to_boxes( async def _( image: str, lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE, - tessdata_dir: Optional[str] = None, + tessdata_dir: Union[None, str] = None, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT, encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING, ) -> List[Box]: @@ -464,7 +464,7 @@ async def _( async def _( image: str, lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE, - tessdata_dir: Optional[str] = None, + tessdata_dir: Union[None, str] = None, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT, encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING, ) -> List[Box]: @@ -475,7 +475,7 @@ async def _( try: proc = await execute_cmd(cmdline) stdout, stderr = await asyncio.wait_for( - proc.communicate(image), timeout=timeout + proc.communicate(image), timeout=timeout # type: ignore ) except asyncio.TimeoutError: proc.kill() @@ -486,7 +486,7 @@ async def _( datalen = len(data.split("\n")) - 1 boxes = [] for line in data.split("\n")[:datalen]: - boxes.append(cattr.structure_attrs_fromtuple(line.split(), Box)) + boxes.append(cattr.structure_attrs_fromtuple(tuple(line.split()), Box)) return boxes @@ -497,7 +497,7 @@ async def image_to_data( lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT, encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING, - tessdata_dir: Optional[str] = None, + tessdata_dir: Union[None, str] = None, psm: int = AIOPYTESSERACT_DEFAULT_PSM, ) -> List[Data]: """Information about boxes, confidences, line and page numbers. @@ -520,7 +520,7 @@ async def _( lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT, encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING, - tessdata_dir: Optional[str] = None, + tessdata_dir: Union[None, str] = None, psm: int = AIOPYTESSERACT_DEFAULT_PSM, ) -> List[Data]: await file_exists(image) @@ -537,7 +537,7 @@ async def _( lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT, encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING, - tessdata_dir: Optional[str] = None, + tessdata_dir: Union[None, str] = None, psm: int = AIOPYTESSERACT_DEFAULT_PSM, ) -> List[Data]: cmdline = f"stdin stdout -c tessedit_create_tsv=1 --dpi {dpi} -l {lang} --psm {psm}" @@ -570,7 +570,7 @@ async def image_to_osd( lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT, encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING, - tessdata_dir: Optional[str] = None, + tessdata_dir: Union[None, str] = None, ) -> OSD: """Information about orientation and script detection. @@ -593,7 +593,7 @@ async def _( lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT, encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING, - tessdata_dir: Optional[str] = None, + tessdata_dir: Union[None, str] = None, ) -> OSD: await file_exists(image) osd = await image_to_osd( @@ -610,7 +610,7 @@ async def _( lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT, encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING, - tessdata_dir: Optional[str] = None, + tessdata_dir: Union[None, str] = None, ) -> OSD: data = await execute( image, @@ -642,9 +642,9 @@ async def run( psm: int = AIOPYTESSERACT_DEFAULT_PSM, oem: int = AIOPYTESSERACT_DEFAULT_OEM, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT, - user_words: Optional[str] = None, - user_patterns: Optional[str] = None, - tessdata_dir: Optional[str] = None, + user_words: Union[None, str] = None, + user_patterns: Union[None, str] = None, + tessdata_dir: Union[None, str] = None, encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING, ) -> AsyncGenerator[Tuple[str, ...], None]: """Run Tesseract-OCR with multiple analysis. diff --git a/aiopytesseract/exceptions.py b/aiopytesseract/exceptions.py index 07be2d9..e5925e8 100644 --- a/aiopytesseract/exceptions.py +++ b/aiopytesseract/exceptions.py @@ -1,30 +1,32 @@ class TesseractError(Exception): """Base exception for tesseract""" - def __init__(self, message: str = "Tesseract Error"): + def __init__(self, message: str = "Tesseract Error") -> None: self.message = message - def __str__(self): + def __str__(self) -> str: return self.message class PSMInvalidException(TesseractError): - def __init__(self, message="PSM value must be in the range [0 - 13]"): + def __init__( + self, message: str = "PSM value must be in the range [0 - 13]" + ) -> None: super().__init__(message) class OEMInvalidException(TesseractError): - def __init__(self, message="OEM value must be in the range [0 - 3]"): + def __init__(self, message: str = "OEM value must be in the range [0 - 3]") -> None: super().__init__(message) class NoSuchFileException(TesseractError): - def __init__(self, message="No such file"): + def __init__(self, message: str = "No such file") -> None: super().__init__(message) class LanguageInvalidException(TesseractError): - def __init__(self, message="Language invalid"): + def __init__(self, message: str = "Language invalid") -> None: super().__init__(message) @@ -33,5 +35,5 @@ class TesseractRuntimeError(TesseractError): class TesseractTimeoutError(TesseractRuntimeError): - def __init__(self, message="Tesseract process timeout"): + def __init__(self, message: str = "Tesseract process timeout") -> None: super().__init__(message) diff --git a/aiopytesseract/models/__init__.py b/aiopytesseract/models/__init__.py index ec95e34..e58dfad 100644 --- a/aiopytesseract/models/__init__.py +++ b/aiopytesseract/models/__init__.py @@ -2,3 +2,5 @@ from .data import Data from .osd import OSD from .parameter import Parameter + +__all__ = ["Box", "Data", "OSD", "Parameter"] diff --git a/aiopytesseract/returncode.py b/aiopytesseract/returncode.py index 69c30c5..d1ee033 100644 --- a/aiopytesseract/returncode.py +++ b/aiopytesseract/returncode.py @@ -3,5 +3,5 @@ @unique class ReturnCode(IntEnum): - SUCCESS = 0 - FAILED = 1 + SUCCESS: int = 0 + FAILED: int = 1 diff --git a/setup.cfg b/setup.cfg index 4920766..0f257c2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -70,7 +70,6 @@ platform=linux files = aiopytesseract show_error_context = True verbosity = 0 -ignore_missing_imports = True no_implicit_optional = True warn_unused_configs = True @@ -78,6 +77,18 @@ warn_return_any = True warn_unused_ignores = True warn_unreachable = True +check_untyped_defs = True +strict_equality = True +strict_concatenate = True +no_implicit_reexport = True + +disallow_untyped_defs = True +disallow_untyped_calls = True +disallow_incomplete_defs = True +disallow_subclassing_any = True +disallow_untyped_decorators = True +disallow_any_generics = True + [tox:tox] envlist = py{38,39,310,311,312},pypy{3.8,3.9}