Skip to content

Commit

Permalink
Updated mypy config
Browse files: browse the repository at this point in the history
  • Loading branch information
amenezes committed Dec 6, 2023
1 parent cc7ba3c commit 2b5b680
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 79 deletions.
47 changes: 26 additions & 21 deletions aiopytesseract/base_command.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import asyncio
import shlex
from asyncio.subprocess import Process
from collections import deque
from functools import singledispatch
from pathlib import Path
from typing import Any, List, Optional, Tuple
from typing import Any, List, Tuple, Union

from ._logger import logger
from .constants import (
Expand All @@ -17,7 +18,9 @@
from .validators import file_exists, language_is_valid, oem_is_valid, psm_is_valid


async def execute_cmd(cmd_args: str, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT):
async def execute_cmd(
cmd_args: str, timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT
) -> Process:
logger.debug(
f"aiopytesseract command: '{TESSERACT_CMD} {shlex.join(shlex.split(cmd_args))}'"
)
Expand All @@ -31,6 +34,8 @@ async def execute_cmd(cmd_args: str, timeout: float = AIOPYTESSERACT_DEFAULT_TIM
),
timeout=timeout,
)
if proc is None:
raise TesseractRuntimeError()
return proc


Expand All @@ -42,10 +47,10 @@ async def execute(
psm: int,
oem: int,
timeout: float,
lang: Optional[str] = None,
user_words: Optional[str] = None,
user_patterns: Optional[str] = None,
tessdata_dir: Optional[str] = None,
lang: Union[None, str] = None,
user_words: Union[None, str] = None,
user_patterns: Union[None, str] = None,
tessdata_dir: Union[None, str] = None,
) -> bytes:
raise NotImplementedError

Expand All @@ -58,10 +63,10 @@ async def _(
psm: int,
oem: int,
timeout: float,
lang: Optional[str] = None,
user_words: Optional[str] = None,
user_patterns: Optional[str] = None,
tessdata_dir: Optional[str] = None,
lang: Union[None, str] = None,
user_words: Union[None, str] = None,
user_patterns: Union[None, str] = None,
tessdata_dir: Union[None, str] = None,
) -> bytes:
await file_exists(image)
response: bytes = await execute(
Expand All @@ -84,13 +89,13 @@ async def _(
image: bytes,
output_format: str,
dpi: int,
lang: Optional[str],
lang: Union[None, str],
psm: int,
oem: int,
timeout: float,
user_words: Optional[str] = None,
user_patterns: Optional[str] = None,
tessdata_dir: Optional[str] = None,
user_words: Union[None, str] = None,
user_patterns: Union[None, str] = None,
tessdata_dir: Union[None, str] = None,
encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING,
) -> bytes:
cmd_args = await _build_cmd_args(
Expand Down Expand Up @@ -134,9 +139,9 @@ async def execute_multi_output_cmd(
psm: int,
oem: int,
timeout: float,
user_words: Optional[str] = None,
user_patterns: Optional[str] = None,
tessdata_dir: Optional[str] = None,
user_words: Union[None, str] = None,
user_patterns: Union[None, str] = None,
tessdata_dir: Union[None, str] = None,
encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING,
) -> Tuple[str, ...]:
cmd_args = await _build_cmd_args(
Expand Down Expand Up @@ -177,10 +182,10 @@ async def _build_cmd_args(
dpi: int,
psm: int,
oem: int,
user_words: Optional[str] = None,
user_patterns: Optional[str] = None,
tessdata_dir: Optional[str] = None,
lang: Optional[str] = None,
user_words: Union[None, str] = None,
user_patterns: Union[None, str] = None,
tessdata_dir: Union[None, str] = None,
lang: Union[None, str] = None,
output: str = "stdout",
) -> List[str]:
await asyncio.gather(psm_is_valid(psm), oem_is_valid(oem))
Expand Down
96 changes: 48 additions & 48 deletions aiopytesseract/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from contextlib import asynccontextmanager
from functools import singledispatch
from pathlib import Path
from typing import Any, AsyncGenerator, List, Optional, Tuple
from typing import Any, AsyncGenerator, List, Tuple, Union

import cattr
from aiofiles import tempfile
Expand Down Expand Up @@ -35,7 +35,7 @@ async def languages(
:param encoding: decode bytes to string. (default: utf-8)
"""
proc = await execute_cmd(f"--list-langs {config}")
data = await proc.stdout.read()
data = await proc.stdout.read() # type: ignore
langs = []
for line in data.decode(encoding).split():
lang = line.strip()
Expand All @@ -62,7 +62,7 @@ async def tesseract_version(encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING) ->
:param encoding: decode bytes to string. (default: utf-8)
"""
proc = await execute_cmd("--version")
data: bytes = await proc.stdout.readuntil()
data: bytes = await proc.stdout.readuntil() # type: ignore
return data.decode(encoding).split()[1]


Expand All @@ -80,7 +80,7 @@ async def confidence(
dpi: int = AIOPYTESSERACT_DEFAULT_DPI,
lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE,
oem: int = AIOPYTESSERACT_DEFAULT_OEM,
tessdata_dir: Optional[str] = None,
tessdata_dir: Union[None, str] = None,
timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT,
encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING,
) -> float:
Expand Down Expand Up @@ -121,7 +121,7 @@ async def deskew(
dpi: int = AIOPYTESSERACT_DEFAULT_DPI,
lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE,
oem: int = AIOPYTESSERACT_DEFAULT_OEM,
tessdata_dir: Optional[str] = None,
tessdata_dir: Union[None, str] = None,
timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT,
encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING,
) -> float:
Expand All @@ -140,7 +140,7 @@ async def deskew(
cmdline = f"--tessdata-dir {tessdata_dir} {cmdline}"
try:
proc = await execute_cmd(cmdline)
data = await asyncio.wait_for(proc.stderr.read(), timeout=timeout)
data = await asyncio.wait_for(proc.stderr.read(), timeout=timeout) # type: ignore
deskew_value = float(
re.search( # type: ignore
r"(Deskew.angle:.)(\d{1,10}.\d{1,10}$)",
Expand All @@ -165,7 +165,7 @@ async def tesseract_parameters(
:param encoding: decode bytes to string. (default: utf-8)
"""
proc = await execute_cmd("--print-parameters")
raw_data: bytes = await proc.stdout.read()
raw_data: bytes = await proc.stdout.read() # type: ignore
data = raw_data.decode(encoding)
params = []
for line in data.split("\n"):
Expand All @@ -182,9 +182,9 @@ async def tesseract_parameters(
@singledispatch
async def image_to_string(
image: Any,
user_words: Optional[str] = None,
user_patterns: Optional[str] = None,
tessdata_dir: Optional[str] = None,
user_words: Union[None, str] = None,
user_patterns: Union[None, str] = None,
tessdata_dir: Union[None, str] = None,
dpi: int = AIOPYTESSERACT_DEFAULT_DPI,
lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE,
psm: int = AIOPYTESSERACT_DEFAULT_PSM,
Expand Down Expand Up @@ -214,9 +214,9 @@ async def _(
psm: int = AIOPYTESSERACT_DEFAULT_PSM,
oem: int = AIOPYTESSERACT_DEFAULT_OEM,
timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT,
user_words: Optional[str] = None,
user_patterns: Optional[str] = None,
tessdata_dir: Optional[str] = None,
user_words: Union[None, str] = None,
user_patterns: Union[None, str] = None,
tessdata_dir: Union[None, str] = None,
encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING,
) -> str:
image_text: bytes = await execute(
Expand All @@ -242,9 +242,9 @@ async def _(
psm: int = AIOPYTESSERACT_DEFAULT_PSM,
oem: int = AIOPYTESSERACT_DEFAULT_OEM,
timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT,
user_words: Optional[str] = None,
user_patterns: Optional[str] = None,
tessdata_dir: Optional[str] = None,
user_words: Union[None, str] = None,
user_patterns: Union[None, str] = None,
tessdata_dir: Union[None, str] = None,
encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING,
) -> str:
image_text: bytes = await execute(
Expand All @@ -265,9 +265,9 @@ async def _(
@singledispatch
async def image_to_hocr(
image: Any,
user_words: Optional[str] = None,
user_patterns: Optional[str] = None,
tessdata_dir: Optional[str] = None,
user_words: Union[None, str] = None,
user_patterns: Union[None, str] = None,
tessdata_dir: Union[None, str] = None,
dpi: int = AIOPYTESSERACT_DEFAULT_DPI,
lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE,
psm: int = AIOPYTESSERACT_DEFAULT_PSM,
Expand All @@ -292,9 +292,9 @@ async def image_to_hocr(
@image_to_hocr.register(str)
async def _(
image: str,
user_words: Optional[str] = None,
user_patterns: Optional[str] = None,
tessdata_dir: Optional[str] = None,
user_words: Union[None, str] = None,
user_patterns: Union[None, str] = None,
tessdata_dir: Union[None, str] = None,
dpi: int = AIOPYTESSERACT_DEFAULT_DPI,
lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE,
psm: int = AIOPYTESSERACT_DEFAULT_PSM,
Expand Down Expand Up @@ -325,9 +325,9 @@ async def _(
psm: int = AIOPYTESSERACT_DEFAULT_PSM,
oem: int = AIOPYTESSERACT_DEFAULT_OEM,
timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT,
user_words: Optional[str] = None,
user_patterns: Optional[str] = None,
tessdata_dir: Optional[str] = None,
user_words: Union[None, str] = None,
user_patterns: Union[None, str] = None,
tessdata_dir: Union[None, str] = None,
encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING,
) -> str:
output: bytes = await execute(
Expand All @@ -353,9 +353,9 @@ async def image_to_pdf(
psm: int = AIOPYTESSERACT_DEFAULT_PSM,
oem: int = AIOPYTESSERACT_DEFAULT_OEM,
timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT,
user_words: Optional[str] = None,
user_patterns: Optional[str] = None,
tessdata_dir: Optional[str] = None,
user_words: Union[None, str] = None,
user_patterns: Union[None, str] = None,
tessdata_dir: Union[None, str] = None,
) -> bytes:
"""Generate a searchable PDF from an image.
Expand All @@ -380,9 +380,9 @@ async def _(
psm: int = AIOPYTESSERACT_DEFAULT_PSM,
oem: int = AIOPYTESSERACT_DEFAULT_OEM,
timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT,
user_words: Optional[str] = None,
user_patterns: Optional[str] = None,
tessdata_dir: Optional[str] = None,
user_words: Union[None, str] = None,
user_patterns: Union[None, str] = None,
tessdata_dir: Union[None, str] = None,
) -> bytes:
output: bytes = await execute(
image,
Expand All @@ -407,9 +407,9 @@ async def _(
psm: int = AIOPYTESSERACT_DEFAULT_PSM,
oem: int = AIOPYTESSERACT_DEFAULT_OEM,
timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT,
user_words: Optional[str] = None,
user_patterns: Optional[str] = None,
tessdata_dir: Optional[str] = None,
user_words: Union[None, str] = None,
user_patterns: Union[None, str] = None,
tessdata_dir: Union[None, str] = None,
) -> bytes:
output: bytes = await execute(
image,
Expand All @@ -430,7 +430,7 @@ async def _(
async def image_to_boxes(
image: Any,
lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE,
tessdata_dir: Optional[str] = None,
tessdata_dir: Union[None, str] = None,
timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT,
encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING,
) -> List[Box]:
Expand All @@ -449,7 +449,7 @@ async def image_to_boxes(
async def _(
image: str,
lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE,
tessdata_dir: Optional[str] = None,
tessdata_dir: Union[None, str] = None,
timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT,
encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING,
) -> List[Box]:
Expand All @@ -464,7 +464,7 @@ async def _(
async def _(
image: str,
lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE,
tessdata_dir: Optional[str] = None,
tessdata_dir: Union[None, str] = None,
timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT,
encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING,
) -> List[Box]:
Expand All @@ -475,7 +475,7 @@ async def _(
try:
proc = await execute_cmd(cmdline)
stdout, stderr = await asyncio.wait_for(
proc.communicate(image), timeout=timeout
proc.communicate(image), timeout=timeout # type: ignore
)
except asyncio.TimeoutError:
proc.kill()
Expand All @@ -486,7 +486,7 @@ async def _(
datalen = len(data.split("\n")) - 1
boxes = []
for line in data.split("\n")[:datalen]:
boxes.append(cattr.structure_attrs_fromtuple(line.split(), Box))
boxes.append(cattr.structure_attrs_fromtuple(tuple(line.split()), Box))
return boxes


Expand All @@ -497,7 +497,7 @@ async def image_to_data(
lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE,
timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT,
encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING,
tessdata_dir: Optional[str] = None,
tessdata_dir: Union[None, str] = None,
psm: int = AIOPYTESSERACT_DEFAULT_PSM,
) -> List[Data]:
"""Information about boxes, confidences, line and page numbers.
Expand All @@ -520,7 +520,7 @@ async def _(
lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE,
timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT,
encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING,
tessdata_dir: Optional[str] = None,
tessdata_dir: Union[None, str] = None,
psm: int = AIOPYTESSERACT_DEFAULT_PSM,
) -> List[Data]:
await file_exists(image)
Expand All @@ -537,7 +537,7 @@ async def _(
lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE,
timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT,
encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING,
tessdata_dir: Optional[str] = None,
tessdata_dir: Union[None, str] = None,
psm: int = AIOPYTESSERACT_DEFAULT_PSM,
) -> List[Data]:
cmdline = f"stdin stdout -c tessedit_create_tsv=1 --dpi {dpi} -l {lang} --psm {psm}"
Expand Down Expand Up @@ -570,7 +570,7 @@ async def image_to_osd(
lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE,
timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT,
encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING,
tessdata_dir: Optional[str] = None,
tessdata_dir: Union[None, str] = None,
) -> OSD:
"""Information about orientation and script detection.
Expand All @@ -593,7 +593,7 @@ async def _(
lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE,
timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT,
encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING,
tessdata_dir: Optional[str] = None,
tessdata_dir: Union[None, str] = None,
) -> OSD:
await file_exists(image)
osd = await image_to_osd(
Expand All @@ -610,7 +610,7 @@ async def _(
lang: str = AIOPYTESSERACT_DEFAULT_LANGUAGE,
timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT,
encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING,
tessdata_dir: Optional[str] = None,
tessdata_dir: Union[None, str] = None,
) -> OSD:
data = await execute(
image,
Expand Down Expand Up @@ -642,9 +642,9 @@ async def run(
psm: int = AIOPYTESSERACT_DEFAULT_PSM,
oem: int = AIOPYTESSERACT_DEFAULT_OEM,
timeout: float = AIOPYTESSERACT_DEFAULT_TIMEOUT,
user_words: Optional[str] = None,
user_patterns: Optional[str] = None,
tessdata_dir: Optional[str] = None,
user_words: Union[None, str] = None,
user_patterns: Union[None, str] = None,
tessdata_dir: Union[None, str] = None,
encoding: str = AIOPYTESSERACT_DEFAULT_ENCODING,
) -> AsyncGenerator[Tuple[str, ...], None]:
"""Run Tesseract-OCR with multiple analysis.
Expand Down
Loading

0 comments on commit 2b5b680

Please sign in to comment.