diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py index da9cbcafb6..32aea6bbe1 100644 --- a/codespell_lib/_codespell.py +++ b/codespell_lib/_codespell.py @@ -59,7 +59,6 @@ "(\\b(?:https?|[ts]?ftp|file|git|smb)://[^\\s]+(?=$|\\s)|" "\\b[\\w.%+-]+@[\\w.-]+\\b)" ) -inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P<words>[\w,]*))?") USAGE = """ \t%prog [OPTIONS] [file1 file2 ... fileN] """ @@ -952,20 +951,10 @@ def parse_file( if not line or line in exclude_lines: continue - extra_words_to_ignore = set() - match = inline_ignore_regex.search(line) - if match: - extra_words_to_ignore = set( - filter(None, (match.group("words") or "").split(",")) - ) - if not extra_words_to_ignore: - continue - fixed_words = set() asked_for = set() - issues = spellchecker.spellcheck_line(line, line_tokenizer, extra_words_to_ignore=extra_words_to_ignore) - for issue in issues: + for issue in spellchecker.spellcheck_line(line, line_tokenizer): misspelling = issue.misspelling word = issue.word lword = issue.lword diff --git a/codespell_lib/spellchecker.py b/codespell_lib/spellchecker.py index 9d1c5398d6..f1ad6885b6 100644 --- a/codespell_lib/spellchecker.py +++ b/codespell_lib/spellchecker.py @@ -21,6 +21,7 @@ from typing import ( Container, Dict, + FrozenSet, Generic, Iterable, Optional, @@ -108,6 +109,8 @@ _builtin_default_as_tuple = tuple(_builtin_default.split(",")) +_inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P<words>[\w,]*))?") + class UnknownBuiltinDictionaryError(ValueError): def __init__(self, name: str) -> None: @@ -173,12 +176,21 @@ def __init__(self) -> None: self._misspellings: Dict[str, Misspelling] = {} self.ignore_words_cased: Container[str] = frozenset() + def _parse_inline_ignore(self, line: str) -> Optional[FrozenSet[str]]: + inline_ignore_match = _inline_ignore_regex.search(line) + if inline_ignore_match: + words = frozenset( + filter(None, (inline_ignore_match.group("words") or "").split(",")) + ) + return words if
words else None + return frozenset() + def spellcheck_line( self, line: str, tokenizer: LineTokenizer[T_co], *, - extra_words_to_ignore: Container[str] = frozenset() + respect_inline_ignore: bool = True, ) -> Iterable[DetectedMisspelling[T_co]]: """Tokenize and spellcheck a line @@ -187,12 +199,19 @@ def spellcheck_line( :param line: The line to spellcheck. :param tokenizer: A callable that will tokenize the line - :param extra_words_to_ignore: Extra words to ignore for this particular line - (such as content from a `codespell:ignore` comment) + :param respect_inline_ignore: Whether to check the line for + `codespell:ignore` instructions + :returns: An iterable of discovered typos. """ misspellings = self._misspellings ignore_words_cased = self.ignore_words_cased + extra_words_to_ignore = ( + self._parse_inline_ignore(line) if respect_inline_ignore else frozenset() + ) + if extra_words_to_ignore is None: + return + for token in tokenizer(line): word = token.group() if word in ignore_words_cased: