Skip to content

Commit

Permalink
Move codespell:ignore check into Spellchecker
Browse files Browse the repository at this point in the history
This makes the API automatically avoid some declared false-positives
that the command line tool would also filter.
  • Loading branch information
nthykier committed May 25, 2024
1 parent c4d1738 commit 3c08c9b
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 15 deletions.
13 changes: 1 addition & 12 deletions codespell_lib/_codespell.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@
"(\\b(?:https?|[ts]?ftp|file|git|smb)://[^\\s]+(?=$|\\s)|"
"\\b[\\w.%+-]+@[\\w.-]+\\b)"
)
inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P<words>[\w,]*))?")
USAGE = """
\t%prog [OPTIONS] [file1 file2 ... fileN]
"""
Expand Down Expand Up @@ -952,20 +951,10 @@ def parse_file(
if not line or line in exclude_lines:
continue

extra_words_to_ignore = set()
match = inline_ignore_regex.search(line)
if match:
extra_words_to_ignore = set(
filter(None, (match.group("words") or "").split(","))
)
if not extra_words_to_ignore:
continue

fixed_words = set()
asked_for = set()

issues = spellchecker.spellcheck_line(line, line_tokenizer, extra_words_to_ignore=extra_words_to_ignore)
for issue in issues:
for issue in spellchecker.spellcheck_line(line, line_tokenizer):
misspelling = issue.misspelling
word = issue.word
lword = issue.lword
Expand Down
25 changes: 22 additions & 3 deletions codespell_lib/spellchecker.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from typing import (
Container,
Dict,
FrozenSet,
Generic,
Iterable,
Optional,
Expand Down Expand Up @@ -108,6 +109,8 @@

_builtin_default_as_tuple = tuple(_builtin_default.split(","))

_inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P<words>[\w,]*))?")


class UnknownBuiltinDictionaryError(ValueError):
def __init__(self, name: str) -> None:
Expand Down Expand Up @@ -173,12 +176,21 @@ def __init__(self) -> None:
self._misspellings: Dict[str, Misspelling] = {}
self.ignore_words_cased: Container[str] = frozenset()

def _parse_inline_ignore(self, line: str) -> Optional[FrozenSet[str]]:
    """Extract the words named by an inline ``codespell:ignore`` directive.

    :param line: The line to scan for the directive.
    :returns: An empty frozenset when the line carries no directive, the
      set of listed words when the directive names at least one word, or
      ``None`` when the directive is present but names no words.
    """
    found = _inline_ignore_regex.search(line)
    if found is None:
        # No directive on this line: nothing extra to ignore.
        return frozenset()
    listed = found.group("words") or ""
    # Drop the empty entries produced by leading, trailing or doubled commas.
    named = frozenset(word for word in listed.split(",") if word)
    if not named:
        # A bare directive is reported as None, distinct from "no directive".
        return None
    return named

def spellcheck_line(
self,
line: str,
tokenizer: LineTokenizer[T_co],
*,
extra_words_to_ignore: Container[str] = frozenset()
respect_inline_ignore: bool = True,
) -> Iterable[DetectedMisspelling[T_co]]:
"""Tokenize and spellcheck a line
Expand All @@ -187,12 +199,19 @@ def spellcheck_line(
:param line: The line to spellcheck.
:param tokenizer: A callable that will tokenize the line
:param extra_words_to_ignore: Extra words to ignore for this particular line
(such as content from a `codespell:ignore` comment)
:param respect_inline_ignore: Whether to check the line for
`codespell:ignore` instructions
:returns: An iterable of discovered typos.
"""
misspellings = self._misspellings
ignore_words_cased = self.ignore_words_cased

extra_words_to_ignore = (
self._parse_inline_ignore(line) if respect_inline_ignore else frozenset()
)
if extra_words_to_ignore is None:
return

for token in tokenizer(line):
word = token.group()
if word in ignore_words_cased:
Expand Down

0 comments on commit 3c08c9b

Please sign in to comment.