codespell-project · larsoner · Aug 8, 2023 · Aug 1, 2023 · Jul 28, 2023
diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
@@ -36,6 +36,9 @@
     "(\\b(?:https?|[ts]?ftp|file|git|smb)://[^\\s]+(?=$|\\s)|"
     "\\b[\\w.%+-]+@[\\w.-]+\\b)"
 )
+# Pairs of (character, alternative character). Each misspelling containing the
+# first character is also registered with the second substituted in key and fix.
+alt_chars = (("'", "’"),)
 encodings = ("utf-8", "iso-8859-1")
 USAGE = """
 \t%prog [OPTIONS] [file1 file2 ... fileN]
@@ -622,31 +625,46 @@ def build_ignore_words(filename: str, ignore_words: Set[str]) -> None:
             ignore_words.add(line.strip())
 
 
+def add_misspelling(
+    key: str,
+    data: str,
+    misspellings: Dict[str, Misspelling],
+) -> None:
+    data = data.strip()
+
+    if "," in data:
+        fix = False
+        data, reason = data.rsplit(",", 1)
+        reason = reason.lstrip()
+    else:
+        fix = True
+        reason = ""
+
+    misspellings[key] = Misspelling(data, fix, reason)
+
+
 def build_dict(
     filename: str,
     misspellings: Dict[str, Misspelling],
     ignore_words: Set[str],
 ) -> None:
     with open(filename, encoding="utf-8") as f:
+        translate_tables = [(x, str.maketrans(x, y)) for x, y in alt_chars]
         for line in f:
             [key, data] = line.split("->")
             # TODO for now, convert both to lower. Someday we can maybe add
             # support for fixing caps.
             key = key.lower()
             data = data.lower()
-            if key in ignore_words:
-                continue
-            data = data.strip()
-
-            if "," in data:
-                fix = False
-                data, reason = data.rsplit(",", 1)
-                reason = reason.lstrip()
-            else:
-                fix = True
-                reason = ""
-
-            misspellings[key] = Misspelling(data, fix, reason)
+            if key not in ignore_words:
+                add_misspelling(key, data, misspellings)
+            # also register a variant of this entry for each alternative character
+            for x, table in translate_tables:
+                if x in key:
+                    alt_key = key.translate(table)
+                    alt_data = data.translate(table)
+                    if alt_key not in ignore_words:
+                        add_misspelling(alt_key, alt_data, misspellings)
 
 
 def is_hidden(filename: str, check_hidden: bool) -> bool:

diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py
@@ -168,6 +168,12 @@ def test_default_word_parsing(
         f.write("`abandonned`\n")
     assert cs.main(fname) == 1, "bad"
 
+    fname = tmp_path / "apostrophe"
+    fname.write_text("woudn't\n", encoding="utf-8")  # U+0027 (')
+    assert cs.main(fname) == 1, "misspelling containing typewriter apostrophe U+0027"
+    fname.write_text("woudn’t\n", encoding="utf-8")  # U+2019 (’)
+    assert cs.main(fname) == 1, "misspelling containing typographic apostrophe U+2019"
+
 
 def test_bad_glob(
     tmp_path: Path,