Skip to content

Commit

Permalink
add correct_typos.py as a replacement for preprocess --corr=...
Browse files Browse the repository at this point in the history
  • Loading branch information
Phaqui committed Feb 23, 2024
1 parent d01ce30 commit 2056b6c
Showing 1 changed file with 37 additions and 0 deletions.
37 changes: 37 additions & 0 deletions scripts/correct_typos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Read a corrections file (path as first argument to the script)
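and then read line by line from standard input, and substitute each
line (treated as a single word) with its correction from the
corrections file. Lines without a correction are printed unchanged;
blank lines are dropped."""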
from sys import stdin, argv

if len(argv) <= 1:
    # The message must be an f-string so the real script name is shown
    # rather than the literal text "{argv[0]}". Raising SystemExit with a
    # string prints it to stderr and exits with status 1, without relying
    # on the `exit` helper that the `site` module injects.
    raise SystemExit(f"usage: python {argv[0]} <correction_file>")


def read_corrections_file(path):
    """Load a tab-separated corrections file into a lookup dictionary.

    Each usable line holds exactly two tab-separated fields: the wrong
    form and its correction. Surrounding whitespace on the line is
    stripped before splitting. Lines that do not split into exactly two
    fields (blank lines, lines without a tab, lines with extra tabs) are
    skipped. When a wrong form occurs more than once, the last entry wins.

    Args:
        path: Path of the corrections file to read.

    Returns:
        A dict mapping each wrong form to its correction.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    lookups = {}
    with open(path, "r") as f:
        # Iterate the file object directly so the whole file is never
        # held in memory at once.
        for line in f:
            fields = line.strip().split("\t")
            if len(fields) != 2:
                # Not a "wrong<TAB>right" pair; ignore the line.
                continue
            wrong, right = fields
            lookups[wrong] = right
    return lookups


def main():
    """Correct each line of standard input using the corrections file.

    The corrections file path is taken from the first command-line
    argument. Every line read from standard input is stripped of
    surrounding whitespace; blank lines are dropped, and each remaining
    line is printed either as its correction (when the corrections
    mapping has an entry for it) or unchanged.

    Returns:
        None, which the caller passes to SystemExit as a successful exit.
    """
    correction_file = argv[1]
    corrections = read_corrections_file(correction_file)

    # Iterate stdin lazily instead of calling readlines(), so output can
    # start before end-of-input and large inputs are not held in memory.
    for line in stdin:
        line = line.strip()
        if not line:
            continue
        print(corrections.get(line, line))


if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes
    # the process exit status (None maps to a successful exit).
    exit_status = main()
    raise SystemExit(exit_status)

0 comments on commit 2056b6c

Please sign in to comment.