Skip to content

Commit

Permalink
SpellingConversionJob: allow Path and str to avoid breaking hashes. (#505)
Browse files Browse the repository at this point in the history

* Allow tk.Path as input type for conversion spelling mapping files.
  • Loading branch information
sarahberanek authored and Marvin84 committed Apr 30, 2024
1 parent a4bde9f commit f1b3c09
Showing 1 changed file with 12 additions and 11 deletions.
23 changes: 12 additions & 11 deletions lexicon/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import os.path
import re
from typing import List, Optional, Tuple, Union
import xml.dom.minidom
import xml.etree.ElementTree as ET

Expand Down Expand Up @@ -308,18 +309,18 @@ class SpellingConversionJob(Job):

def __init__(
self,
bliss_lexicon,
orth_mapping_file,
mapping_file_delimiter=" ",
mapping_rules=None,
invert_mapping=False,
keep_original_target_lemmas=False,
bliss_lexicon: tk.Path,
orth_mapping_file: Union[str, tk.Path],
mapping_file_delimiter: str = " ",
mapping_rules: Optional[List[Tuple[str, str, str]]] = None,
invert_mapping: bool = False,
keep_original_target_lemmas: bool = False,
):
"""
:param Path bliss_lexicon:
input lexicon, whose lemmata all have unique PRIMARY orth
to reach the above requirements apply LexiconUniqueOrthJob
:param str orth_mapping_file:
:param str|tk.Path orth_mapping_file:
orthography mapping file: *.json *.json.gz *.txt *.gz
in case of plain text file
one can adjust mapping_delimiter
Expand Down Expand Up @@ -376,16 +377,16 @@ def _lemma_to_str(lemma, description):

def run(self):
# load mapping from json or plain text file
is_json = self.orth_mapping_file.endswith(".json")
is_json |= self.orth_mapping_file.endswith(".json.gz")
orth_map_file_str = tk.uncached_path(self.orth_mapping_file)
is_json = orth_map_file_str.endswith(".json") | orth_map_file_str.endswith(".json.gz")
if is_json:
with uopen(self.orth_mapping_file, "rt") as f:
with uopen(orth_map_file_str, "rt") as f:
mapping = json.load(f)
if self.invert_mapping:
mapping = {v: k for k, v in mapping.items()}
else:
mapping = dict()
with uopen(self.orth_mapping_file, "rt") as f:
with uopen(orth_map_file_str, "rt") as f:
for line in f:
line = line.strip()
if not line or line.startswith("#"):
Expand Down

0 comments on commit f1b3c09

Please sign in to comment.