Skip to content

Commit

Permalink
Bump hardrules requirement, fix compat issues
Browse files: browse the repository at this point in the history
  • Loading branch information
mbanon committed Feb 9, 2024
1 parent 386baf4 commit 2b728c1
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 10 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
Bicleaner 0.17.4:
* Bump bicleaner-hardrules requirement to 2.10.3 and fix compatibility issues.

Bicleaner 0.17.3:
* Bump bicleaner-hardrules requirement to 2.8.1 to avoid hunspell installation issues.

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "bicleaner"
version = "0.17.3"
version = "0.17.4"
description = "Parallel corpus classifier, indicating the likelihood of a pair of sentences being mutual translations or not"
readme = "README.md"
maintainers = [
Expand All @@ -19,7 +19,7 @@ dependencies = [ "regex",
"toolwrapper>=0.4.1,<=2.1.0",
"joblib",
"sacremoses==0.0.53",
"bicleaner-hardrules==2.8.1",
"bicleaner-hardrules==2.10.3",
]
classifiers = [ "Environment :: Console",
"Intended Audience :: Science/Research",
Expand Down
16 changes: 8 additions & 8 deletions src/bicleaner/classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ def load_metadata(args, parser):
try:
# Load YAML
metadata_yaml = yaml.safe_load(args.metadata)
yamlpath = os.path.dirname(os.path.abspath(args.metadata.name))
metadata_yaml["yamlpath"] = yamlpath
yamldir = os.path.dirname(os.path.abspath(args.metadata.name))
metadata_yaml["yamldir"] = yamldir

# Read language pair and tokenizers
args.source_lang=metadata_yaml["source_lang"]
Expand All @@ -86,32 +86,32 @@ def load_metadata(args, parser):

# Load classifier
try:
args.clf=joblib.load( os.path.join( yamlpath , metadata_yaml["classifier"]))
args.clf=joblib.load( os.path.join( yamldir , metadata_yaml["classifier"]))
except:
args.clf=joblib.load(metadata_yaml["classifier"])
args.clf.n_jobs = 1
args.classifier_type=metadata_yaml["classifier_type"]

# Load probabilistic dictionaries
try:
args.dict_sl_tl = ProbabilisticDictionary( os.path.join(yamlpath , metadata_yaml["source_dictionary"]))
args.dict_sl_tl = ProbabilisticDictionary( os.path.join(yamldir , metadata_yaml["source_dictionary"]))
except:
args.dict_sl_tl = ProbabilisticDictionary(metadata_yaml["source_dictionary"])
try:
args.dict_tl_sl = ProbabilisticDictionary( os.path.join(yamlpath , metadata_yaml["target_dictionary"]))
args.dict_tl_sl = ProbabilisticDictionary( os.path.join(yamldir , metadata_yaml["target_dictionary"]))
except:
args.dict_tl_sl = ProbabilisticDictionary(metadata_yaml["target_dictionary"])

# Load wordfreqs
try:
args.sl_word_freqs = WordZipfFreqDist( os.path.join( yamlpath, metadata_yaml["source_word_freqs"]))
args.sl_word_freqs = WordZipfFreqDist( os.path.join( yamldir, metadata_yaml["source_word_freqs"]))
except:
try:
args.sl_word_freqs = WordZipfFreqDist(metadata_yaml["source_word_freqs"])
except:
args.sl_word_freqs = None
try:
args.tl_word_freqs = WordZipfFreqDist( os.path.join( yamlpath , metadata_yaml["target_word_freqs"]))
args.tl_word_freqs = WordZipfFreqDist( os.path.join( yamldir , metadata_yaml["target_word_freqs"]))
except:
try:
args.tl_word_freqs = WordZipfFreqDist(metadata_yaml["target_word_freqs"])
Expand Down Expand Up @@ -153,7 +153,7 @@ def load_metadata(args, parser):
logging.warning("Porn removal not present in metadata, disabling.")
else:
try:
args.porn_removal = fasttext.load_model(os.path.join(yamlpath, metadata_yaml['porn_removal_file']))
args.porn_removal = fasttext.load_model(os.path.join(yamldir, metadata_yaml['porn_removal_file']))
except:
args.porn_removal = fasttext.load_model(args.metadata_yaml['porn_removal_file'])
else:
Expand Down

0 comments on commit 2b728c1

Please sign in to comment.