From 431049a62e84a9dfcb285ad0062f4a1bf27b7bd6 Mon Sep 17 00:00:00 2001 From: John Bauer Date: Fri, 22 Nov 2024 16:18:34 -0800 Subject: [PATCH] Decide whether or not to use lemma_classifier based on charlm args, if lemma_classifier is not specifically set. Pass along the charlm args to the lemma classifier as well --- stanza/utils/training/run_lemma.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/stanza/utils/training/run_lemma.py b/stanza/utils/training/run_lemma.py index 431183adef..2af026b7ea 100644 --- a/stanza/utils/training/run_lemma.py +++ b/stanza/utils/training/run_lemma.py @@ -34,8 +34,10 @@ def add_lemma_args(parser): add_charlm_args(parser) - parser.add_argument('--no_lemma_classifier', dest='lemma_classifier', action='store_false', default=True, - help="Don't use the lemma classifier datasets. Default is to build lemma classifier as part of the original lemmatizer") + parser.add_argument('--lemma_classifier', dest='lemma_classifier', action='store_true', default=None, + help="Don't use the lemma classifier datasets. Default is to build lemma classifier as part of the original lemmatizer if the charlm is used") + parser.add_argument('--no_lemma_classifier', dest='lemma_classifier', action='store_false', + help="Don't use the lemma classifier datasets. Default is to build lemma classifier as part of the original lemmatizer if the charlm is used") def build_model_filename(paths, short_name, command_args, extra_args): """ @@ -148,10 +150,13 @@ def run_treebank(mode, paths, treebank, short_name, logger.info("Running test lemmatizer for {} with args {}".format(treebank, test_args)) lemmatizer.main(test_args) - use_lemma_classifier = command_args.lemma_classifier and short_name in prepare_lemma_classifier.DATASET_MAPPING + use_lemma_classifier = command_args.lemma_classifier + if use_lemma_classifier is None: + use_lemma_classifier = command_args.charlm is not None + use_lemma_classifier = use_lemma_classifier and short_name in prepare_lemma_classifier.DATASET_MAPPING if use_lemma_classifier and mode == Mode.TRAIN: - # TODO: pass along charlm args - lemma_classifier_args = [treebank] + lc_charlm_args = ['--no_charlm'] if command_args.charlm is None else ['--charlm', command_args.charlm] + lemma_classifier_args = [treebank] + lc_charlm_args if command_args.force: lemma_classifier_args.append('--force') run_lemma_classifier.main(lemma_classifier_args)