
Commit

Merge pull request #22 from deep-spin/update-metrics-with-lang-args
Update the metrics with language in their arguments
zeppombal authored Jul 17, 2024
2 parents 39857fa + 1e71153 commit d951257
Showing 4 changed files with 33 additions and 22 deletions.
tower_eval/metrics/errant/metric.py (8 changes: 4 additions & 4 deletions)
@@ -17,15 +17,15 @@ def run(
         self,
         hypothesis_path,
         gold_data_path,
-        references,
-        language: str,
         tokenize_source: bool = False,
         tokenize_hypothesis: bool = False,
         **kwargs
     ) -> dict:
-        hypothesis_m2 = self.preprocess(hypothesis_path, gold_data_path, language)
+        language = kwargs["lp"]["src_lang"]
+        references = kwargs["references_m2"]
+        hypothesis_m2 = self.preprocess(hypothesis_path, gold_data_path, language, tokenize_source, tokenize_hypothesis)
         result = self.evaluate(
-            hypothesis_m2, references, language, tokenize_source, tokenize_hypothesis
+            hypothesis_m2, references
         )
         result.print_result(self.metric_name())
         return result.format_result(self.metric_name())
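With this change, run() no longer takes references or language positionally: both are read from **kwargs. A minimal sketch of the new calling convention (the class name and file paths are placeholders; only the "lp" and "references_m2" keys come from the diff above):

    # Hypothetical invocation of the updated ERRANT metric's run().
    # The "lp" dict and "references_m2" key mirror the kwargs read above.
    metric = ERRANT()  # placeholder name for the class in errant/metric.py
    result = metric.run(
        hypothesis_path="hyp.txt",
        gold_data_path="gold.m2",
        lp={"src_lang": "en", "trg_lang": None},  # monolingual task: no target
        references_m2="references.m2",
    )
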
tower_eval/metrics/f1_sequence/metric.py (17 changes: 11 additions & 6 deletions)
@@ -38,14 +38,17 @@ def run(
         valid_ner_tags: list[str] = None,
         **kwargs,
     ) -> dict:
+        language = kwargs["lp"]["src_lang"]
         hypothesis = self._load_samples(
             hypothesis_path,
+            language=language,
             format=hypothesis_format,
             tokenize=tokenize_hypothesis,
+            default_noent_tag=default_noent_tag
         )
         hypothesis = self.filter_tags(hypothesis, valid_ner_tags, default_noent_tag)
         reference = self._load_samples(
-            gold_data_path, format=reference_format, tokenize=False
+            gold_data_path, language=language, format=reference_format, tokenize=False, default_noent_tag=default_noent_tag
         )
         reference = self.filter_tags(reference, valid_ner_tags, default_noent_tag)

@@ -67,7 +70,7 @@ def evaluate(
     ) -> F1SequenceResult:
         # evaluate by tag
         f1s_by_tag = {}
-        for tag in self.valid_ner_tags:
+        for tag in valid_ner_tags:
             filtered_hypothesis = self.filter_tags(hypothesis, tag, default_noent_tag)
             filtered_reference = self.filter_tags(reference, tag, default_noent_tag)
             true_seqs, pred_seqs = align_hyp_ref(
@@ -101,9 +104,11 @@ def process_result(self, result) -> MetricResult:
     def _load_samples(
         self,
         filename: str,
+        language: str,
         format: Literal["text", "tsv", "xml"] = "text",
         separator="|",
         tokenize: bool = False,
+        default_noent_tag: str = "O"
     ) -> List[List[str]]:
         """ "
         It reads the labeled file, and returns only the tags.
@@ -119,13 +124,13 @@ def _load_samples(
             if tokenize:
                 input_lines = read_lines(filename)
                 input_lines = tokenize_spacy(
-                    lines=input_lines, language=self.language, keep_xml=True
+                    lines=input_lines, language=language, keep_xml=True
                 )
             else:
                 with open(filename, "r", encoding="utf8") as ifh:
                     input_lines = ifh.readlines()
                 input_lines = [line.strip() for line in input_lines]
-            labels = [self.xml2iob(hyp) for hyp in input_lines]
+            labels = [self.xml2iob(hyp, default_noent_tag) for hyp in input_lines]
         elif format == "tsv":
             separator = "\t"
             with open(filename, "r", encoding="utf8") as infh:
@@ -166,11 +171,11 @@ def list_of_tuples_to_tokens(self, line):
         tokens = [token[1] for token in list_of_tuples]
         return tokens
 
-    def xml2iob(self, xml_string):
+    def xml2iob(self, xml_string, default_noent_tag):
         tokens = xml_string.split()
         annotations = []
         # tag is set to "O" (self.noent_tag) by default
-        tag = self.default_noent_tag
+        tag = default_noent_tag
         for token in tokens:
             matching_open_tag = re.search(PATTERN_OPEN_TAG, token)
             matching_close_tag = re.search(PATTERN_CLOSE_TAG, token)
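To make the default_noent_tag change concrete, here is a self-contained sketch of the XML-to-IOB conversion, a simplified re-implementation rather than the repository's code (the real PATTERN_OPEN_TAG and PATTERN_CLOSE_TAG are defined elsewhere in the module, and the real method also tracks annotations):

    import re

    # Assumed stand-ins for the module-level tag patterns.
    PATTERN_OPEN_TAG = re.compile(r"^<(\w+)>$")
    PATTERN_CLOSE_TAG = re.compile(r"^</(\w+)>$")

    def xml2iob(xml_string, default_noent_tag="O"):
        """Convert '<PER> John Smith </PER> works' markup to IOB tags."""
        tag = default_noent_tag
        begin = False
        labels = []
        for token in xml_string.split():
            open_match = re.search(PATTERN_OPEN_TAG, token)
            close_match = re.search(PATTERN_CLOSE_TAG, token)
            if open_match:
                # Entering an entity span: remember its type.
                tag, begin = open_match.group(1), True
            elif close_match:
                # Leaving the span: fall back to the no-entity tag.
                tag = default_noent_tag
            elif tag == default_noent_tag:
                labels.append(tag)
            else:
                labels.append(("B-" if begin else "I-") + tag)
                begin = False
        return labels

    print(xml2iob("<PER> John Smith </PER> works"))  # ['B-PER', 'I-PER', 'O']
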
tower_eval/tasks/generate.py (9 changes: 4 additions & 5 deletions)
@@ -79,12 +79,11 @@ def generate(i: int, config_path: str, config_type: str, available_models: dict=
         f"Running inference for task: <yellow> {task_name} </yellow>, subtask: <green> {subtask} </green> with model: <red> {model_type}/{model_name} </red> saving to: <red> {output_dir} </red>"
     )
 
-    if task_name in ["mt", "ape"]:
-        lp = subtask.split(".")[-1]
-        src_lang, tgt_lang = get_langs(lp)
+    lp = subtask.split(".")[-1]
+    src_lang, tgt_lang = get_langs(lp)
 
-        model.source_language = src_lang
-        model.target_language = tgt_lang
+    model.source_language = src_lang
+    model.target_language = tgt_lang
     model.generation_with_resume(
         input_file=input_file,
         output_file=output_file,
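The language fields are now set for every task rather than only for "mt" and "ape", which assumes every subtask name ends in a language field. A sketch of the convention (the subtask names are invented for illustration):

    subtask = "mt.wmt23.en-de"     # translation task: a language pair
    lp = subtask.split(".")[-1]    # -> "en-de"

    subtask = "gec.conll14.en"     # monolingual task: a single language
    lp = subtask.split(".")[-1]    # -> "en"
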
tower_eval/utils.py (21 changes: 14 additions & 7 deletions)
@@ -338,10 +338,9 @@ def get_eval_args_given_task(
     )
     gold_data_path = data_dir / task_name / subtask / "test.jsonl"
     # add language argument to eval_args as it is needed in some metrics
-    language = subtask.split(".")[1]
-    if "-" in language:
-        _, language = get_langs(language)
-    eval_args["language"] = language
+    lp = subtask.split(".")[1]
+    src_lang, trg_lang = get_langs(lp)
+    eval_args["lp"] = {"src_lang": src_lang, "trg_lang": trg_lang}
 
     return hypothesis_path, gold_data_path, eval_args
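Every metric now receives the full pair under eval_args["lp"] instead of a single eval_args["language"]. A worked sketch of what the new lines produce (the subtask name is invented; the language field is the second dot-separated component, per subtask.split(".")[1]):

    from tower_eval.utils import get_langs

    eval_args = {}
    subtask = "wmt23.en-de"                     # hypothetical subtask name
    lp = subtask.split(".")[1]                  # -> "en-de"
    src_lang, trg_lang = get_langs(lp)          # -> ("en", "de")
    eval_args["lp"] = {"src_lang": src_lang, "trg_lang": trg_lang}
    # eval_args == {"lp": {"src_lang": "en", "trg_lang": "de"}}
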

@@ -433,9 +432,17 @@ def get_langs(lp):
     lang_pattern = "|".join(valid_langs)
     lp_pattern = rf"^({lang_pattern})-({lang_pattern})$"
     match = re.match(lp_pattern, lp)
-    src_lang = match.group(1)
-    trg_lang = match.group(2)
-    return src_lang, trg_lang
+    if match:
+        # We have a language pair, so both src_lang and trg_lang are extracted from lp.
+        src_lang = match.group(1)
+        trg_lang = match.group(2)
+        return src_lang, trg_lang
+    elif lp in valid_langs:
+        # The task is monolingual, so there is only one language: set src_lang and leave trg_lang as None.
+        src_lang = lp
+        trg_lang = None
+        return src_lang, trg_lang
+    return None, None
 
 
 def add_average_generation_time(
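get_langs therefore now covers three cases instead of assuming a language pair. Expected behavior, assuming "en" and "de" appear in valid_langs and "xx" does not:

    get_langs("en-de")   # -> ("en", "de")   recognized language pair
    get_langs("en")      # -> ("en", None)   monolingual task
    get_langs("xx")      # -> (None, None)   nothing recognized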
