Skip to content

Commit

Permalink
Drop optional n-best header in CTM (#542)
Browse files Browse the repository at this point in the history
This optional header field causes issues in GLM mapping, resulting in very high (almost 100 percent) deletion rates, because all lines with fewer than 7 columns are dropped from the file.
  • Loading branch information
NeoLegends authored Sep 11, 2024
1 parent 0d4454d commit 74711c5
Showing 1 changed file with 12 additions and 2 deletions.
14 changes: 12 additions & 2 deletions returnn/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,12 @@ def run(self):
d = eval(util.uopen(self.recog_words_file.get_path(), "rt").read())
assert isinstance(d, dict), "only search output file with dict format is supported"
with util.uopen(self.out_ctm_file.get_path(), "wt") as out:
out.write(";; <name> <track> <start> <duration> <word> <confidence> [<n-best>]\n")
# Do not print optional [n-best] header, some downstream evaluation pipelines
# use the number of headers for validation. Since we do not print n-best-list
# information this validation fails and discards the entire search outputs.
#
# See https://github.com/rwth-i6/i6_core/pull/542.
out.write(";; <name> <track> <start> <duration> <word> <confidence>\n")
for seg in corpus.segments():
seg_start = 0.0 if seg.start == float("inf") else seg.start
seg_end = 0.0 if seg.end == float("inf") else seg.end
Expand Down Expand Up @@ -541,7 +546,12 @@ def run(self):
else:
seq_order = d.keys()
with util.uopen(self.out_ctm_file.get_path(), "wt") as out:
out.write(";; <name> <track> <start> <duration> <word> <confidence> [<n-best>]\n")
# Do not print optional [n-best] header, some downstream evaluation pipelines
# use the number of headers for validation. Since we do not print n-best-list
# information this validation fails and discards the entire search outputs.
#
# See https://github.com/rwth-i6/i6_core/pull/542.
out.write(";; <name> <track> <start> <duration> <word> <confidence>\n")
for seg_fullname in seq_order:
assert isinstance(
seg_fullname, str
Expand Down

0 comments on commit 74711c5

Please sign in to comment.