Skip to content

Commit

Permalink
Merge regression in RO training
Browse files (browse the repository at this point in the history)
Fixes #549
  • Loading branch information
mittagessen committed Dec 4, 2023
1 parent 5b441ad commit 99dc1c7
Showing 1 changed file with 10 additions and 22 deletions.
32 changes: 10 additions & 22 deletions kraken/lib/dataset/ro.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,17 +80,6 @@ def __init__(self, files: Sequence[Union[PathLike, str]] = None,
self.data = []

if mode in ['alto', 'page', 'xml']:
for file in files:
try:
doc = XMLPage(file, filetype=mode)
for tag in doc.tags:
if tag not in self.class_mapping:
self.class_mapping[tag] = self.num_classes
self.num_classes += 1
except KrakenInputException as e:
files.pop(file)
logger.warning(e)
continue
for file in files:
try:
doc = XMLPage(file, filetype=mode)
Expand All @@ -104,6 +93,11 @@ def __init__(self, files: Sequence[Union[PathLike, str]] = None,
order = doc.get_sorted_regions(ro_id)
else:
raise ValueError(f'Invalid RO type {level}')
for el in order:
for tag in el.tags.values():
if tag not in self.class_mapping:
self.class_mapping[tag] = self.num_classes
self.num_classes += 1
# traverse RO and substitute features.
w, h = doc.image_size
sorted_lines = []
Expand Down Expand Up @@ -186,17 +180,6 @@ def __init__(self, files: Sequence[Union[PathLike, str]] = None,
self.data = []

if mode in ['alto', 'page', 'xml']:
for file in files:
try:
doc = XMLPage(file, filetype=mode)
for tag in doc.tags:
if tag not in self.class_mapping:
self.class_mapping[tag] = self.num_classes
self.num_classes += 1
except KrakenInputException as e:
files.pop(file)
logger.warning(e)
continue
for file in files:
try:
doc = XMLPage(file, filetype=mode)
Expand All @@ -210,6 +193,11 @@ def __init__(self, files: Sequence[Union[PathLike, str]] = None,
order = doc.get_sorted_regions(ro_id)
else:
raise ValueError(f'Invalid RO type {level}')
for el in order:
for tag in el.tags.values():
if tag not in self.class_mapping:
self.class_mapping[tag] = self.num_classes
self.num_classes += 1
# traverse RO and substitute features.
w, h = doc.image_size
sorted_lines = []
Expand Down

0 comments on commit 99dc1c7

Please sign in to comment.