From 1841820a139047aa59395bb9dc74cbd4bbb0828b Mon Sep 17 00:00:00 2001 From: "Nailia (Nellie)" Date: Mon, 1 Jul 2024 15:43:06 +0200 Subject: [PATCH] fix whitespace tokenization --- named-entity-recognition-submission/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/named-entity-recognition-submission/run.py b/named-entity-recognition-submission/run.py index 97229a7..8702e70 100644 --- a/named-entity-recognition-submission/run.py +++ b/named-entity-recognition-submission/run.py @@ -11,7 +11,7 @@ targets_validation = tira.pd.truths("nlpbuw-fsu-sose-24", "ner-validation-20240612-training") def simple_ner_predictor(sentence): - tokens = sentence.split() + tokens = sentence.split(' ') tags = ['O'] * len(tokens) for i, token in enumerate(tokens): if token.istitle():