From 1841820a139047aa59395bb9dc74cbd4bbb0828b Mon Sep 17 00:00:00 2001
From: "Nailia (Nellie)" <nailia.mirzakhmedova@uni-weimar.de>
Date: Mon, 1 Jul 2024 15:43:06 +0200
Subject: [PATCH] Fix whitespace tokenization: split on single spaces

---
 named-entity-recognition-submission/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/named-entity-recognition-submission/run.py b/named-entity-recognition-submission/run.py
index 97229a7..8702e70 100644
--- a/named-entity-recognition-submission/run.py
+++ b/named-entity-recognition-submission/run.py
@@ -11,7 +11,7 @@
     targets_validation = tira.pd.truths("nlpbuw-fsu-sose-24", "ner-validation-20240612-training")
 
     def simple_ner_predictor(sentence):
-        tokens = sentence.split()
+        tokens = sentence.split(' ')
         tags = ['O'] * len(tokens)
         for i, token in enumerate(tokens):
             if token.istitle():