Skip to content

Commit

Permalink
test
Browse files Browse the repository at this point in the history
  • Loading branch information
ramakrishna232 committed Jun 26, 2024
1 parent 99b5cd7 commit 47da970
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"python.analysis.autoImportCompletions": true
}
5 changes: 5 additions & 0 deletions ner-submission/dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Build on the fschlatt/natural-language-processing-exercises image, pinned to tag 0.0.1.
# NOTE(review): run.py imports spacy and tira and loads "en_core_web_sm"; presumably the
# base image provides them, since nothing is installed here - confirm.
FROM fschlatt/natural-language-processing-exercises:0.0.1

# Place the submission script at /code/run.py inside the image.
ADD run.py /code/run.py

# Run the script with python3 when the container starts.
ENTRYPOINT [ "python3", "/code/run.py" ]
52 changes: 52 additions & 0 deletions ner-submission/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from pathlib import Path
import json
import spacy
from tira.rest_api_client import Client
from tira.third_party_integrations import get_output_directory

def load_data(file_path):
    """Read a JSON Lines file and return its records as a list.

    Args:
        file_path: Path of a file holding one JSON document per line.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 so non-ASCII text is read the same way on
    # every platform instead of depending on the locale's default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        # Whitespace-only lines (e.g. a stray blank line between records)
        # carry no record and would make json.loads raise, so skip them.
        return [json.loads(line) for line in file if line.strip()]

def predict_labels(sentences, nlp):
    """Tag every token of every sentence with a BIO named-entity label.

    Args:
        sentences: Iterable of records, each providing the text under the
            'sentence' key and an identifier under the 'id' key.
        nlp: Callable that turns a text into a document which can be
            iterated/len()-ed token by token and exposes the recognised
            entities as ``ents``; each entity provides ``start``/``end``
            token offsets and a ``label_`` (e.g. a loaded spaCy pipeline).

    Returns:
        A list of ``{"id": ..., "tags": [...]}`` dicts, one per input
        record in input order. ``tags`` holds one label per token of the
        document produced by ``nlp``: ``B-<label>`` on the first token of
        an entity, ``I-<label>`` on its remaining tokens and ``O``
        everywhere else.
    """
    predictions = []
    for sentence in sentences:
        doc = nlp(sentence['sentence'])
        labels = ['O'] * len(doc)

        for ent in doc.ents:
            # Use the entity's own token offsets. Re-tokenising ent.text and
            # searching for the first match tags the wrong position when the
            # same surface form occurs more than once in the sentence, and
            # costs a full pipeline run per entity.
            labels[ent.start] = f"B-{ent.label_}"
            for idx in range(ent.start + 1, ent.end):
                labels[idx] = f"I-{ent.label_}"

        # NOTE(review): tags follow the tokenisation done by ``nlp``; confirm
        # that this matches the token count the evaluation expects.
        predictions.append({"id": sentence['id'], "tags": labels})
    return predictions

if __name__ == "__main__":
    # Fetch the validation split through the TIRA client; per the original
    # note, TIRA substitutes the test data automatically when run there.
    client = Client()
    validation_frame = client.pd.inputs("nlpbuw-fsu-sose-24", "ner-validation-20240612-training")
    records = validation_frame.to_dict(orient="records")

    # Small English spaCy pipeline used as the entity recogniser.
    pipeline = spacy.load("en_core_web_sm")

    # One {"id", "tags"} dict per input record.
    tagged = predict_labels(records, pipeline)

    # Write the predictions as JSON Lines into the TIRA output directory.
    target_dir = Path(get_output_directory(str(Path(__file__).parent)))
    with open(target_dir / "predictions.jsonl", 'w') as sink:
        sink.writelines(json.dumps(row) + '\n' for row in tagged)

0 comments on commit 47da970

Please sign in to comment.