From 1cd80037a5c3d83dc0eeb43a257bcb7e029bcf19 Mon Sep 17 00:00:00 2001 From: Osma Suominen Date: Mon, 16 Sep 2024 13:53:23 +0300 Subject: [PATCH] adapt to annif.simplemma_util and newer Connexion --- annif/rest.py | 16 +++++++++------- tests/test_rest.py | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/annif/rest.py b/annif/rest.py index 84f8450d..c283a60b 100644 --- a/annif/rest.py +++ b/annif/rest.py @@ -7,12 +7,12 @@ from typing import TYPE_CHECKING, Any import connexion -from simplemma.langdetect import lang_detector import annif.registry from annif.corpus import Document, DocumentList, SubjectSet from annif.exception import AnnifException from annif.project import Access +from annif.simplemma_util import get_language_detector if TYPE_CHECKING: from connexion.lifecycle import ConnexionResponse @@ -83,7 +83,7 @@ def show_project( return project.dump(), 200, {"Content-Type": "application/json"} -def detect_language(body): +def detect_language(body: dict[str, Any]): """return scores for detected languages formatted according to Swagger spec""" text = body.get("text") @@ -96,21 +96,23 @@ def detect_language(body): detail="no candidate languages given", ) - scores = lang_detector(text, tuple(candidates)) - - if not scores: + detector = get_language_detector(tuple(candidates)) + try: + proportions = detector.proportion_in_each_language(text) + except ValueError: return connexion.problem( status=400, title="Bad Request", detail="unsupported candidate languages", ) - return { + result = { "results": [ {"language": lang if lang != "unk" else None, "score": score} - for lang, score in scores + for lang, score in proportions.items() ] } + return result, 200, {"Content-Type": "application/json"} def _suggestion_to_dict( diff --git a/tests/test_rest.py b/tests/test_rest.py index 8e35aa0e..57adad64 100644 --- a/tests/test_rest.py +++ b/tests/test_rest.py @@ -66,7 +66,7 @@ def test_rest_detect_language_unknown(app): # an unknown language should return None with app.app_context(): result = annif.rest.detect_language( - {"text": "example text", "candidates": ["fi", "sv"]} + {"text": "exampley texty", "candidates": ["fi", "sv"]} ) assert {"language": None, "score": 1} in result["results"]