diff --git a/environment.yml b/environment.yml index 69b2eda73..bd304353f 100644 --- a/environment.yml +++ b/environment.yml @@ -21,3 +21,5 @@ dependencies: - regex>=2023.8.8 - SPARQLWrapper>=2.0.0 - tensorflow>=2.11.0 + - langcodes>=3.0.0 + - language_data>=1.0.0 diff --git a/requirements.txt b/requirements.txt index e570d825a..ca9aba163 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,6 +13,8 @@ regex>=2023.3.23 sentencepiece>=0.1.95 SPARQLWrapper>=2.0.0 tabulate>=0.8.9 -tensorflow>=2.5.1 +tensorflow>=2.11.0 tqdm==4.56.1 transformers>=4.12 +langcodes>=3.0.0 +language_data>=1.0.0 \ No newline at end of file diff --git a/src/scribe_data/utils.py b/src/scribe_data/utils.py index b424fa845..4c067ee50 100644 --- a/src/scribe_data/utils.py +++ b/src/scribe_data/utils.py @@ -27,6 +27,8 @@ from importlib import resources from pathlib import Path from typing import Any +import langcodes +from langcodes import * PROJECT_ROOT = "Scribe-Data" @@ -154,13 +156,11 @@ def get_language_iso(language: str) -> str: str The ISO code for the language. """ - return _find( - "language", - language, - "iso", - f"{language.capitalize()} is currently not a supported language for ISO conversion.", - ) - + try: + iso_code = str(langcodes.find(language).language) + except LookupError: + raise ValueError(f"{language.capitalize()} is currently not a supported language for ISO conversion.") + return iso_code def get_language_from_iso(iso: str) -> str: """ @@ -176,12 +176,11 @@ def get_language_from_iso(iso: str) -> str: str The name for the language which has an ISO value of iso. """ - return _find( - "iso", - iso, - "language", - f"{iso.upper()} is currently not a supported ISO language.", - ).capitalize() + + language_name = str(Language.make(language=iso).display_name()) + if "Unknown language" in str(language_name): + raise ValueError(f"{iso.upper()} is currently not a supported ISO language.") + return language_name def get_language_words_to_remove(language: str) -> list[str]: diff --git a/tests/load/test_update_utils.py b/tests/load/test_update_utils.py index 6aeaef415..4a99bcfd9 100644 --- a/tests/load/test_update_utils.py +++ b/tests/load/test_update_utils.py @@ -1,6 +1,9 @@ import unittest import pytest +import sys +sys.path.append('../../src') + from scribe_data import utils