From bf2f1763ebf18110e6435a4eaf9fa7a7f3312dbf Mon Sep 17 00:00:00 2001
From: Jag-Marcel
Date: Tue, 19 Mar 2024 17:18:23 +0100
Subject: [PATCH] Portuguese translation to other Scribe languages

---
 .../translations/translate_words.py           | 51 +++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 src/scribe_data/extract_transform/languages/Portuguese/translations/translate_words.py

diff --git a/src/scribe_data/extract_transform/languages/Portuguese/translations/translate_words.py b/src/scribe_data/extract_transform/languages/Portuguese/translations/translate_words.py
new file mode 100644
index 000000000..d170c943b
--- /dev/null
+++ b/src/scribe_data/extract_transform/languages/Portuguese/translations/translate_words.py
@@ -0,0 +1,51 @@
+"""
+Translates the Portuguese words queried from Wikidata to all other Scribe languages.
+
+The words are read from words_to_translate.json next to this script. Translations
+already saved in ../formatted_data/translated_words.json are loaded when that file
+exists and are passed to the translation utility together with the word list.
+
+Example
+-------
+    python3 src/scribe_data/extract_transform/languages/Portuguese/translations/translate_words.py
+"""
+
+import json
+import os
+import sys
+
+# Make the Scribe-Data sources importable: the part of the path before the first
+# "Scribe-Data" is the prefix, and "Scribe-Data/src" is appended to it.
+PATH_TO_SCRIBE_ORG = os.path.dirname(sys.path[0]).partition("Scribe-Data")[0]
+PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
+sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)
+
+# Imported after the sys.path change above, which puts PATH_TO_SCRIBE_DATA_SRC first.
+from scribe_data.utils import translate_to_other_languages  # noqa: E402
+
+SRC_LANG = "Portuguese"
+
+# Data file locations are resolved relative to this script's directory.
+translate_script_dir = os.path.dirname(os.path.abspath(__file__))
+words_to_translate_path = os.path.join(translate_script_dir, "words_to_translate.json")
+translated_words_path = os.path.join(
+    translate_script_dir, "../formatted_data/translated_words.json"
+)
+
+# Every entry of the input JSON must provide its word under the "word" key.
+with open(words_to_translate_path, encoding="utf-8") as words_file:
+    word_list = [entry["word"] for entry in json.load(words_file)]
+
+# Start from the previously saved translations when that file exists.
+if os.path.exists(translated_words_path):
+    with open(translated_words_path, encoding="utf-8") as translations_file:
+        translations = json.load(translations_file)
+else:
+    translations = {}
+
+translate_to_other_languages(
+    source_language=SRC_LANG,
+    word_list=word_list,
+    translations=translations,
+    batch_size=100,
+)