From fc3797ab0140227f8c29d373cfc08136b50b6c7d Mon Sep 17 00:00:00 2001 From: Jusong Yu Date: Sat, 23 Dec 2023 02:38:48 +0100 Subject: [PATCH] Post-processing regex for inline url link --- src/aiida_core_i18n/__init__.py | 43 ++++++++++++++++++- src/aiida_core_i18n/__main__.py | 26 +++-------- tests/statics/origin_text.txt | 12 +++++- tests/test_translate.py | 36 +++++++++++++++- .../test_po_translate_default.txt | 10 +++++ .../test_po_translate_max_chars_100_.txt | 10 +++++ .../test_po_translate_max_chars_20_.txt | 10 +++++ .../test_po_translate_max_chars_3_.txt | 10 +++++ .../test_po_translate_max_chars_500_.txt | 10 +++++ .../test_po_translate_override_False_.txt | 10 +++++ .../test_po_translate_override_True_.txt | 10 +++++ translations/zh_CN/index.po | 8 ---- 12 files changed, 162 insertions(+), 33 deletions(-) diff --git a/src/aiida_core_i18n/__init__.py b/src/aiida_core_i18n/__init__.py index ffd10b84d..b82cdf306 100644 --- a/src/aiida_core_i18n/__init__.py +++ b/src/aiida_core_i18n/__init__.py @@ -23,13 +23,33 @@ def str_post_processing(raw_str: str) -> str: # for ``{content}`` make sure a space in front res = re.sub(r'(?:(?:(? `__``。" -> r"请访问 `话语论坛 `__。" + res = re.sub(r"``(.*?)\s+`__``", r"`\1`__", res, flags=re.ASCII) return res.strip() +def met_skip_rule(inp_str: str) -> bool: + """The rule when met, skip the translation + """ + # if string is a citation, skip (container link to a doi url) + # e.g. Martin Uhrin, It is a great day, Computational Materials Science **187**, 110086 (2021); DOI: `10.1016/j.commatsci.2020.110086 `_ + if re.match(r".*DOI: `.*? `_.*?", inp_str): + return True + + return False + def translate(inp_str: str, target_lang="ZH", post_processing: bool=True) -> str: """Call deepl API to tranlate and do post process""" + # If the inp_str meet the skip rule, return the inp_str immediately + if met_skip_rule(inp_str): + return inp_str + translator = deepl.Translator(get_env_deepl_token()) + # We don't want to translate the code snippet, so we use + # a special string to replace the `` in the code snippet to avoid + # the translation. # `` -> EDBS after translated, recover to `` # EDBS for End Double BackSlash @@ -139,4 +159,25 @@ def is_translated(lines: typing.List[str]) -> bool: return output_lines - \ No newline at end of file + +def deepl_status(info: str = "verbose") -> int: + """Get the status of the deepl API""" + import deepl + token = get_env_deepl_token() + if token is None: + raise RuntimeError("Please set the 'DEEPL_TOKEN' environment variable") + + translator = deepl.Translator(token) + + usage = translator.get_usage() + + if info == "verbose": + return usage + elif info == "count": + return usage.character.count + elif info == "limit": + return usage.character.limit + elif info == "avail": + return usage.character.limit - usage.character.count + else: + raise ValueError("Please set the correct parameter") \ No newline at end of file diff --git a/src/aiida_core_i18n/__main__.py b/src/aiida_core_i18n/__main__.py index abad81314..87b77d7ca 100644 --- a/src/aiida_core_i18n/__main__.py +++ b/src/aiida_core_i18n/__main__.py @@ -41,28 +41,12 @@ def translate(po: pathlib.Path, max_chars: int, override_translation: bool, over @click.option('-p', '--param', help='which information to show', type=click.Choice(['count', 'limit', 'verbose', 'avail']), default='verbose') def status(param: str): """Show the status of the api translation limit""" - import os - import deepl + from aiida_core_i18n import deepl_status - token = get_env_deepl_token() - if token is None: - click.echo("ERROR: Please set the 'DEEPL_TOKEN' environment variable") - return - - translator = deepl.Translator(token) - - usage = translator.get_usage() - - if param == 'verbose': - click.echo(usage) - elif param == 'count': - click.echo(usage.character.count) - elif param == 'limit': - click.echo(usage.character.limit) - elif param == 'avail': - click.echo(usage.character.limit - usage.character.count) - else: - click.echo("ERROR: Please set the correct parameter") + try: + click.echo(deepl_status(param)) + except ValueError as exc: + click.echo(f"ERROR: {exc}") @cli.command() @click.argument('string', type=str) diff --git a/tests/statics/origin_text.txt b/tests/statics/origin_text.txt index 0e5811533..3af0ecb00 100644 --- a/tests/statics/origin_text.txt +++ b/tests/statics/origin_text.txt @@ -43,4 +43,14 @@ msgid "" msgstr "" "`点击这里打开一个 issue `__" \ No newline at end of file +"improvements.md&title=Docs%3A+404>`__" + +#: ../../source/index.rst:174 +msgid "" +"Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni " +"Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based " +"engine for robust and modular computational workflows*, Computational " +"Materials Science **187**, 110086 (2021); DOI: " +"`10.1016/j.commatsci.2020.110086 " +"`" +msgstr "" diff --git a/tests/test_translate.py b/tests/test_translate.py index 1d978ffbe..281b65405 100644 --- a/tests/test_translate.py +++ b/tests/test_translate.py @@ -1,7 +1,8 @@ import pytest import pathlib +from collections import namedtuple -from aiida_core_i18n import str_post_processing, po_translate +from aiida_core_i18n import str_post_processing, po_translate, translate @pytest.fixture(scope="function") def static_path() -> pathlib.Path: @@ -35,6 +36,20 @@ def test_str_post_processing(input: str, expected: str): got = str_post_processing(input) assert got == expected +@pytest.mark.parametrize( + ('input', 'expected'), + [ + (r"Martin Uhrin, It is a great day, Computational Materials Science **187**, 110086 (2021); DOI: `10.1016/j.commatsci.2020.110086 `_", r"Martin Uhrin, It is a great day, Computational Materials Science **187**, 110086 (2021); DOI: `10.1016/j.commatsci.2020.110086 `_"), + ] +) +def test_met_skip_rule(input: str, expected: str, monkeypatch): + """Test the skip rule by translate, the deepl translate function is monkey patched to return a dummy string + """ + # The return value should contain the `text` attribute + monkeypatch.setattr("deepl.Translator.translate_text", lambda *args, **kwargs: namedtuple("Dummy", ["text"])("YOUSHALLNOTPASS")) + + got = translate(input) + assert got == expected @pytest.fixture(scope="function") def pot_str(static_path: pathlib.Path) -> str: @@ -48,9 +63,26 @@ def test_po_translate_default(pot_str, file_regression): """The actuall process of po file This consumes ~ 500 characters of deepl API """ + from aiida_core_i18n import deepl_status + + # may not be enough for the whole file if so, fail the test + # We need go and maybe change + max_chars = 500 + + # Get initial count + i_count = deepl_status("count") + lines = pot_str.splitlines() - translated_lines = po_translate(lines) + translated_lines = po_translate(lines, max_chars) + f_count = deepl_status("count") + used = f_count - i_count + + if not used < max_chars: + pytest.fail(f"Used {used} characters, more than the max_chars {max_chars}") + + print(f"Translated {used} characters in this test session") + file_regression.check('\n'.join(translated_lines)) @pytest.mark.parametrize("override", [True, False]) diff --git a/tests/test_translate/test_po_translate_default.txt b/tests/test_translate/test_po_translate_default.txt index e24bcfff4..37a9871fc 100644 --- a/tests/test_translate/test_po_translate_default.txt +++ b/tests/test_translate/test_po_translate_default.txt @@ -44,3 +44,13 @@ msgstr "" "`点击这里打开一个 issue `__" + +#: ../../source/index.rst:174 +msgid "" +"Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni " +"Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based " +"engine for robust and modular computational workflows*, Computational " +"Materials Science **187**, 110086 (2021); DOI: " +"`10.1016/j.commatsci.2020.110086 " +"`" +msgstr "Martin Uhrin、Sebastiaan.P. Huber, Jusong Yu, Nicola Marzari, and Giovanni Pizzi, *Workflows in AiiDA:为稳健的模块化计算工作流设计一个高通量、基于事件的引擎*,《计算材料科学》**187**,110086 (2021);DOI: `10.1016/j.commatsci.2020.110086 `" diff --git a/tests/test_translate/test_po_translate_max_chars_100_.txt b/tests/test_translate/test_po_translate_max_chars_100_.txt index 8fe0ce6e3..b41260eb3 100644 --- a/tests/test_translate/test_po_translate_max_chars_100_.txt +++ b/tests/test_translate/test_po_translate_max_chars_100_.txt @@ -44,3 +44,13 @@ msgstr "" "`点击这里打开一个 issue `__" + +#: ../../source/index.rst:174 +msgid "" +"Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni " +"Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based " +"engine for robust and modular computational workflows*, Computational " +"Materials Science **187**, 110086 (2021); DOI: " +"`10.1016/j.commatsci.2020.110086 " +"`" +msgstr "" diff --git a/tests/test_translate/test_po_translate_max_chars_20_.txt b/tests/test_translate/test_po_translate_max_chars_20_.txt index ab06a9e67..052d7046a 100644 --- a/tests/test_translate/test_po_translate_max_chars_20_.txt +++ b/tests/test_translate/test_po_translate_max_chars_20_.txt @@ -44,3 +44,13 @@ msgstr "" "`点击这里打开一个 issue `__" + +#: ../../source/index.rst:174 +msgid "" +"Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni " +"Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based " +"engine for robust and modular computational workflows*, Computational " +"Materials Science **187**, 110086 (2021); DOI: " +"`10.1016/j.commatsci.2020.110086 " +"`" +msgstr "" diff --git a/tests/test_translate/test_po_translate_max_chars_3_.txt b/tests/test_translate/test_po_translate_max_chars_3_.txt index aaaae01e9..6af8d1c8d 100644 --- a/tests/test_translate/test_po_translate_max_chars_3_.txt +++ b/tests/test_translate/test_po_translate_max_chars_3_.txt @@ -44,3 +44,13 @@ msgstr "" "`点击这里打开一个 issue `__" + +#: ../../source/index.rst:174 +msgid "" +"Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni " +"Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based " +"engine for robust and modular computational workflows*, Computational " +"Materials Science **187**, 110086 (2021); DOI: " +"`10.1016/j.commatsci.2020.110086 " +"`" +msgstr "" diff --git a/tests/test_translate/test_po_translate_max_chars_500_.txt b/tests/test_translate/test_po_translate_max_chars_500_.txt index 8fe0ce6e3..fcdf4013d 100644 --- a/tests/test_translate/test_po_translate_max_chars_500_.txt +++ b/tests/test_translate/test_po_translate_max_chars_500_.txt @@ -44,3 +44,13 @@ msgstr "" "`点击这里打开一个 issue `__" + +#: ../../source/index.rst:174 +msgid "" +"Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni " +"Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based " +"engine for robust and modular computational workflows*, Computational " +"Materials Science **187**, 110086 (2021); DOI: " +"`10.1016/j.commatsci.2020.110086 " +"`" +msgstr "Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based engine for robust and modular computational workflows*, Computational Materials Science **187**, 110086 (2021); DOI: `10.1016/j.commatsci.2020.110086 `" diff --git a/tests/test_translate/test_po_translate_override_False_.txt b/tests/test_translate/test_po_translate_override_False_.txt index 8fe0ce6e3..fcdf4013d 100644 --- a/tests/test_translate/test_po_translate_override_False_.txt +++ b/tests/test_translate/test_po_translate_override_False_.txt @@ -44,3 +44,13 @@ msgstr "" "`点击这里打开一个 issue `__" + +#: ../../source/index.rst:174 +msgid "" +"Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni " +"Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based " +"engine for robust and modular computational workflows*, Computational " +"Materials Science **187**, 110086 (2021); DOI: " +"`10.1016/j.commatsci.2020.110086 " +"`" +msgstr "Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based engine for robust and modular computational workflows*, Computational Materials Science **187**, 110086 (2021); DOI: `10.1016/j.commatsci.2020.110086 `" diff --git a/tests/test_translate/test_po_translate_override_True_.txt b/tests/test_translate/test_po_translate_override_True_.txt index 17a08360c..a7238e37d 100644 --- a/tests/test_translate/test_po_translate_override_True_.txt +++ b/tests/test_translate/test_po_translate_override_True_.txt @@ -44,3 +44,13 @@ msgstr "`Click here to open an issue `" +msgstr "Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based engine for robust and modular computational workflows*, Computational Materials Science **187**, 110086 (2021); DOI: `10.1016/j.commatsci.2020.110086 `" diff --git a/translations/zh_CN/index.po b/translations/zh_CN/index.po index 336891c0d..41c778458 100644 --- a/translations/zh_CN/index.po +++ b/translations/zh_CN/index.po @@ -137,8 +137,6 @@ msgid "" "welcome, get started with the `contributing guidelines " "`__." msgstr "" -"看到文档中有错别字?想改进代码?欢迎随时提供帮助,请从 `贡献指南 __` 开始。" #: ../../source/index.rst:164 msgid "How to cite" @@ -179,12 +177,6 @@ msgid "" "`10.1016/j.commatsci.2020.110086 " "`_" msgstr "" -"Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni " -"Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based " -"engine for robust and modular computational workflows*, Computational " -"Materials Science **187**, 110086 (2021); DOI: " -"`10.1016/j.commatsci.2020.110086 " -"`_" #: ../../source/index.rst:172 msgid "If the ADES concepts are referenced, please also cite:"