Skip to content

Commit

Permalink
Post-processing regex for inline url link
Browse files Browse the repository at this point in the history
  • Loading branch information
unkcpz committed Dec 23, 2023
1 parent 733e2f4 commit fc3797a
Show file tree
Hide file tree
Showing 12 changed files with 162 additions and 33 deletions.
43 changes: 42 additions & 1 deletion src/aiida_core_i18n/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,33 @@ def str_post_processing(raw_str: str) -> str:

# for ``{content}`` make sure a space in front
res = re.sub(r'(?:(?:(?<!^)(?<!\s)(?<!`))(``\w.*?``))', r' \1', add_space, flags=re.ASCII)

# r"请访问 ``话语论坛 <https://aiida.discourse.group> `__``。" -> r"请访问 `话语论坛 <https://aiida.discourse.group>`__。"
res = re.sub(r"``(.*?)\s+`__``", r"`\1`__", res, flags=re.ASCII)

return res.strip()

def met_skip_rule(inp_str: str) -> bool:
    """Tell whether ``inp_str`` has to be left untranslated.

    Only one rule exists at the moment: a string that holds a citation, i.e.
    a reST link pointing to a ``https://doi.org/`` URL and introduced by
    ``DOI:``, is skipped. For example::

        Martin Uhrin, It is a great day, Computational Materials Science **187**, 110086 (2021); DOI: `10.1016/j.commatsci.2020.110086 <https://doi.org/10.1016/j.commatsci.2020.110086>`_

    :param inp_str: the source string that is about to be translated.
    :return: ``True`` when the string matches a skip rule, ``False`` otherwise.
    """
    # ``re.search`` finds the link wherever it sits in the string; the former
    # ``re.match`` with a leading ``.*`` gave up as soon as a newline preceded
    # the ``DOI:`` marker. The dots of the host name are escaped so that only
    # the literal ``doi.org`` is accepted.
    doi_link = r"DOI: `.*? <https://doi\.org/.*?>`_"
    return re.search(doi_link, inp_str) is not None

def translate(inp_str: str, target_lang="ZH", post_processing: bool=True) -> str:
"""Call deepl API to tranlate and do post process"""
# If the inp_str meet the skip rule, return the inp_str immediately
if met_skip_rule(inp_str):
return inp_str

translator = deepl.Translator(get_env_deepl_token())

# We don't want to translate the code snippet, so we use
# a special string to replace the `` in the code snippet to avoid
# the translation.
# `` -> EDBS after translated, recover to ``
# EDBS for End Double BackSlash

Expand Down Expand Up @@ -139,4 +159,25 @@ def is_translated(lines: typing.List[str]) -> bool:

return output_lines



def deepl_status(info: str = "verbose") -> "typing.Union[int, deepl.Usage]":
    """Get the status of the deepl API usage.

    :param info: which piece of information to return:

        * ``"verbose"``: the whole usage object returned by the API,
        * ``"count"``: ``usage.character.count``,
        * ``"limit"``: ``usage.character.limit``,
        * ``"avail"``: ``usage.character.limit - usage.character.count``.

    :return: the usage object for ``"verbose"``, otherwise the requested number.
    :raises RuntimeError: if no deepl token is found in the environment.
    :raises ValueError: if ``info`` is not one of the values listed above.
    """
    import deepl

    token = get_env_deepl_token()
    if token is None:
        raise RuntimeError("Please set the 'DEEPL_TOKEN' environment variable")

    # Reject an unknown selector before spending a request on the API.
    if info not in ("verbose", "count", "limit", "avail"):
        raise ValueError("Please set the correct parameter")

    usage = deepl.Translator(token).get_usage()
    if info == "verbose":
        return usage

    character = usage.character
    if info == "count":
        return character.count
    if info == "limit":
        return character.limit
    # Only "avail" is left at this point.
    return character.limit - character.count
26 changes: 5 additions & 21 deletions src/aiida_core_i18n/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,28 +41,12 @@ def translate(po: pathlib.Path, max_chars: int, override_translation: bool, over
@click.option('-p', '--param', help='which information to show', type=click.Choice(['count', 'limit', 'verbose', 'avail']), default='verbose')
def status(param: str):
"""Show the status of the api translation limit"""
import os
import deepl
from aiida_core_i18n import deepl_status

token = get_env_deepl_token()
if token is None:
click.echo("ERROR: Please set the 'DEEPL_TOKEN' environment variable")
return

translator = deepl.Translator(token)

usage = translator.get_usage()

if param == 'verbose':
click.echo(usage)
elif param == 'count':
click.echo(usage.character.count)
elif param == 'limit':
click.echo(usage.character.limit)
elif param == 'avail':
click.echo(usage.character.limit - usage.character.count)
else:
click.echo("ERROR: Please set the correct parameter")
try:
click.echo(deepl_status(param))
except ValueError as exc:
click.echo(f"ERROR: {exc}")

@cli.command()
@click.argument('string', type=str)
Expand Down
12 changes: 11 additions & 1 deletion tests/statics/origin_text.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,14 @@ msgid ""
msgstr ""
"`点击这里打开一个 issue <https\\://github.com/aiidateam/aiida-"
"core/issues/new?assignees=&labels=topic%2Fdocumentation&template=doc-"
"improvements.md&title=Docs%3A+404>`__"
"improvements.md&title=Docs%3A+404>`__"

#: ../../source/index.rst:174
msgid ""
"Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni "
"Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based "
"engine for robust and modular computational workflows*, Computational "
"Materials Science **187**, 110086 (2021); DOI: "
"`10.1016/j.commatsci.2020.110086 "
"<https://doi.org/10.1016/j.commatsci.2020.110086>`"
msgstr ""
36 changes: 34 additions & 2 deletions tests/test_translate.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import pytest
import pathlib
from collections import namedtuple

from aiida_core_i18n import str_post_processing, po_translate
from aiida_core_i18n import str_post_processing, po_translate, translate

@pytest.fixture(scope="function")
def static_path() -> pathlib.Path:
Expand Down Expand Up @@ -35,6 +36,20 @@ def test_str_post_processing(input: str, expected: str):
got = str_post_processing(input)
assert got == expected

@pytest.mark.parametrize(
    ('input', 'expected'),
    [
        (r"Martin Uhrin, It is a great day, Computational Materials Science **187**, 110086 (2021); DOI: `10.1016/j.commatsci.2020.110086 <https://doi.org/10.1016/j.commatsci.2020.110086>`_", r"Martin Uhrin, It is a great day, Computational Materials Science **187**, 110086 (2021); DOI: `10.1016/j.commatsci.2020.110086 <https://doi.org/10.1016/j.commatsci.2020.110086>`_"),
    ]
)
def test_met_skip_rule(input: str, expected: str, monkeypatch):
    """Check through ``translate`` that a citation is returned untouched.

    ``deepl.Translator.translate_text`` is replaced by a stub whose result
    carries a marker in its ``text`` attribute, so the marker would end up
    in the output if the translator were called.
    """
    # The stubbed result only needs to expose a ``text`` attribute
    dummy_result = namedtuple("Dummy", ["text"])

    def fake_translate_text(*args, **kwargs):
        return dummy_result("YOUSHALLNOTPASS")

    monkeypatch.setattr("deepl.Translator.translate_text", fake_translate_text)

    assert translate(input) == expected

@pytest.fixture(scope="function")
def pot_str(static_path: pathlib.Path) -> str:
Expand All @@ -48,9 +63,26 @@ def test_po_translate_default(pot_str, file_regression):
"""The actuall process of po file
This consumes ~ 500 characters of deepl API
"""
from aiida_core_i18n import deepl_status

# may not be enough for the whole file if so, fail the test
# We need go and maybe change
max_chars = 500

# Get initial count
i_count = deepl_status("count")

lines = pot_str.splitlines()

translated_lines = po_translate(lines)
translated_lines = po_translate(lines, max_chars)
f_count = deepl_status("count")
used = f_count - i_count

if not used < max_chars:
pytest.fail(f"Used {used} characters, more than the max_chars {max_chars}")

print(f"Translated {used} characters in this test session")

file_regression.check('\n'.join(translated_lines))

@pytest.mark.parametrize("override", [True, False])
Expand Down
10 changes: 10 additions & 0 deletions tests/test_translate/test_po_translate_default.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,13 @@ msgstr ""
"`点击这里打开一个 issue <https\\://github.com/aiidateam/aiida-"
"core/issues/new?assignees=&labels=topic%2Fdocumentation&template=doc-"
"improvements.md&title=Docs%3A+404>`__"

#: ../../source/index.rst:174
msgid ""
"Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni "
"Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based "
"engine for robust and modular computational workflows*, Computational "
"Materials Science **187**, 110086 (2021); DOI: "
"`10.1016/j.commatsci.2020.110086 "
"<https://doi.org/10.1016/j.commatsci.2020.110086>`"
msgstr "Martin Uhrin、Sebastiaan.P. Huber, Jusong Yu, Nicola Marzari, and Giovanni Pizzi, *Workflows in AiiDA:为稳健的模块化计算工作流设计一个高通量、基于事件的引擎*,《计算材料科学》**187**,110086 (2021);DOI: `10.1016/j.commatsci.2020.110086 <https://doi.org/10.1016/j.commatsci.2020.110086>`"
10 changes: 10 additions & 0 deletions tests/test_translate/test_po_translate_max_chars_100_.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,13 @@ msgstr ""
"`点击这里打开一个 issue <https\\://github.com/aiidateam/aiida-"
"core/issues/new?assignees=&labels=topic%2Fdocumentation&template=doc-"
"improvements.md&title=Docs%3A+404>`__"

#: ../../source/index.rst:174
msgid ""
"Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni "
"Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based "
"engine for robust and modular computational workflows*, Computational "
"Materials Science **187**, 110086 (2021); DOI: "
"`10.1016/j.commatsci.2020.110086 "
"<https://doi.org/10.1016/j.commatsci.2020.110086>`"
msgstr ""
10 changes: 10 additions & 0 deletions tests/test_translate/test_po_translate_max_chars_20_.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,13 @@ msgstr ""
"`点击这里打开一个 issue <https\\://github.com/aiidateam/aiida-"
"core/issues/new?assignees=&labels=topic%2Fdocumentation&template=doc-"
"improvements.md&title=Docs%3A+404>`__"

#: ../../source/index.rst:174
msgid ""
"Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni "
"Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based "
"engine for robust and modular computational workflows*, Computational "
"Materials Science **187**, 110086 (2021); DOI: "
"`10.1016/j.commatsci.2020.110086 "
"<https://doi.org/10.1016/j.commatsci.2020.110086>`"
msgstr ""
10 changes: 10 additions & 0 deletions tests/test_translate/test_po_translate_max_chars_3_.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,13 @@ msgstr ""
"`点击这里打开一个 issue <https\\://github.com/aiidateam/aiida-"
"core/issues/new?assignees=&labels=topic%2Fdocumentation&template=doc-"
"improvements.md&title=Docs%3A+404>`__"

#: ../../source/index.rst:174
msgid ""
"Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni "
"Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based "
"engine for robust and modular computational workflows*, Computational "
"Materials Science **187**, 110086 (2021); DOI: "
"`10.1016/j.commatsci.2020.110086 "
"<https://doi.org/10.1016/j.commatsci.2020.110086>`"
msgstr ""
10 changes: 10 additions & 0 deletions tests/test_translate/test_po_translate_max_chars_500_.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,13 @@ msgstr ""
"`点击这里打开一个 issue <https\\://github.com/aiidateam/aiida-"
"core/issues/new?assignees=&labels=topic%2Fdocumentation&template=doc-"
"improvements.md&title=Docs%3A+404>`__"

#: ../../source/index.rst:174
msgid ""
"Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni "
"Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based "
"engine for robust and modular computational workflows*, Computational "
"Materials Science **187**, 110086 (2021); DOI: "
"`10.1016/j.commatsci.2020.110086 "
"<https://doi.org/10.1016/j.commatsci.2020.110086>`"
msgstr "Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based engine for robust and modular computational workflows*, Computational Materials Science **187**, 110086 (2021); DOI: `10.1016/j.commatsci.2020.110086 <https://doi.org/10.1016/j.commatsci.2020.110086>`"
10 changes: 10 additions & 0 deletions tests/test_translate/test_po_translate_override_False_.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,13 @@ msgstr ""
"`点击这里打开一个 issue <https\\://github.com/aiidateam/aiida-"
"core/issues/new?assignees=&labels=topic%2Fdocumentation&template=doc-"
"improvements.md&title=Docs%3A+404>`__"

#: ../../source/index.rst:174
msgid ""
"Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni "
"Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based "
"engine for robust and modular computational workflows*, Computational "
"Materials Science **187**, 110086 (2021); DOI: "
"`10.1016/j.commatsci.2020.110086 "
"<https://doi.org/10.1016/j.commatsci.2020.110086>`"
msgstr "Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based engine for robust and modular computational workflows*, Computational Materials Science **187**, 110086 (2021); DOI: `10.1016/j.commatsci.2020.110086 <https://doi.org/10.1016/j.commatsci.2020.110086>`"
10 changes: 10 additions & 0 deletions tests/test_translate/test_po_translate_override_True_.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,13 @@ msgstr "`Click here to open an issue <https://github.com/aiidateam/aiida-core/is




#: ../../source/index.rst:174
msgid ""
"Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni "
"Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based "
"engine for robust and modular computational workflows*, Computational "
"Materials Science **187**, 110086 (2021); DOI: "
"`10.1016/j.commatsci.2020.110086 "
"<https://doi.org/10.1016/j.commatsci.2020.110086>`"
msgstr "Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based engine for robust and modular computational workflows*, Computational Materials Science **187**, 110086 (2021); DOI: `10.1016/j.commatsci.2020.110086 <https://doi.org/10.1016/j.commatsci.2020.110086>`"
8 changes: 0 additions & 8 deletions translations/zh_CN/index.po
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,6 @@ msgid ""
"welcome, get started with the `contributing guidelines "
"<https://github.com/aiidateam/aiida-core/wiki>`__."
msgstr ""
"看到文档中有错别字?想改进代码?欢迎随时提供帮助,请从 `贡献指南 <https://github.com/aiidateam/aiida-"
"core/wiki>__` 开始。"

#: ../../source/index.rst:164
msgid "How to cite"
Expand Down Expand Up @@ -179,12 +177,6 @@ msgid ""
"`10.1016/j.commatsci.2020.110086 "
"<https://doi.org/10.1016/j.commatsci.2020.110086>`_"
msgstr ""
"Martin Uhrin, Sebastiaan. P. Huber, Jusong Yu, Nicola Marzari, and Giovanni "
"Pizzi, *Workflows in AiiDA: Engineering a high-throughput, event-based "
"engine for robust and modular computational workflows*, Computational "
"Materials Science **187**, 110086 (2021); DOI: "
"`10.1016/j.commatsci.2020.110086 "
"<https://doi.org/10.1016/j.commatsci.2020.110086>`_"

#: ../../source/index.rst:172
msgid "If the ADES concepts are referenced, please also cite:"
Expand Down

0 comments on commit fc3797a

Please sign in to comment.