From 86d6c68d43e5c3dcb4abeeb790b58afdbfb32abc Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Sun, 3 Nov 2024 15:30:16 +0100 Subject: [PATCH] [3.13] gh-104400: Add more tests to pygettext (GH-108173) (GH-126361) (cherry picked from commit dcae5cd6abaae4f73e656ebc054f30d3f15ca7b8) Co-authored-by: Tomas R --- Lib/test/test_tools/i18n_data/docstrings.pot | 40 +++++++ Lib/test/test_tools/i18n_data/docstrings.py | 41 +++++++ Lib/test/test_tools/i18n_data/fileloc.pot | 35 ++++++ Lib/test/test_tools/i18n_data/fileloc.py | 26 +++++ Lib/test/test_tools/i18n_data/messages.pot | 67 +++++++++++ Lib/test/test_tools/i18n_data/messages.py | 64 +++++++++++ Lib/test/test_tools/test_i18n.py | 110 +++++++++++++++---- Makefile.pre.in | 1 + 8 files changed, 363 insertions(+), 21 deletions(-) create mode 100644 Lib/test/test_tools/i18n_data/docstrings.pot create mode 100644 Lib/test/test_tools/i18n_data/docstrings.py create mode 100644 Lib/test/test_tools/i18n_data/fileloc.pot create mode 100644 Lib/test/test_tools/i18n_data/fileloc.py create mode 100644 Lib/test/test_tools/i18n_data/messages.pot create mode 100644 Lib/test/test_tools/i18n_data/messages.py diff --git a/Lib/test/test_tools/i18n_data/docstrings.pot b/Lib/test/test_tools/i18n_data/docstrings.pot new file mode 100644 index 00000000000000..5af1d41422ff62 --- /dev/null +++ b/Lib/test/test_tools/i18n_data/docstrings.pot @@ -0,0 +1,40 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR , YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2000-01-01 00:00+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + + +#: docstrings.py:7 +#, docstring +msgid "" +msgstr "" + +#: docstrings.py:18 +#, docstring +msgid "" +"multiline\n" +" docstring\n" +" " +msgstr "" + +#: docstrings.py:25 +#, docstring +msgid "docstring1" +msgstr "" + +#: docstrings.py:30 +#, docstring +msgid "Hello, {}!" +msgstr "" + diff --git a/Lib/test/test_tools/i18n_data/docstrings.py b/Lib/test/test_tools/i18n_data/docstrings.py new file mode 100644 index 00000000000000..85d7f159d37775 --- /dev/null +++ b/Lib/test/test_tools/i18n_data/docstrings.py @@ -0,0 +1,41 @@ +# Test docstring extraction +from gettext import gettext as _ + + +# Empty docstring +def test(x): + """""" + + +# Leading empty line +def test2(x): + + """docstring""" # XXX This should be extracted but isn't. + + +# XXX Multiline docstrings should be cleaned with `inspect.cleandoc`. +def test3(x): + """multiline + docstring + """ + + +# Multiple docstrings - only the first should be extracted +def test4(x): + """docstring1""" + """docstring2""" + + +def test5(x): + """Hello, {}!""".format("world!") # XXX This should not be extracted. + + +# Nested docstrings +def test6(x): + def inner(y): + """nested docstring""" # XXX This should be extracted but isn't. + + +class Outer: + class Inner: + "nested class docstring" # XXX This should be extracted but isn't. diff --git a/Lib/test/test_tools/i18n_data/fileloc.pot b/Lib/test/test_tools/i18n_data/fileloc.pot new file mode 100644 index 00000000000000..dbd28687a73556 --- /dev/null +++ b/Lib/test/test_tools/i18n_data/fileloc.pot @@ -0,0 +1,35 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR , YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2000-01-01 00:00+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + + +#: fileloc.py:5 fileloc.py:6 +msgid "foo" +msgstr "" + +#: fileloc.py:9 +msgid "bar" +msgstr "" + +#: fileloc.py:14 fileloc.py:18 +#, docstring +msgid "docstring" +msgstr "" + +#: fileloc.py:22 fileloc.py:26 +#, docstring +msgid "baz" +msgstr "" + diff --git a/Lib/test/test_tools/i18n_data/fileloc.py b/Lib/test/test_tools/i18n_data/fileloc.py new file mode 100644 index 00000000000000..c5d4d0595fea52 --- /dev/null +++ b/Lib/test/test_tools/i18n_data/fileloc.py @@ -0,0 +1,26 @@ +# Test file locations +from gettext import gettext as _ + +# Duplicate strings +_('foo') +_('foo') + +# Duplicate strings on the same line should only add one location to the output +_('bar'), _('bar') + + +# Duplicate docstrings +class A: + """docstring""" + + +def f(): + """docstring""" + + +# Duplicate message and docstring +_('baz') + + +def g(): + """baz""" diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot new file mode 100644 index 00000000000000..ddfbd18349ef4f --- /dev/null +++ b/Lib/test/test_tools/i18n_data/messages.pot @@ -0,0 +1,67 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR , YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2000-01-01 00:00+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + + +#: messages.py:5 +msgid "" +msgstr "" + +#: messages.py:8 messages.py:9 +msgid "parentheses" +msgstr "" + +#: messages.py:12 +msgid "Hello, world!" +msgstr "" + +#: messages.py:15 +msgid "" +"Hello,\n" +" multiline!\n" +msgstr "" + +#: messages.py:29 +msgid "Hello, {}!" +msgstr "" + +#: messages.py:33 +msgid "1" +msgstr "" + +#: messages.py:33 +msgid "2" +msgstr "" + +#: messages.py:34 messages.py:35 +msgid "A" +msgstr "" + +#: messages.py:34 messages.py:35 +msgid "B" +msgstr "" + +#: messages.py:36 +msgid "set" +msgstr "" + +#: messages.py:42 +msgid "nested string" +msgstr "" + +#: messages.py:47 +msgid "baz" +msgstr "" + diff --git a/Lib/test/test_tools/i18n_data/messages.py b/Lib/test/test_tools/i18n_data/messages.py new file mode 100644 index 00000000000000..f220294b8d5c67 --- /dev/null +++ b/Lib/test/test_tools/i18n_data/messages.py @@ -0,0 +1,64 @@ +# Test message extraction +from gettext import gettext as _ + +# Empty string +_("") + +# Extra parentheses +(_("parentheses")) +((_("parentheses"))) + +# Multiline strings +_("Hello, " + "world!") + +_("""Hello, + multiline! +""") + +# Invalid arguments +_() +_(None) +_(1) +_(False) +_(x="kwargs are not allowed") +_("foo", "bar") +_("something", x="something else") + +# .format() +_("Hello, {}!").format("world") # valid +_("Hello, {}!".format("world")) # invalid + +# Nested structures +_("1"), _("2") +arr = [_("A"), _("B")] +obj = {'a': _("A"), 'b': _("B")} +{{{_('set')}}} + + +# Nested functions and classes +def test(): + _("nested string") # XXX This should be extracted but isn't. + [_("nested string")] + + +class Foo: + def bar(self): + return _("baz") + + +def bar(x=_('default value')): # XXX This should be extracted but isn't. + pass + + +def baz(x=[_('default value')]): # XXX This should be extracted but isn't. + pass + + +# Shadowing _() +def _(x): + pass + + +def _(x="don't extract me"): + pass diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index c083a04475e726..21dead8f943bb7 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -1,9 +1,11 @@ """Tests to cover the Tools/i18n package""" import os +import re import sys import unittest from textwrap import dedent +from pathlib import Path from test.support.script_helper import assert_python_ok from test.test_tools import skip_if_missing, toolsdir @@ -12,20 +14,47 @@ skip_if_missing() +DATA_DIR = Path(__file__).resolve().parent / 'i18n_data' + + +def normalize_POT_file(pot): + """Normalize the POT creation timestamp, charset and + file locations to make the POT file easier to compare. + + """ + # Normalize the creation date. + date_pattern = re.compile(r'"POT-Creation-Date: .+?\\n"') + header = r'"POT-Creation-Date: 2000-01-01 00:00+0000\\n"' + pot = re.sub(date_pattern, header, pot) + + # Normalize charset to UTF-8 (currently there's no way to specify the output charset). + charset_pattern = re.compile(r'"Content-Type: text/plain; charset=.+?\\n"') + charset = r'"Content-Type: text/plain; charset=UTF-8\\n"' + pot = re.sub(charset_pattern, charset, pot) + + # Normalize file location path separators in case this test is + # running on Windows (which uses '\'). + fileloc_pattern = re.compile(r'#:.+') + + def replace(match): + return match[0].replace(os.sep, "/") + pot = re.sub(fileloc_pattern, replace, pot) + return pot + class Test_pygettext(unittest.TestCase): """Tests for the pygettext.py tool""" - script = os.path.join(toolsdir,'i18n', 'pygettext.py') + script = Path(toolsdir, 'i18n', 'pygettext.py') def get_header(self, data): """ utility: return the header of a .po file as a dictionary """ headers = {} for line in data.split('\n'): - if not line or line.startswith(('#', 'msgid','msgstr')): + if not line or line.startswith(('#', 'msgid', 'msgstr')): continue line = line.strip('"') - key, val = line.split(':',1) + key, val = line.split(':', 1) headers[key] = val.strip() return headers @@ -53,13 +82,18 @@ def get_msgids(self, data): return msgids + def assert_POT_equal(self, expected, actual): + """Check if two POT files are equal""" + self.maxDiff = None + self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual)) + def extract_docstrings_from_str(self, module_content): """ utility: return all msgids extracted from module_content """ filename = 'test_docstrings.py' with temp_cwd(None) as cwd: with open(filename, 'w', encoding='utf-8') as fp: fp.write(module_content) - assert_python_ok(self.script, '-D', filename) + assert_python_ok('-Xutf8', self.script, '-D', filename) with open('messages.pot', encoding='utf-8') as fp: data = fp.read() return self.get_msgids(data) @@ -69,7 +103,7 @@ def test_header(self): http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry """ with temp_cwd(None) as cwd: - assert_python_ok(self.script) + assert_python_ok('-Xutf8', self.script) with open('messages.pot', encoding='utf-8') as fp: data = fp.read() header = self.get_header(data) @@ -96,7 +130,7 @@ def test_POT_Creation_Date(self): """ Match the date format from xgettext for POT-Creation-Date """ from datetime import datetime with temp_cwd(None) as cwd: - assert_python_ok(self.script) + assert_python_ok('-Xutf8', self.script) with open('messages.pot', encoding='utf-8') as fp: data = fp.read() header = self.get_header(data) @@ -310,6 +344,20 @@ def test_calls_in_fstring_with_partially_wrong_expression(self): self.assertNotIn('foo', msgids) self.assertIn('bar', msgids) + def test_pygettext_output(self): + """Test that the pygettext output exactly matches snapshots.""" + for input_file in DATA_DIR.glob('*.py'): + output_file = input_file.with_suffix('.pot') + with self.subTest(input_file=f'i18n_data/{input_file}'): + contents = input_file.read_text(encoding='utf-8') + with temp_cwd(None): + Path(input_file.name).write_text(contents) + assert_python_ok('-Xutf8', self.script, '--docstrings', input_file.name) + output = Path('messages.pot').read_text(encoding='utf-8') + + expected = output_file.read_text(encoding='utf-8') + self.assert_POT_equal(expected, output) + def test_files_list(self): """Make sure the directories are inspected for source files bpo-31920 @@ -318,21 +366,41 @@ def test_files_list(self): text2 = 'Text to translate2' text3 = 'Text to ignore' with temp_cwd(None), temp_dir(None) as sdir: - os.mkdir(os.path.join(sdir, 'pypkg')) - with open(os.path.join(sdir, 'pypkg', 'pymod.py'), 'w', - encoding='utf-8') as sfile: - sfile.write(f'_({text1!r})') - os.mkdir(os.path.join(sdir, 'pkg.py')) - with open(os.path.join(sdir, 'pkg.py', 'pymod2.py'), 'w', - encoding='utf-8') as sfile: - sfile.write(f'_({text2!r})') - os.mkdir(os.path.join(sdir, 'CVS')) - with open(os.path.join(sdir, 'CVS', 'pymod3.py'), 'w', - encoding='utf-8') as sfile: - sfile.write(f'_({text3!r})') - assert_python_ok(self.script, sdir) - with open('messages.pot', encoding='utf-8') as fp: - data = fp.read() + pymod = Path(sdir, 'pypkg', 'pymod.py') + pymod.parent.mkdir() + pymod.write_text(f'_({text1!r})', encoding='utf-8') + + pymod2 = Path(sdir, 'pkg.py', 'pymod2.py') + pymod2.parent.mkdir() + pymod2.write_text(f'_({text2!r})', encoding='utf-8') + + pymod3 = Path(sdir, 'CVS', 'pymod3.py') + pymod3.parent.mkdir() + pymod3.write_text(f'_({text3!r})', encoding='utf-8') + + assert_python_ok('-Xutf8', self.script, sdir) + data = Path('messages.pot').read_text(encoding='utf-8') self.assertIn(f'msgid "{text1}"', data) self.assertIn(f'msgid "{text2}"', data) self.assertNotIn(text3, data) + + +def update_POT_snapshots(): + for input_file in DATA_DIR.glob('*.py'): + output_file = input_file.with_suffix('.pot') + contents = input_file.read_bytes() + with temp_cwd(None): + Path(input_file.name).write_bytes(contents) + assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings', input_file.name) + output = Path('messages.pot').read_text(encoding='utf-8') + + output = normalize_POT_file(output) + output_file.write_text(output, encoding='utf-8') + + +if __name__ == '__main__': + # To regenerate POT files + if len(sys.argv) > 1 and sys.argv[1] == '--snapshot-update': + update_POT_snapshots() + sys.exit(0) + unittest.main() diff --git a/Makefile.pre.in b/Makefile.pre.in index 445fa6381c20a4..32a338aa5c879b 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2476,6 +2476,7 @@ TESTSUBDIRS= idlelib/idle_test \ test/test_tomllib/data/valid/dates-and-times \ test/test_tomllib/data/valid/multiline-basic-str \ test/test_tools \ + test/test_tools/i18n_data \ test/test_ttk \ test/test_unittest \ test/test_unittest/testmock \