diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
deleted file mode 100644
index c083a04475e726..00000000000000
--- a/Lib/test/test_tools/test_i18n.py
+++ /dev/null
@@ -1,338 +0,0 @@
-"""Tests to cover the Tools/i18n package"""
-
-import os
-import sys
-import unittest
-from textwrap import dedent
-
-from test.support.script_helper import assert_python_ok
-from test.test_tools import skip_if_missing, toolsdir
-from test.support.os_helper import temp_cwd, temp_dir
-
-
-skip_if_missing()
-
-
-class Test_pygettext(unittest.TestCase):
-    """Tests for the pygettext.py tool"""
-
-    script = os.path.join(toolsdir,'i18n', 'pygettext.py')
-
-    def get_header(self, data):
-        """ utility: return the header of a .po file as a dictionary """
-        headers = {}
-        for line in data.split('\n'):
-            if not line or line.startswith(('#', 'msgid','msgstr')):
-                continue
-            line = line.strip('"')
-            key, val = line.split(':',1)
-            headers[key] = val.strip()
-        return headers
-
-    def get_msgids(self, data):
-        """ utility: return all msgids in .po file as a list of strings """
-        msgids = []
-        reading_msgid = False
-        cur_msgid = []
-        for line in data.split('\n'):
-            if reading_msgid:
-                if line.startswith('"'):
-                    cur_msgid.append(line.strip('"'))
-                else:
-                    msgids.append('\n'.join(cur_msgid))
-                    cur_msgid = []
-                    reading_msgid = False
-                    continue
-            if line.startswith('msgid '):
-                line = line[len('msgid '):]
-                cur_msgid.append(line.strip('"'))
-                reading_msgid = True
-        else:
-            if reading_msgid:
-                msgids.append('\n'.join(cur_msgid))
-
-        return msgids
-
-    def extract_docstrings_from_str(self, module_content):
-        """ utility: return all msgids extracted from module_content """
-        filename = 'test_docstrings.py'
-        with temp_cwd(None) as cwd:
-            with open(filename, 'w', encoding='utf-8') as fp:
-                fp.write(module_content)
-            assert_python_ok(self.script, '-D', filename)
-            with open('messages.pot', encoding='utf-8') as fp:
-                data = fp.read()
-        return self.get_msgids(data)
-
-    def test_header(self):
-        """Make sure the required fields are in the header, according to:
-           http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry
-        """
-        with temp_cwd(None) as cwd:
-            assert_python_ok(self.script)
-            with open('messages.pot', encoding='utf-8') as fp:
-                data = fp.read()
-            header = self.get_header(data)
-
-            self.assertIn("Project-Id-Version", header)
-            self.assertIn("POT-Creation-Date", header)
-            self.assertIn("PO-Revision-Date", header)
-            self.assertIn("Last-Translator", header)
-            self.assertIn("Language-Team", header)
-            self.assertIn("MIME-Version", header)
-            self.assertIn("Content-Type", header)
-            self.assertIn("Content-Transfer-Encoding", header)
-            self.assertIn("Generated-By", header)
-
-            # not clear if these should be required in POT (template) files
-            #self.assertIn("Report-Msgid-Bugs-To", header)
-            #self.assertIn("Language", header)
-
-            #"Plural-Forms" is optional
-
-    @unittest.skipIf(sys.platform.startswith('aix'),
-                     'bpo-29972: broken test on AIX')
-    def test_POT_Creation_Date(self):
-        """ Match the date format from xgettext for POT-Creation-Date """
-        from datetime import datetime
-        with temp_cwd(None) as cwd:
-            assert_python_ok(self.script)
-            with open('messages.pot', encoding='utf-8') as fp:
-                data = fp.read()
-            header = self.get_header(data)
-            creationDate = header['POT-Creation-Date']
-
-            # peel off the escaped newline at the end of string
-            if creationDate.endswith('\\n'):
-                creationDate = creationDate[:-len('\\n')]
-
-            # This will raise if the date format does not exactly match.
-            datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z')
-
-    def test_funcdocstring(self):
-        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
-            with self.subTest(doc):
-                msgids = self.extract_docstrings_from_str(dedent('''\
-                def foo(bar):
-                    %s
-                ''' % doc))
-                self.assertIn('doc', msgids)
-
-    def test_funcdocstring_bytes(self):
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        def foo(bar):
-            b"""doc"""
-        '''))
-        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
-
-    def test_funcdocstring_fstring(self):
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        def foo(bar):
-            f"""doc"""
-        '''))
-        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
-
-    def test_classdocstring(self):
-        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
-            with self.subTest(doc):
-                msgids = self.extract_docstrings_from_str(dedent('''\
-                class C:
-                    %s
-                ''' % doc))
-                self.assertIn('doc', msgids)
-
-    def test_classdocstring_bytes(self):
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        class C:
-            b"""doc"""
-        '''))
-        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
-
-    def test_classdocstring_fstring(self):
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        class C:
-            f"""doc"""
-        '''))
-        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
-
-    def test_moduledocstring(self):
-        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
-            with self.subTest(doc):
-                msgids = self.extract_docstrings_from_str(dedent('''\
-                %s
-                ''' % doc))
-                self.assertIn('doc', msgids)
-
-    def test_moduledocstring_bytes(self):
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        b"""doc"""
-        '''))
-        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
-
-    def test_moduledocstring_fstring(self):
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        f"""doc"""
-        '''))
-        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
-
-    def test_msgid(self):
-        msgids = self.extract_docstrings_from_str(
-                '''_("""doc""" r'str' u"ing")''')
-        self.assertIn('docstring', msgids)
-
-    def test_msgid_bytes(self):
-        msgids = self.extract_docstrings_from_str('_(b"""doc""")')
-        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
-
-    def test_msgid_fstring(self):
-        msgids = self.extract_docstrings_from_str('_(f"""doc""")')
-        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
-
-    def test_funcdocstring_annotated_args(self):
-        """ Test docstrings for functions with annotated args """
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        def foo(bar: str):
-            """doc"""
-        '''))
-        self.assertIn('doc', msgids)
-
-    def test_funcdocstring_annotated_return(self):
-        """ Test docstrings for functions with annotated return type """
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        def foo(bar) -> str:
-            """doc"""
-        '''))
-        self.assertIn('doc', msgids)
-
-    def test_funcdocstring_defvalue_args(self):
-        """ Test docstring for functions with default arg values """
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        def foo(bar=()):
-            """doc"""
-        '''))
-        self.assertIn('doc', msgids)
-
-    def test_funcdocstring_multiple_funcs(self):
-        """ Test docstring extraction for multiple functions combining
-        annotated args, annotated return types and default arg values
-        """
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        def foo1(bar: tuple=()) -> str:
-            """doc1"""
-
-        def foo2(bar: List[1:2]) -> (lambda x: x):
-            """doc2"""
-
-        def foo3(bar: 'func'=lambda x: x) -> {1: 2}:
-            """doc3"""
-        '''))
-        self.assertIn('doc1', msgids)
-        self.assertIn('doc2', msgids)
-        self.assertIn('doc3', msgids)
-
-    def test_classdocstring_early_colon(self):
-        """ Test docstring extraction for a class with colons occurring within
-        the parentheses.
-        """
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        class D(L[1:2], F({1: 2}), metaclass=M(lambda x: x)):
-            """doc"""
-        '''))
-        self.assertIn('doc', msgids)
-
-    def test_calls_in_fstrings(self):
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        f"{_('foo bar')}"
-        '''))
-        self.assertIn('foo bar', msgids)
-
-    def test_calls_in_fstrings_raw(self):
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        rf"{_('foo bar')}"
-        '''))
-        self.assertIn('foo bar', msgids)
-
-    def test_calls_in_fstrings_nested(self):
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        f"""{f'{_("foo bar")}'}"""
-        '''))
-        self.assertIn('foo bar', msgids)
-
-    def test_calls_in_fstrings_attribute(self):
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        f"{obj._('foo bar')}"
-        '''))
-        self.assertIn('foo bar', msgids)
-
-    def test_calls_in_fstrings_with_call_on_call(self):
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        f"{type(str)('foo bar')}"
-        '''))
-        self.assertNotIn('foo bar', msgids)
-
-    def test_calls_in_fstrings_with_format(self):
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        f"{_('foo {bar}').format(bar='baz')}"
-        '''))
-        self.assertIn('foo {bar}', msgids)
-
-    def test_calls_in_fstrings_with_wrong_input_1(self):
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        f"{_(f'foo {bar}')}"
-        '''))
-        self.assertFalse([msgid for msgid in msgids if 'foo {bar}' in msgid])
-
-    def test_calls_in_fstrings_with_wrong_input_2(self):
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        f"{_(1)}"
-        '''))
-        self.assertNotIn(1, msgids)
-
-    def test_calls_in_fstring_with_multiple_args(self):
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        f"{_('foo', 'bar')}"
-        '''))
-        self.assertNotIn('foo', msgids)
-        self.assertNotIn('bar', msgids)
-
-    def test_calls_in_fstring_with_keyword_args(self):
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        f"{_('foo', bar='baz')}"
-        '''))
-        self.assertNotIn('foo', msgids)
-        self.assertNotIn('bar', msgids)
-        self.assertNotIn('baz', msgids)
-
-    def test_calls_in_fstring_with_partially_wrong_expression(self):
-        msgids = self.extract_docstrings_from_str(dedent('''\
-        f"{_(f'foo') + _('bar')}"
-        '''))
-        self.assertNotIn('foo', msgids)
-        self.assertIn('bar', msgids)
-
-    def test_files_list(self):
-        """Make sure the directories are inspected for source files
-           bpo-31920
-        """
-        text1 = 'Text to translate1'
-        text2 = 'Text to translate2'
-        text3 = 'Text to ignore'
-        with temp_cwd(None), temp_dir(None) as sdir:
-            os.mkdir(os.path.join(sdir, 'pypkg'))
-            with open(os.path.join(sdir, 'pypkg', 'pymod.py'), 'w',
-                      encoding='utf-8') as sfile:
-                sfile.write(f'_({text1!r})')
-            os.mkdir(os.path.join(sdir, 'pkg.py'))
-            with open(os.path.join(sdir, 'pkg.py', 'pymod2.py'), 'w',
-                      encoding='utf-8') as sfile:
-                sfile.write(f'_({text2!r})')
-            os.mkdir(os.path.join(sdir, 'CVS'))
-            with open(os.path.join(sdir, 'CVS', 'pymod3.py'), 'w',
-                      encoding='utf-8') as sfile:
-                sfile.write(f'_({text3!r})')
-            assert_python_ok(self.script, sdir)
-            with open('messages.pot', encoding='utf-8') as fp:
-                data = fp.read()
-            self.assertIn(f'msgid "{text1}"', data)
-            self.assertIn(f'msgid "{text2}"', data)
-            self.assertNotIn(text3, data)
diff --git a/Lib/test/test_tools/test_i18n/__init__.py b/Lib/test/test_tools/test_i18n/__init__.py
new file mode 100644
index 00000000000000..2f1449063fe6bb
--- /dev/null
+++ b/Lib/test/test_tools/test_i18n/__init__.py
@@ -0,0 +1,6 @@
+import os.path
+from test import support
+
+
+def load_tests(*args):  # forwards *args to support.load_package_tests
+    return support.load_package_tests(os.path.dirname(__file__), *args)
diff --git a/Lib/test/test_tools/test_i18n/data/all.pot b/Lib/test/test_tools/test_i18n/data/all.pot
new file mode 100644
index 00000000000000..53a517a38fddcd
--- /dev/null
+++ b/Lib/test/test_tools/test_i18n/data/all.pot
@@ -0,0 +1,164 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR ORGANIZATION
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2023-05-11 00:37+0200\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: pygettext.py 1.5\n"
+
+#: pypkg/docstrings.py:2
+#, docstring
+msgid ""
+"multiline\n"
+"module docstring"
+msgstr ""
+
+#: pypkg/docstrings.py:9 pypkg/docstrings.py:74 pypkg/messages.py:35
+#: pypkg/mixed.py:11 pypkg/mixed.py:15
+msgid "foo"
+msgstr ""
+
+#: pypkg/docstrings.py:16
+#, docstring
+msgid "docstring with some blank lines in front"
+msgstr ""
+
+#: pypkg/docstrings.py:21
+#, docstring
+msgid ""
+"multiline\n"
+"docstring with some more content"
+msgstr ""
+
+#: pypkg/docstrings.py:27
+#, docstring
+msgid "outer docstring"
+msgstr ""
+
+#: pypkg/docstrings.py:29
+#, docstring
+msgid "nested docstring"
+msgstr ""
+
+#: pypkg/docstrings.py:33
+#, docstring
+msgid "some docstring"
+msgstr ""
+
+#: pypkg/docstrings.py:39
+#, docstring
+msgid ""
+"A very long docstring which should be correctly wrapped into multiple lines\n"
+"in the output po file according to the maximum line width setting."
+msgstr ""
+
+#: pypkg/docstrings.py:46
+#, docstring
+msgid "Raw docstrings are ok"
+msgstr ""
+
+#: pypkg/docstrings.py:50
+#, docstring
+msgid "Unicode docstrings are ok"
+msgstr ""
+
+#: pypkg/docstrings.py:66
+#, docstring
+msgid "\"Some non-ascii dosctring: ěščř αβγδ"
+msgstr ""
+
+#: pypkg/docstrings.py:78
+#, docstring
+msgid "outer class docstring"
+msgstr ""
+
+#: pypkg/docstrings.py:80
+#, docstring
+msgid "inner class docstring"
+msgstr ""
+
+#: pypkg/docstrings.py:83 pypkg/docstrings.py:86
+#, docstring
+msgid "method docstring"
+msgstr ""
+
+#: pypkg/docstrings.py:90
+#, docstring
+msgid "Async function docstring"
+msgstr ""
+
+#: pypkg/docstrings.py:92
+#, docstring
+msgid "async function nested docstring"
+msgstr ""
+
+#: pypkg/messages.py:5 pypkg/messages.py:7 pypkg/mixed.py:6 pypkg/mixed.py:8
+msgid "Hello, world!"
+msgstr ""
+
+#: pypkg/messages.py:10
+msgid ""
+"Hello,\n"
+"    multiline!\n"
+msgstr ""
+
+#: pypkg/messages.py:14
+msgid "parentheses"
+msgstr ""
+
+#: pypkg/messages.py:16
+msgid "Raw string \\ \\n \\t"
+msgstr ""
+
+#: pypkg/messages.py:18
+msgid "unicode string"
+msgstr ""
+
+#: pypkg/messages.py:20
+msgid "rawdocunicodestandard"
+msgstr ""
+
+#: pypkg/messages.py:31
+msgid "Some non-ascii text: ěščř αβγδ"
+msgstr ""
+
+#: pypkg/messages.py:33
+msgid "Some very long text which should wrap correctly into multiple lines while respecting the maximum line length"
+msgstr ""
+
+#: pypkg/messages.py:35
+msgid "bar"
+msgstr ""
+
+#: pypkg/messages.py:37 pypkg/messages.py:73
+msgid "baz"
+msgstr ""
+
+#: pypkg/messages.py:55
+msgid "Hello, {}!"
+msgstr ""
+
+#: pypkg/messages.py:59 pypkg/messages.py:60
+msgid "A"
+msgstr ""
+
+#: pypkg/messages.py:59 pypkg/messages.py:60
+msgid "B"
+msgstr ""
+
+#: pypkg/messages.py:64
+msgid "nested"
+msgstr ""
+
+#: pypkg/messages.py:67
+msgid "param"
+msgstr ""
+
diff --git a/Lib/test/test_tools/test_i18n/data/docstrings.pot b/Lib/test/test_tools/test_i18n/data/docstrings.pot
new file mode 100644
index 00000000000000..5da0363cf48ffa
--- /dev/null
+++ b/Lib/test/test_tools/test_i18n/data/docstrings.pot
@@ -0,0 +1,102 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR ORGANIZATION
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2023-05-10 16:10+0200\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: pygettext.py 1.5\n"
+
+#: docstrings.py:2
+#, docstring
+msgid ""
+"multiline\n"
+"module docstring"
+msgstr ""
+
+#: docstrings.py:9 docstrings.py:74
+#, docstring
+msgid "foo"
+msgstr ""
+
+#: docstrings.py:16
+#, docstring
+msgid "docstring with some blank lines in front"
+msgstr ""
+
+#: docstrings.py:21
+#, docstring
+msgid ""
+"multiline\n"
+"docstring with some more content"
+msgstr ""
+
+#: docstrings.py:27
+#, docstring
+msgid "outer docstring"
+msgstr ""
+
+#: docstrings.py:29
+#, docstring
+msgid "nested docstring"
+msgstr ""
+
+#: docstrings.py:33
+#, docstring
+msgid "some docstring"
+msgstr ""
+
+#: docstrings.py:39
+#, docstring
+msgid ""
+"A very long docstring which should be correctly wrapped into multiple lines\n"
+"in the output po file according to the maximum line width setting."
+msgstr ""
+
+#: docstrings.py:46
+#, docstring
+msgid "Raw docstrings are ok"
+msgstr ""
+
+#: docstrings.py:50
+#, docstring
+msgid "Unicode docstrings are ok"
+msgstr ""
+
+#: docstrings.py:66
+#, docstring
+msgid "\"Some non-ascii dosctring: ěščř αβγδ"
+msgstr ""
+
+#: docstrings.py:78
+#, docstring
+msgid "outer class docstring"
+msgstr ""
+
+#: docstrings.py:80
+#, docstring
+msgid "inner class docstring"
+msgstr ""
+
+#: docstrings.py:83 docstrings.py:86
+#, docstring
+msgid "method docstring"
+msgstr ""
+
+#: docstrings.py:90
+#, docstring
+msgid "Async function docstring"
+msgstr ""
+
+#: docstrings.py:92
+#, docstring
+msgid "async function nested docstring"
+msgstr ""
+
diff --git a/Lib/test/test_tools/test_i18n/data/docstrings.py b/Lib/test/test_tools/test_i18n/data/docstrings.py
new file mode 100644
index 00000000000000..bcdcf60a6b0e37
--- /dev/null
+++ b/Lib/test/test_tools/test_i18n/data/docstrings.py
@@ -0,0 +1,92 @@
+# Test docstring extraction
+"""
+multiline
+module docstring
+"""
+
+
+def test(x):
+    """foo"""
+    return 2*x
+
+
+def test2(x):
+
+
+    """docstring with some blank lines in front"""
+    return 2*x
+
+
+def test3(x):
+    """multiline
+    docstring with some more content
+    """
+
+
+def test4(x):
+    """outer docstring"""
+    def inner(y):
+        """nested docstring"""
+
+
+def test5(x):
+    """some docstring"""
+    """another string"""
+    """and one more"""
+
+
+def test6(x):
+    """
+    A very long docstring which should be correctly wrapped into multiple lines
+    in the output po file according to the maximum line width setting.
+    """
+
+
+def test7(x):
+    r"""Raw docstrings are ok"""
+
+
+def test8(x):
+    u"""Unicode docstrings are ok"""
+
+
+def test9(x):
+    b"""bytes should not be picked up"""
+
+
+def test10(x):
+    f"""f-strings should not be picked up"""
+
+
+def test11(x):
+    """Hello, {}!""".format("docstring")
+
+
+def test12(x):
+    """"Some non-ascii dosctring: ěščř αβγδ"""
+
+
+def test13(x):
+    """"""
+
+
+class Foo:
+    """foo"""
+
+
+class Outer:
+    """outer class docstring"""
+    class Inner:
+        "inner class docstring"
+
+        def inner_method(self):
+            """method docstring"""
+
+    def outer_method(self):
+        """method docstring"""
+
+
+async def async_test(x):
+    """Async function docstring"""
+    async def async_inner(y):
+        """async function nested docstring"""
diff --git a/Lib/test/test_tools/test_i18n/data/messages.pot b/Lib/test/test_tools/test_i18n/data/messages.pot
new file mode 100644
index 00000000000000..490b8bd728d74c
--- /dev/null
+++ b/Lib/test/test_tools/test_i18n/data/messages.pot
@@ -0,0 +1,82 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR ORGANIZATION
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2023-05-11 00:33+0200\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: pygettext.py 1.5\n"
+
+#: messages.py:5 messages.py:7
+msgid "Hello, world!"
+msgstr ""
+
+#: messages.py:10
+msgid ""
+"Hello,\n"
+"    multiline!\n"
+msgstr ""
+
+#: messages.py:14
+msgid "parentheses"
+msgstr ""
+
+#: messages.py:16
+msgid "Raw string \\ \\n \\t"
+msgstr ""
+
+#: messages.py:18
+msgid "unicode string"
+msgstr ""
+
+#: messages.py:20
+msgid "rawdocunicodestandard"
+msgstr ""
+
+#: messages.py:31
+msgid "Some non-ascii text: ěščř αβγδ"
+msgstr ""
+
+#: messages.py:33
+msgid "Some very long text which should wrap correctly into multiple lines while respecting the maximum line length"
+msgstr ""
+
+#: messages.py:35
+msgid "foo"
+msgstr ""
+
+#: messages.py:35
+msgid "bar"
+msgstr ""
+
+#: messages.py:37 messages.py:73
+msgid "baz"
+msgstr ""
+
+#: messages.py:55
+msgid "Hello, {}!"
+msgstr ""
+
+#: messages.py:59 messages.py:60
+msgid "A"
+msgstr ""
+
+#: messages.py:59 messages.py:60
+msgid "B"
+msgstr ""
+
+#: messages.py:64
+msgid "nested"
+msgstr ""
+
+#: messages.py:67
+msgid "param"
+msgstr ""
+
diff --git a/Lib/test/test_tools/test_i18n/data/messages.py b/Lib/test/test_tools/test_i18n/data/messages.py
new file mode 100644
index 00000000000000..aea65a97eb1679
--- /dev/null
+++ b/Lib/test/test_tools/test_i18n/data/messages.py
@@ -0,0 +1,81 @@
+# Test message extraction
+from gettext import gettext as _
+
+
+_("Hello, world!")
+
+_("Hello, "
+  "world!")
+
+_("""Hello,
+    multiline!
+""")
+
+(_("parentheses"))
+
+_(r"Raw string \ \n \t")
+
+_(u"unicode string")
+
+_(r"raw" """doc""" u"unicode" "standard")
+
+_(f"f-strings should not get extracted")
+
+_(f"f-strings should not get {'extracted'}")
+
+text = 'extracted'
+_(f"f-strings should not get {text}")
+
+_(b"bytes should not be extracted")
+
+_("Some non-ascii text: ěščř αβγδ")
+
+_("Some very long text which should wrap correctly into multiple lines while respecting the maximum line length")
+
+_("foo"), _("bar")
+
+_("baz"), _("baz")
+
+_("")
+
+_()
+
+_(None)
+
+_(1)
+
+_(False)
+
+_(x="no kw arguments")
+
+_("foo", "bar")
+
+_("something", x="something else")
+
+_("Hello, {}!").format("world")
+
+_("Hello, {}!".format("world"))
+
+arr = [_("A"), _("B")]
+obj = {'a': _("A"), 'b': _("B")}
+
+
+def test():
+    print(_("nested"))
+
+
+def test2(x=_("param")):
+    pass
+
+
+class Foo:
+    def bar(self):
+        return _("baz")
+
+
+def _(x):
+    pass
+
+
+def _(x="don't extract me"):
+    pass
diff --git a/Lib/test/test_tools/test_i18n/data/mixed.pot b/Lib/test/test_tools/test_i18n/data/mixed.pot
new file mode 100644
index 00000000000000..2feb7624a4ce1a
--- /dev/null
+++ b/Lib/test/test_tools/test_i18n/data/mixed.pot
@@ -0,0 +1,24 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR ORGANIZATION
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2023-05-10 16:11+0200\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: pygettext.py 1.5\n"
+
+#: mixed.py:6 mixed.py:8
+msgid "Hello, world!"
+msgstr ""
+
+#: mixed.py:11 mixed.py:15
+msgid "foo"
+msgstr ""
+
diff --git a/Lib/test/test_tools/test_i18n/data/mixed.py b/Lib/test/test_tools/test_i18n/data/mixed.py
new file mode 100644
index 00000000000000..5a39d0eafbb7dd
--- /dev/null
+++ b/Lib/test/test_tools/test_i18n/data/mixed.py
@@ -0,0 +1,15 @@
+# Test messages and docstrings with the same msgid
+from gettext import gettext as _
+
+
+def test():
+    """Hello, world!"""
+
+    print(_("Hello, world!"))
+
+
+_("foo")
+
+
+class Foo:
+    """foo"""
diff --git a/Lib/test/test_tools/test_i18n/test_i18n.py b/Lib/test/test_tools/test_i18n/test_i18n.py
new file mode 100644
index 00000000000000..a3839acb86f78c
--- /dev/null
+++ b/Lib/test/test_tools/test_i18n/test_i18n.py
@@ -0,0 +1,135 @@
+"""Tests to cover the Tools/i18n package"""
+
+import os
+import re
+import sys
+import unittest
+from pathlib import Path
+from test.support.os_helper import temp_cwd
+from test.support.script_helper import assert_python_ok
+from test.test_tools import skip_if_missing, toolsdir
+
+skip_if_missing()
+
+DATA_DIR = Path(__file__).parent / 'data'
+
+
+class Test_pygettext(unittest.TestCase):
+    """Tests for the pygettext.py tool"""
+
+    script = os.path.join(toolsdir, 'i18n', 'pygettext.py')
+
+    def get_header(self, data):
+        """ utility: return the header fields of .po text as a dictionary """
+        skipped = ('#', 'msgid', 'msgstr')
+        fields = {}
+        for entry in data.split('\n'):
+            # Anything but blank/comment/msgid/msgstr lines is a header line.
+            if entry and not entry.startswith(skipped):
+                name, value = entry.strip('"').split(':', 1)
+                fields[name] = value.strip()
+        return fields
+
+    def assertPOEqual(self, expected, actual):
+        """Check if two PO files are equal once date and paths are normalized"""
+        # r'\\n' matches the literal backslash + 'n' that ends the header line.
+        date_pattern = re.compile(r'"POT-Creation-Date: .+?\\n"')
+        header = r'"POT-Creation-Date: 2000-01-01 00:00+0000\\n"'
+        expected = date_pattern.sub(header, expected)
+        actual = date_pattern.sub(header, actual)
+
+        # Normalize the path separators in the '#:' file location comments in
+        # case the platform does not use '/' as its default separator
+        fileloc_pattern = re.compile(r'#:.+')
+
+        def replace(match):
+            return match[0].replace(os.sep, "/")
+        expected = fileloc_pattern.sub(replace, expected)
+        actual = fileloc_pattern.sub(replace, actual)
+
+        self.assertEqual(expected, actual)
+
+    def test_header(self):
+        """Make sure the header contains the required fields listed in:
+           http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry
+        """
+        # Not clear if "Report-Msgid-Bugs-To" and "Language" should be
+        # required in POT (template) files; "Plural-Forms" is optional.
+        required_fields = (
+            "Project-Id-Version",
+            "POT-Creation-Date",
+            "PO-Revision-Date",
+            "Last-Translator",
+            "Language-Team",
+            "MIME-Version",
+            "Content-Type",
+            "Content-Transfer-Encoding",
+            "Generated-By",
+        )
+        with temp_cwd(None):
+            # No input files are passed to the script.
+            assert_python_ok(self.script)
+            pot_text = Path('messages.pot').read_text(encoding='utf-8')
+            found = self.get_header(pot_text)
+
+            for field in required_fields:
+                self.assertIn(field, found)
+
+    @unittest.skipIf(sys.platform.startswith('aix'),
+                     'bpo-29972: broken test on AIX')
+    def test_POT_Creation_Date(self):
+        """ POT-Creation-Date must use the date format produced by xgettext """
+        from datetime import datetime
+
+        with temp_cwd(None):
+            # No input files are passed to the script.
+            assert_python_ok(self.script)
+            pot_text = Path('messages.pot').read_text(encoding='utf-8')
+
+        fields = self.get_header(pot_text)
+        # The value may still end with the escaped newline (a backslash and
+        # an 'n') of the PO string syntax; drop it before parsing.
+        creation_date = fields['POT-Creation-Date'].removesuffix('\\n')
+
+        # This will raise if the date format does not exactly match.
+        datetime.strptime(creation_date, '%Y-%m-%d %H:%M%z')
+
+    def test_files(self):
+        """Test message and docstring extraction.
+        Compares the script output for each data/*.py against its .pot file.
+        """
+        for stem in ('messages', 'docstrings', 'mixed'):
+            input_file = f'{stem}.py'
+            with self.subTest(f'Input file: data/{input_file}'):
+                source = (DATA_DIR / input_file).read_text(encoding='utf-8')
+                reference = DATA_DIR / f'{stem}.pot'
+                # Run on a copy in a scratch directory; the expected files
+                # refer to the bare file name (e.g. '#: messages.py:5').
+                with temp_cwd(None):
+                    Path(input_file).write_text(source, encoding='utf-8')
+                    assert_python_ok(self.script, '-D', input_file)
+                    output = Path('messages.pot').read_text(encoding='utf-8')
+
+                expected = reference.read_text(encoding='utf-8')
+                self.assertPOEqual(expected, output)
+
+    def test_files_list(self):
+        """Make sure the directories are inspected for source files
+           bpo-31920
+        """
+        with temp_cwd(None):
+            # Copy the data modules into a 'pypkg' directory and pass the
+            # directory itself, not the individual files, to the script.
+            package = Path('pypkg')
+            package.mkdir()
+
+            for name in ('messages.py', 'docstrings.py', 'mixed.py'):
+                module_text = (DATA_DIR / name).read_text(encoding='utf-8')
+                (package / name).write_text(module_text, encoding='utf-8')
+
+            assert_python_ok(self.script, '-D', 'pypkg')
+            output = Path('messages.pot').read_text(encoding='utf-8')
+
+        # all.pot holds the expected combined output for the three modules.
+        expected = (DATA_DIR / 'all.pot').read_text(encoding='utf-8')
+        self.assertPOEqual(expected, output)
diff --git a/Misc/NEWS.d/next/Tools-Demos/2023-05-11-23-32-25.gh-issue-104400.23vxm7.rst b/Misc/NEWS.d/next/Tools-Demos/2023-05-11-23-32-25.gh-issue-104400.23vxm7.rst
new file mode 100644
index 00000000000000..f7bb05b99efe59
--- /dev/null
+++ b/Misc/NEWS.d/next/Tools-Demos/2023-05-11-23-32-25.gh-issue-104400.23vxm7.rst
@@ -0,0 +1 @@
+Use an AST parser instead of a tokenizer in pygettext.
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index 3a0b27ba420e7a..6a5a8d85c0a05e 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -1,11 +1,10 @@
 #! /usr/bin/env python3
-# -*- coding: iso-8859-1 -*-
 # Originally written by Barry Warsaw <barry@python.org>
 #
 # Minimally patched to make it even more xgettext compatible
 # by Peter Funk <pf@artcom-gmbh.de>
 #
-# 2002-11-22 J�rgen Hermann <jh@web.de>
+# 2002-11-22 Jürgen Hermann <jh@web.de>
 # Added checks that _() only contains string literals, and
 # command line args are resolved to module lists, i.e. you
 # can now pass a filename, a module or package name, or a
@@ -73,6 +72,10 @@
     --default-domain=name
         Rename the default output file from messages.pot to name.pot.
 
+    --charset=charset
+        Character set used to read the input files and to write the
+        output file (default "utf-8").
+
     -E
     --escape
         Replace non-ASCII characters with octal escape sequences.
@@ -155,24 +158,24 @@
 If `inputfile' is -, standard input is read.
 """)
 
-import os
+import ast
+import getopt
+import glob
 import importlib.machinery
 import importlib.util
+import os
 import sys
-import glob
 import time
-import getopt
-import ast
-import token
-import tokenize
+from ast import (AsyncFunctionDef, ClassDef, FunctionDef, Module, NodeVisitor,
+                 unparse)
+from collections import defaultdict
+from dataclasses import dataclass
 
 __version__ = '1.5'
 
 default_keywords = ['_']
 DEFAULTKEYWORDS = ', '.join(default_keywords)
 
-EMPTYSTRING = ''
-
 
 # The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
 # there.
@@ -207,7 +210,7 @@ def make_escapes(pass_nonascii):
     global escapes, escape
     if pass_nonascii:
         # Allow non-ascii characters to pass through so that e.g. 'msgid
-        # "H�he"' would result not result in 'msgid "H\366he"'.  Otherwise we
+        # "Höhe"' would not result in 'msgid "H\366he"'.  Otherwise we
         # escape any character outside the 32..126 range.
         mod = 128
         escape = escape_ascii
@@ -227,19 +230,11 @@ def make_escapes(pass_nonascii):
 def escape_ascii(s, encoding):
     return ''.join(escapes[ord(c)] if ord(c) < 128 else c for c in s)
 
+
 def escape_nonascii(s, encoding):
     return ''.join(escapes[b] for b in s.encode(encoding))
 
 
-def is_literal_string(s):
-    return s[0] in '\'"' or (s[0] in 'rRuU' and s[1] in '\'"')
-
-
-def safe_eval(s):
-    # unwrap quotes, safely
-    return eval(s, {'__builtins__':{}}, {})
-
-
 def normalize(s, encoding):
     # This converts the various Python string types into a format that is
     # appropriate for .po files, namely much closer to C style.
@@ -306,211 +301,133 @@ def getFilesForName(name):
     return []
 
 
-class TokenEater:
-    def __init__(self, options):
-        self.__options = options
-        self.__messages = {}
-        self.__state = self.__waiting
-        self.__data = []
-        self.__lineno = -1
-        self.__freshmodule = 1
-        self.__curfile = None
-        self.__enclosurecount = 0
-
-    def __call__(self, ttype, tstring, stup, etup, line):
-        # dispatch
-##        import token
-##        print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
-##              file=sys.stderr)
-        self.__state(ttype, tstring, stup[0])
-
-    def __waiting(self, ttype, tstring, lineno):
-        opts = self.__options
-        # Do docstring extractions, if enabled
-        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
-            # module docstring?
-            if self.__freshmodule:
-                if ttype == tokenize.STRING and is_literal_string(tstring):
-                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
-                    self.__freshmodule = 0
-                    return
-                if ttype in (tokenize.COMMENT, tokenize.NL, tokenize.ENCODING):
-                    return
-                self.__freshmodule = 0
-            # class or func/method docstring?
-            if ttype == tokenize.NAME and tstring in ('class', 'def'):
-                self.__state = self.__suiteseen
-                return
-        if ttype == tokenize.NAME and tstring in opts.keywords:
-            self.__state = self.__keywordseen
+@dataclass
+class Message:  # one extracted occurrence of a translatable string
+    filename: str  # source file the string was found in
+    lineno: int  # line number recorded for the occurrence
+    msgid: str  # the string itself, used as the catalog key
+    is_docstring: bool = False  # True when taken from a docstring
+
+
+def get_funcname(node):
+    """Return the bare name a Call node invokes (``f`` or ``x.f``), else None."""
+    func = node.func
+    if isinstance(func, ast.Name):
+        return func.id
+    # Only the final attribute is reported, e.g. ``a.b._`` -> ``_``.
+    return func.attr if isinstance(func, ast.Attribute) else None
+
+
+class GettextVisitor(NodeVisitor):
+    def __init__(self, options, filename=None):
+        super().__init__()
+        self.options = options
+        self.filename = filename  # file being visited; main() reassigns it per input file
+        self.messages = defaultdict(list)  # msgid -> list of Message occurrences
+
+    def _is_string_const(self, node):  # True only for a plain str literal node
+        return isinstance(node, ast.Constant) and isinstance(node.value, str)
+
+    def _extract_docstring(self, node):
+        """Record node's docstring as a message unless disabled or excluded."""
+        if not self.options.docstrings or self.options.nodocstrings.get(self.filename):
             return
-        if ttype == tokenize.STRING:
-            maybe_fstring = ast.parse(tstring, mode='eval').body
-            if not isinstance(maybe_fstring, ast.JoinedStr):
-                return
-            for value in filter(lambda node: isinstance(node, ast.FormattedValue),
-                                maybe_fstring.values):
-                for call in filter(lambda node: isinstance(node, ast.Call),
-                                   ast.walk(value)):
-                    func = call.func
-                    if isinstance(func, ast.Name):
-                        func_name = func.id
-                    elif isinstance(func, ast.Attribute):
-                        func_name = func.attr
+        expr = node.body[0] if node.body else None
+        if not (isinstance(expr, ast.Expr) and self._is_string_const(expr.value)):
+            return
+        docstring = ast.get_docstring(node)
+        # Docstrings listed in the exclude file (options.toexclude) are skipped too.
+        if docstring and docstring not in self.options.toexclude:
+            message = Message(self.filename, expr.lineno, docstring, is_docstring=True)
+            self.messages[docstring].append(message)
+
+    def _extract_message(self, node):
+        """Record the msgid of a gettext call, or warn on stderr if malformed.
+
+        Only calls whose name is in options.keywords are considered; msgids
+        listed in options.toexclude (the -x/--exclude-file option) are skipped.
+        """
+        funcname = get_funcname(node)
+        if funcname not in self.options.keywords:
+            return
+
+        filename = self.filename
+        lineno = node.lineno
+        if len(node.args) != 1:
+            print(f'*** {filename}:{lineno}: Seen unexpected amount of '
+                  f'positional arguments in gettext call: {unparse(node)}',
+                  file=sys.stderr)
+            return
+        if node.keywords:
+            print(f'*** {filename}:{lineno}: Seen unexpected keyword arguments '
+                  f'in gettext call: {unparse(node)}', file=sys.stderr)
+            return
+        arg = node.args[0]
+        if not self._is_string_const(arg):
+            print(f'*** {filename}:{lineno}: Seen unexpected argument type '
+                  f'in gettext call: {unparse(node)}', file=sys.stderr)
+            return
+        msgid = arg.value
+        if msgid == '':
+            print(f'*** {filename}:{lineno}: Empty msgid. It is reserved by GNU gettext: '
+                  'gettext("") returns the header entry with '
+                  'meta information, not the empty string.',
+                  file=sys.stderr)
+            return
+        if msgid not in self.options.toexclude:
+            self.messages[msgid].append(Message(filename, lineno, msgid))
+
+    def visit(self, node):  # hook docstring extraction into every scope node
+        if isinstance(node, (Module, FunctionDef, AsyncFunctionDef, ClassDef)):
+            self._extract_docstring(node)
+        return super().visit(node)
+
+    def visit_Call(self, node):
+        self._extract_message(node)
+        self.generic_visit(node)  # keep descending: arguments may contain further calls
+
+
+def format_pot_file(all_messages, options, encoding):
+    """Return the text of a .pot catalog for all_messages (msgid -> [Message])."""
+    output = pot_header % {'time': time.strftime('%Y-%m-%d %H:%M%z'),
+                           'version': __version__,
+                           'charset': encoding,
+                           'encoding': '8bit'}
+    # Group msgids by their sorted, de-duplicated (filename, lineno) tuple so
+    # that entries are emitted in source-location order.
+    inverted = defaultdict(dict)
+    for msgid, messages in all_messages.items():
+        occurrences = tuple(sorted({(msg.filename, msg.lineno) for msg in messages}))
+        inverted[occurrences][msgid] = messages
+
+    for occurrences in sorted(inverted):
+        for msgid, messages in inverted[occurrences].items():
+            # NOTE(review): options.locationstyle is ignored; output is GNU-style.
+            if options.writelocations:
+                locline = '#:'
+                # Pack as many locations per line as options.width allows.
+                for (filename, lineno) in occurrences:
+                    s = f' {filename}:{lineno}'
+                    if len(locline) + len(s) <= options.width:
+                        locline = locline + s
                     else:
-                        continue
-
-                    if func_name not in opts.keywords:
-                        continue
-                    if len(call.args) != 1:
-                        print(_(
-                            '*** %(file)s:%(lineno)s: Seen unexpected amount of'
-                            ' positional arguments in gettext call: %(source_segment)s'
-                            ) % {
-                            'source_segment': ast.get_source_segment(tstring, call) or tstring,
-                            'file': self.__curfile,
-                            'lineno': lineno
-                            }, file=sys.stderr)
-                        continue
-                    if call.keywords:
-                        print(_(
-                            '*** %(file)s:%(lineno)s: Seen unexpected keyword arguments'
-                            ' in gettext call: %(source_segment)s'
-                            ) % {
-                            'source_segment': ast.get_source_segment(tstring, call) or tstring,
-                            'file': self.__curfile,
-                            'lineno': lineno
-                            }, file=sys.stderr)
-                        continue
-                    arg = call.args[0]
-                    if not isinstance(arg, ast.Constant):
-                        print(_(
-                            '*** %(file)s:%(lineno)s: Seen unexpected argument type'
-                            ' in gettext call: %(source_segment)s'
-                            ) % {
-                            'source_segment': ast.get_source_segment(tstring, call) or tstring,
-                            'file': self.__curfile,
-                            'lineno': lineno
-                            }, file=sys.stderr)
-                        continue
-                    if isinstance(arg.value, str):
-                        self.__addentry(arg.value, lineno)
-
-    def __suiteseen(self, ttype, tstring, lineno):
-        # skip over any enclosure pairs until we see the colon
-        if ttype == tokenize.OP:
-            if tstring == ':' and self.__enclosurecount == 0:
-                # we see a colon and we're not in an enclosure: end of def
-                self.__state = self.__suitedocstring
-            elif tstring in '([{':
-                self.__enclosurecount += 1
-            elif tstring in ')]}':
-                self.__enclosurecount -= 1
-
-    def __suitedocstring(self, ttype, tstring, lineno):
-        # ignore any intervening noise
-        if ttype == tokenize.STRING and is_literal_string(tstring):
-            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
-            self.__state = self.__waiting
-        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
-                           tokenize.COMMENT):
-            # there was no class docstring
-            self.__state = self.__waiting
-
-    def __keywordseen(self, ttype, tstring, lineno):
-        if ttype == tokenize.OP and tstring == '(':
-            self.__data = []
-            self.__lineno = lineno
-            self.__state = self.__openseen
-        else:
-            self.__state = self.__waiting
-
-    def __openseen(self, ttype, tstring, lineno):
-        if ttype == tokenize.OP and tstring == ')':
-            # We've seen the last of the translatable strings.  Record the
-            # line number of the first line of the strings and update the list
-            # of messages seen.  Reset state for the next batch.  If there
-            # were no strings inside _(), then just ignore this entry.
-            if self.__data:
-                self.__addentry(EMPTYSTRING.join(self.__data))
-            self.__state = self.__waiting
-        elif ttype == tokenize.STRING and is_literal_string(tstring):
-            self.__data.append(safe_eval(tstring))
-        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
-                           token.NEWLINE, tokenize.NL]:
-            # warn if we see anything else than STRING or whitespace
-            print(_(
-                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
-                ) % {
-                'token': tstring,
-                'file': self.__curfile,
-                'lineno': self.__lineno
-                }, file=sys.stderr)
-            self.__state = self.__waiting
-
-    def __addentry(self, msg, lineno=None, isdocstring=0):
-        if lineno is None:
-            lineno = self.__lineno
-        if not msg in self.__options.toexclude:
-            entry = (self.__curfile, lineno)
-            self.__messages.setdefault(msg, {})[entry] = isdocstring
-
-    def set_filename(self, filename):
-        self.__curfile = filename
-        self.__freshmodule = 1
-
-    def write(self, fp):
-        options = self.__options
-        timestamp = time.strftime('%Y-%m-%d %H:%M%z')
-        encoding = fp.encoding if fp.encoding else 'UTF-8'
-        print(pot_header % {'time': timestamp, 'version': __version__,
-                            'charset': encoding,
-                            'encoding': '8bit'}, file=fp)
-        # Sort the entries.  First sort each particular entry's keys, then
-        # sort all the entries by their first item.
-        reverse = {}
-        for k, v in self.__messages.items():
-            keys = sorted(v.keys())
-            reverse.setdefault(tuple(keys), []).append((k, v))
-        rkeys = sorted(reverse.keys())
-        for rkey in rkeys:
-            rentries = reverse[rkey]
-            rentries.sort()
-            for k, v in rentries:
-                # If the entry was gleaned out of a docstring, then add a
-                # comment stating so.  This is to aid translators who may wish
-                # to skip translating some unimportant docstrings.
-                isdocstring = any(v.values())
-                # k is the message string, v is a dictionary-set of (filename,
-                # lineno) tuples.  We want to sort the entries in v first by
-                # file name and then by line number.
-                v = sorted(v.keys())
-                if not options.writelocations:
-                    pass
-                # location comments are different b/w Solaris and GNU:
-                elif options.locationstyle == options.SOLARIS:
-                    for filename, lineno in v:
-                        d = {'filename': filename, 'lineno': lineno}
-                        print(_(
-                            '# File: %(filename)s, line: %(lineno)d') % d, file=fp)
-                elif options.locationstyle == options.GNU:
-                    # fit as many locations on one line, as long as the
-                    # resulting line length doesn't exceed 'options.width'
-                    locline = '#:'
-                    for filename, lineno in v:
-                        d = {'filename': filename, 'lineno': lineno}
-                        s = _(' %(filename)s:%(lineno)d') % d
-                        if len(locline) + len(s) <= options.width:
-                            locline = locline + s
-                        else:
-                            print(locline, file=fp)
-                            locline = "#:" + s
-                    if len(locline) > 2:
-                        print(locline, file=fp)
-                if isdocstring:
-                    print('#, docstring', file=fp)
-                print('msgid', normalize(k, encoding), file=fp)
-                print('msgstr ""\n', file=fp)
+                        output += locline + '\n'
+                        locline = '#:' + s
+                if len(locline) > 2:  # something beyond the bare '#:' prefix is pending
+                    output += locline + '\n'
+
+            # Flag the entry as a docstring only when every occurrence of it
+            # came from a docstring. This is to aid translators who may wish
+            # to skip translating some unimportant docstrings.
+            is_docstring = all(msg.is_docstring for msg in messages)
+            if is_docstring:
+                output += '#, docstring\n'
+
+            output += f'msgid {normalize(msgid, encoding)}\n'
+            output += 'msgstr ""\n'
+            output += '\n'
+
+    return output
 
 
 def main():
@@ -519,7 +436,7 @@ def main():
         opts, args = getopt.getopt(
             sys.argv[1:],
             'ad:DEhk:Kno:p:S:Vvw:x:X:',
-            ['extract-all', 'default-domain=', 'escape', 'help',
+            ['extract-all', 'default-domain=', 'charset=', 'escape', 'help',
              'keyword=', 'no-default-keywords',
              'add-location', 'no-location', 'output=', 'output-dir=',
              'style=', 'verbose', 'version', 'width=', 'exclude-file=',
@@ -535,6 +452,7 @@ class Options:
         SOLARIS = 2
         # defaults
         extractall = 0 # FIXME: currently this option has no effect at all.
+        charset = 'utf-8'
         escape = 0
         keywords = []
         outpath = ''
@@ -560,6 +478,8 @@ class Options:
             options.extractall = 1
         elif opt in ('-d', '--default-domain'):
             options.outfile = arg + '.pot'
+        elif opt in ('--charset',):
+            options.charset = arg
         elif opt in ('-E', '--escape'):
             options.escape = 1
         elif opt in ('-D', '--docstrings'):
@@ -631,7 +551,7 @@ class Options:
     args = expanded
 
     # slurp through all the files
-    eater = TokenEater(options)
+    visitor = GettextVisitor(options)
     for filename in args:
         if filename == '-':
             if options.verbose:
@@ -641,18 +561,11 @@ class Options:
         else:
             if options.verbose:
                 print(_('Working on %s') % filename)
-            fp = open(filename, 'rb')
+            fp = open(filename, encoding=options.charset)
             closep = 1
         try:
-            eater.set_filename(filename)
-            try:
-                tokens = tokenize.tokenize(fp.readline)
-                for _token in tokens:
-                    eater(*_token)
-            except tokenize.TokenError as e:
-                print('%s: %s, line %d, column %d' % (
-                    e.args[0], filename, e.args[1][0], e.args[1][1]),
-                    file=sys.stderr)
+            visitor.filename = filename
+            visitor.visit(ast.parse(fp.read()))
         finally:
             if closep:
                 fp.close()
@@ -664,10 +577,10 @@ class Options:
     else:
         if options.outpath:
             options.outfile = os.path.join(options.outpath, options.outfile)
-        fp = open(options.outfile, 'w')
+        fp = open(options.outfile, 'w', encoding=options.charset)
         closep = 1
     try:
-        eater.write(fp)
+        fp.write(format_pot_file(visitor.messages, options, options.charset))
     finally:
         if closep:
             fp.close()