From c93c1998cfb849e294b8bc0f7665b635cf3cc311 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 14 Sep 2023 23:13:04 +0200 Subject: [PATCH] gh-109425: regrtest decodes worker stdout with backslashreplace libregrtest now decodes stdout of test worker processes with the "backslashreplace" error handler to log corrupted stdout, instead of failing with an error and not logging the stdout. --- Lib/test/libregrtest/run_workers.py | 4 +- Lib/test/test_regrtest.py | 39 ++++++++++++------- ...-09-14-23-27-40.gh-issue-109425.j-uFep.rst | 3 ++ 3 files changed, 32 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2023-09-14-23-27-40.gh-issue-109425.j-uFep.rst diff --git a/Lib/test/libregrtest/run_workers.py b/Lib/test/libregrtest/run_workers.py index 45b2f424ce4e5df..481e909077a6fb6 100644 --- a/Lib/test/libregrtest/run_workers.py +++ b/Lib/test/libregrtest/run_workers.py @@ -218,7 +218,9 @@ def create_stdout(self, stack: contextlib.ExitStack) -> TextIO: # gh-94026: Write stdout+stderr to a tempfile as workaround for # non-blocking pipes on Emscripten with NodeJS. 
- stdout_file = tempfile.TemporaryFile('w+', encoding=encoding) + stdout_file = tempfile.TemporaryFile('w+', + encoding=encoding, + errors='backslashreplace') stack.enter_context(stdout_file) return stdout_file diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py index 55cf9e7f0207214..fc2a15cdf6049dc 100644 --- a/Lib/test/test_regrtest.py +++ b/Lib/test/test_regrtest.py @@ -421,10 +421,12 @@ def regex_search(self, regex, output): self.fail("%r not found in %r" % (regex, output)) return match - def check_line(self, output, regex, full=False): + def check_line(self, output, pattern, full=False, regex=True): + if not regex: + pattern = re.escape(pattern) if full: - regex += '\n' - regex = re.compile(r'^' + regex, re.MULTILINE) + pattern += '\n' + regex = re.compile(r'^' + pattern, re.MULTILINE) self.assertRegex(output, regex) def parse_executed_tests(self, output): @@ -1755,7 +1757,7 @@ def test_leak_tmp_file(self): f"files (1): mytmpfile", output) - def test_mp_decode_error(self): + def test_worker_decode_error(self): # gh-101634: If a worker stdout cannot be decoded, report a failed test # and a non-zero exit code. 
if sys.platform == 'win32': @@ -1767,29 +1769,40 @@ def test_mp_decode_error(self): if encoding is None: self.skipTest("cannot get regrtest worker encoding") - nonascii = b"byte:\xa0\xa9\xff\n" + nonascii = bytes(ch for ch in range(128, 256)) + corrupted_output = b"nonascii:%s\n" % (nonascii,) + # gh-108989: On Windows, assertion errors are written in UTF-16 + assertion_failed = 'Assertion failed: tstate_is_alive(tstate)\n' + corrupted_output += assertion_failed.encode('utf-16-le') try: - nonascii.decode(encoding) + corrupted_output.decode(encoding) except UnicodeDecodeError: pass else: - self.skipTest(f"{encoding} can decode non-ASCII bytes {nonascii!a}") + self.skipTest(f"{encoding} can decode non-ASCII bytes") + + expected_line = corrupted_output.decode(encoding, 'backslashreplace') code = textwrap.dedent(fr""" import sys + import unittest + + class Tests(unittest.TestCase): + def test_pass(self): + pass + # bytes which cannot be decoded from UTF-8 - nonascii = {nonascii!a} - sys.stdout.buffer.write(nonascii) + corrupted_output = {corrupted_output!a} + sys.stdout.buffer.write(corrupted_output) sys.stdout.buffer.flush() """) testname = self.create_test(code=code) - output = self.run_tests("--fail-env-changed", "-v", "-j1", testname, - exitcode=EXITCODE_BAD_TEST) + output = self.run_tests("--fail-env-changed", "-v", "-j1", testname) self.check_executed_tests(output, [testname], - failed=[testname], parallel=True, - stats=0) + stats=1) + self.check_line(output, expected_line, regex=False) def test_doctest(self): code = textwrap.dedent(r''' diff --git a/Misc/NEWS.d/next/Tests/2023-09-14-23-27-40.gh-issue-109425.j-uFep.rst b/Misc/NEWS.d/next/Tests/2023-09-14-23-27-40.gh-issue-109425.j-uFep.rst new file mode 100644 index 000000000000000..bfe18569ae97f3b --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2023-09-14-23-27-40.gh-issue-109425.j-uFep.rst @@ -0,0 +1,3 @@ +libregrtest now decodes stdout of test worker processes with the +"backslashreplace" error handler to log 
corrupted stdout, instead of failing
+with an error and not logging the stdout. Patch by Victor Stinner.