diff --git a/Lib/test/libregrtest/run_workers.py b/Lib/test/libregrtest/run_workers.py index 45b2f424ce4e5df..481e909077a6fb6 100644 --- a/Lib/test/libregrtest/run_workers.py +++ b/Lib/test/libregrtest/run_workers.py @@ -218,7 +218,9 @@ def create_stdout(self, stack: contextlib.ExitStack) -> TextIO: # gh-94026: Write stdout+stderr to a tempfile as workaround for # non-blocking pipes on Emscripten with NodeJS. - stdout_file = tempfile.TemporaryFile('w+', encoding=encoding) + stdout_file = tempfile.TemporaryFile('w+', + encoding=encoding, + errors='backslashreplace') stack.enter_context(stdout_file) return stdout_file diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py index 55cf9e7f0207214..fc2a15cdf6049dc 100644 --- a/Lib/test/test_regrtest.py +++ b/Lib/test/test_regrtest.py @@ -421,10 +421,12 @@ def regex_search(self, regex, output): self.fail("%r not found in %r" % (regex, output)) return match - def check_line(self, output, regex, full=False): + def check_line(self, output, pattern, full=False, regex=True): + if not regex: + pattern = re.escape(pattern) if full: - regex += '\n' - regex = re.compile(r'^' + regex, re.MULTILINE) + pattern += '\n' + regex = re.compile(r'^' + pattern, re.MULTILINE) self.assertRegex(output, regex) def parse_executed_tests(self, output): @@ -1755,7 +1757,7 @@ def test_leak_tmp_file(self): f"files (1): mytmpfile", output) - def test_mp_decode_error(self): + def test_worker_decode_error(self): # gh-101634: If a worker stdout cannot be decoded, report a failed test # and a non-zero exit code. if sys.platform == 'win32': @@ -1767,29 +1769,40 @@ def test_mp_decode_error(self): if encoding is None: self.skipTest("cannot get regrtest worker encoding") - nonascii = b"byte:\xa0\xa9\xff\n" + nonascii = bytes(ch for ch in range(128, 256)) + corrupted_output = b"nonascii:%s\n" % (nonascii,) + # gh-108989: On Windows, assertion errors are written in UTF-16 + assertion_failed = 'Assertion failed: tstate_is_alive(tstate)\n' + corrupted_output += assertion_failed.encode('utf-16-le') try: - nonascii.decode(encoding) + corrupted_output.decode(encoding) except UnicodeDecodeError: pass else: - self.skipTest(f"{encoding} can decode non-ASCII bytes {nonascii!a}") + self.skipTest(f"{encoding} can decode non-ASCII bytes") + + expected_line = corrupted_output.decode(encoding, 'backslashreplace') code = textwrap.dedent(fr""" import sys + import unittest + + class Tests(unittest.TestCase): + def test_pass(self): + pass + # bytes which cannot be decoded from UTF-8 - nonascii = {nonascii!a} - sys.stdout.buffer.write(nonascii) + corrupted_output = {corrupted_output!a} + sys.stdout.buffer.write(corrupted_output) sys.stdout.buffer.flush() """) testname = self.create_test(code=code) - output = self.run_tests("--fail-env-changed", "-v", "-j1", testname, - exitcode=EXITCODE_BAD_TEST) + output = self.run_tests("--fail-env-changed", "-v", "-j1", testname) self.check_executed_tests(output, [testname], - failed=[testname], parallel=True, - stats=0) + stats=1) + self.check_line(output, expected_line, regex=False) def test_doctest(self): code = textwrap.dedent(r''' diff --git a/Misc/NEWS.d/next/Tests/2023-09-14-23-27-40.gh-issue-109425.j-uFep.rst b/Misc/NEWS.d/next/Tests/2023-09-14-23-27-40.gh-issue-109425.j-uFep.rst new file mode 100644 index 000000000000000..bfe18569ae97f3b --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2023-09-14-23-27-40.gh-issue-109425.j-uFep.rst @@ -0,0 +1,3 @@ +libregrtest now decodes stdout of test worker processes with the +"backslashreplace" error handler to log corrupted stdout, instead of failing +with an error and not logging the stdout. Patch by Victor Stinner.