Skip to content

Commit

Permalink
gh-109425: regrtest decodes worker stdout with backslashreplace
Browse files Browse the repository at this point in the history
libregrtest now decodes stdout of test worker processes with the
"backslashreplace" error handler to log corrupted stdout, instead of
failing with an error and not logging the stdout.
  • Loading branch information
vstinner committed Sep 14, 2023
1 parent 1f885df commit c93c199
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 14 deletions.
4 changes: 3 additions & 1 deletion Lib/test/libregrtest/run_workers.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,9 @@ def create_stdout(self, stack: contextlib.ExitStack) -> TextIO:

# gh-94026: Write stdout+stderr to a tempfile as workaround for
# non-blocking pipes on Emscripten with NodeJS.
stdout_file = tempfile.TemporaryFile('w+', encoding=encoding)
stdout_file = tempfile.TemporaryFile('w+',
encoding=encoding,
errors='backslashreplace')
stack.enter_context(stdout_file)
return stdout_file

Expand Down
39 changes: 26 additions & 13 deletions Lib/test/test_regrtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,10 +421,12 @@ def regex_search(self, regex, output):
self.fail("%r not found in %r" % (regex, output))
return match

def check_line(self, output, regex, full=False):
def check_line(self, output, pattern, full=False, regex=True):
if not regex:
pattern = re.escape(pattern)
if full:
regex += '\n'
regex = re.compile(r'^' + regex, re.MULTILINE)
pattern += '\n'
regex = re.compile(r'^' + pattern, re.MULTILINE)
self.assertRegex(output, regex)

def parse_executed_tests(self, output):
Expand Down Expand Up @@ -1755,7 +1757,7 @@ def test_leak_tmp_file(self):
f"files (1): mytmpfile",
output)

def test_mp_decode_error(self):
def test_worker_decode_error(self):
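# gh-109425: Worker stdout which cannot be decoded is now decoded with the
# "backslashreplace" error handler and logged; the test run succeeds.
# (Previously, gh-101634: report a failed test and a non-zero exit code.)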
if sys.platform == 'win32':
Expand All @@ -1767,29 +1769,40 @@ def test_mp_decode_error(self):
if encoding is None:
self.skipTest("cannot get regrtest worker encoding")

nonascii = b"byte:\xa0\xa9\xff\n"
nonascii = bytes(ch for ch in range(128, 256))
corrupted_output = b"nonascii:%s\n" % (nonascii,)
# gh-108989: On Windows, assertion errors are written in UTF-16
assertion_failed = 'Assertion failed: tstate_is_alive(tstate)\n'
corrupted_output += assertion_failed.encode('utf-16-le')
try:
nonascii.decode(encoding)
corrupted_output.decode(encoding)
except UnicodeDecodeError:
pass
else:
self.skipTest(f"{encoding} can decode non-ASCII bytes {nonascii!a}")
self.skipTest(f"{encoding} can decode non-ASCII bytes")

expected_line = corrupted_output.decode(encoding, 'backslashreplace')

code = textwrap.dedent(fr"""
import sys
import unittest
class Tests(unittest.TestCase):
def test_pass(self):
pass
# bytes which cannot be decoded from UTF-8
nonascii = {nonascii!a}
sys.stdout.buffer.write(nonascii)
corrupted_output = {corrupted_output!a}
sys.stdout.buffer.write(corrupted_output)
sys.stdout.buffer.flush()
""")
testname = self.create_test(code=code)

output = self.run_tests("--fail-env-changed", "-v", "-j1", testname,
exitcode=EXITCODE_BAD_TEST)
output = self.run_tests("--fail-env-changed", "-v", "-j1", testname)
self.check_executed_tests(output, [testname],
failed=[testname],
parallel=True,
stats=0)
stats=1)
self.check_line(output, expected_line, regex=False)

def test_doctest(self):
code = textwrap.dedent(r'''
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
libregrtest now decodes stdout of test worker processes with the
"backslashreplace" error handler to log corrupted stdout, instead of failing
with an error and not logging the stdout. Patch by Victor Stinner.

0 comments on commit c93c199

Please sign in to comment.