ansible · Shrews · Jan 5, 2024 · Dec 5, 2023
@@ -297,10 +297,11 @@ def run(self):
                 child.logfile_read = stdout_handle
             except pexpect.exceptions.ExceptionPexpect as e:
                 child = collections.namedtuple(
-                    'MissingProcess', 'exitstatus isalive close'
+                    'MissingProcess', 'exitstatus isalive expect close'
                 )(
                     exitstatus=127,
                     isalive=lambda: False,
+                    expect=lambda *args, **kwargs: None,
                     close=lambda: None,
                 )
 
@@ -341,9 +342,17 @@ def run(self):
                     Runner.handle_termination(child.pid)
                     self.timed_out = True
 
+            # fix for https://github.com/ansible/ansible-runner/issues/1330
+            # Since we're (ab)using pexpect's logging callback as our source of stdout data, we need to pump the stream one last
+            # time, in case any new output was written by the child between the last return from expect and its termination. Ideally
+            # this would have an arbitrarily large timeout value as well, in case a ridiculous amount of data was written, but just
+            # invoking one last pump should cover the vast majority of real-world cases.
+            child.expect(pexpect.EOF, timeout=5)
+
+            # close the child to ensure no more output will be written before we close the stream interposers
+            child.close()
             stdout_handle.close()
             stderr_handle.close()
-            child.close()
             self.rc = child.exitstatus if not (self.timed_out or self.canceled) else 254
 
         if self.canceled:

@@ -0,0 +1,9 @@
+# part of the regression test for https://github.com/ansible/ansible-runner/issues/1330
+
+- hosts: localhost
+  gather_facts: no
+  tasks:
+    # sleep significantly longer than the configured pexpect timeout; the cancel callback will inject
+    # additional delay before the next process status sampling interval that can cause further output to be lost;
+    # if all is well, we'll do another loop over the child output until it's all been consumed...
+    - raw: sleep 2
@@ -4,12 +4,13 @@
 import os
 import re
 import sys
+import time
 
 from test.utils.common import iterate_timeout
 
 import pytest
 
-from ansible_runner import Runner
+from ansible_runner import Runner, run
 from ansible_runner.exceptions import AnsibleRunnerException
 
 
@@ -279,3 +280,16 @@ def test_set_extra_vars(rc):
         with open(os.path.join(rc.artifact_dir, 'stdout')) as f:
             if 'hello there' in f.read():
                 break
+
+
+# regression test for https://github.com/ansible/ansible-runner/issues/1330
+def test_pexpect_timeout(project_fixtures):
+    r = run(
+        private_data_dir=str(project_fixtures / 'pexpect_timeout_data_loss'),
+        playbook='pb.yml',
+        settings={"pexpect_timeout": 0.1},  # set the pexpect timeout very low
+        cancel_callback=lambda: time.sleep(3) or False,  # induce enough delay in the child polling loop that the child will exit before being polled again
+    )
+
+    # ensure we got playbook_on_stats; if pexpect ate it, we won't...
+    assert any(ev for ev in r.events if ev.get('event', None) == 'playbook_on_stats')