diff --git a/dangerzone/cli.py b/dangerzone/cli.py index 7a0acfb97..a8c0c475b 100644 --- a/dangerzone/cli.py +++ b/dangerzone/cli.py @@ -46,7 +46,8 @@ def print_header(s: str) -> None: "--debug", "debug", flag_value=True, - help="Run Dangerzone in debug mode, to get logs from gVisor.") + help="Run Dangerzone in debug mode, to get logs from gVisor.", +) @click.version_option(version=get_version(), message="%(version)s") @errors.handle_document_errors def cli_main( diff --git a/dangerzone/isolation_provider/base.py b/dangerzone/isolation_provider/base.py index 59c8b6083..bdd7ad908 100644 --- a/dangerzone/isolation_provider/base.py +++ b/dangerzone/isolation_provider/base.py @@ -5,7 +5,9 @@ import signal import subprocess import sys +import threading from abc import ABC, abstractmethod +from queue import Queue from typing import IO, Callable, Iterator, Optional import fitz @@ -92,6 +94,7 @@ def __init__(self, debug: bool = False) -> None: self.proc_stderr = subprocess.PIPE else: self.proc_stderr = subprocess.DEVNULL + self.stderr_queue: Queue = Queue() def should_capture_stderr(self) -> bool: return self.debug or getattr(sys, "dangerzone_dev", False) @@ -363,3 +366,23 @@ def doc_to_pixels_proc( f"{debug_log}" # no need for an extra newline here f"{DOC_TO_PIXELS_LOG_END}" ) + + def _stream_stderr(self, stderr: IO[bytes], queue: Queue) -> None: + """Read stderr in a separate thread to avoid blocking""" + try: + for line in stderr: + queue.put(line) + if self.debug: + log.debug(line.decode().strip()) + except (ValueError, IOError) as e: + log.debug(f"Stderr stream closed: {e}") + + def start_stderr_thread(self, process: subprocess.Popen) -> None: + """Start a thread to read stderr from the process""" + if process.stderr: + stderr_thread = threading.Thread( + target=self._stream_stderr, + args=(process.stderr, self.stderr_queue), + daemon=True, + ) + stderr_thread.start() diff --git a/dangerzone/isolation_provider/container.py b/dangerzone/isolation_provider/container.py index 8d3ca05ce..4771d16c9 100644 --- a/dangerzone/isolation_provider/container.py +++ b/dangerzone/isolation_provider/container.py @@ -154,7 +154,7 @@ def exec( args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=self.proc_stderr, + stderr=subprocess.PIPE, startupinfo=startupinfo, # Start the conversion process in a new session, so that we can later on # kill the process group, without killing the controlling script. @@ -189,7 +189,14 @@ def exec_container( + command ) args = [container_runtime] + args - return self.exec(args) + args_str = " ".join(shlex.quote(s) for s in args) + log.info("> " + args_str) + + process = self.exec(args) + # Start stderr reader thread, attaching it to the process + self.start_stderr_thread(process) + + return process def kill_container(self, name: str) -> None: """Terminate a spawned container. diff --git a/dangerzone/logic.py b/dangerzone/logic.py index 786fc444a..9bf8e05a8 100644 --- a/dangerzone/logic.py +++ b/dangerzone/logic.py @@ -66,11 +66,25 @@ def convert_documents( ) -> None: def convert_doc(document: Document) -> None: try: + # Clear any existing stderr output + while not self.isolation_provider.stderr_queue.empty(): + self.isolation_provider.stderr_queue.get_nowait() + self.isolation_provider.convert( document, ocr_lang, stdout_callback, ) + + # Process any stderr output that was captured + while not self.isolation_provider.stderr_queue.empty(): + try: + line = self.isolation_provider.stderr_queue.get_nowait() + if stdout_callback: + stdout_callback(True, line.decode().strip(), -1) + except Exception as e: + log.error(f"Error processing stderr: {e}") + except Exception: log.exception( f"Unexpected error occurred while converting '{document}'"