Matgenix · gpetretto · Sep 25, 2024 · Sep 25, 2024 · Sep 27, 2024 · Sep 27, 2024
diff --git a/src/jobflow_remote/config/base.py b/src/jobflow_remote/config/base.py
@@ -183,6 +183,11 @@ class WorkerBase(BaseModel):
         "username instead that from the list of job ids. May be necessary for some "
         "scheduler_type (e.g. SGE)",
     )
+    sanitize_command: bool = Field(
+        default=False,
+        description="Sanitize the output of commands in case of failures due to spurious text produced "
+        "by the worker shell.",
+    )
     model_config = ConfigDict(extra="forbid")
 
     @field_validator("scheduler_type")
@@ -252,7 +257,9 @@ def get_host(self) -> BaseHost:
         -------
         The LocalHost.
         """
-        return LocalHost(timeout_execute=self.timeout_execute)
+        return LocalHost(
+            timeout_execute=self.timeout_execute, sanitize=self.sanitize_command
+        )
 
     @property
     def cli_info(self) -> dict:
@@ -402,6 +409,7 @@ def get_host(self) -> BaseHost:
             shell_cmd=self.shell_cmd,
             login_shell=self.login_shell,
             interactive_login=self.interactive_login,
+            sanitize=self.sanitize_command,
         )
 
     @property

diff --git a/src/jobflow_remote/jobs/data.py b/src/jobflow_remote/jobs/data.py
@@ -177,9 +177,7 @@ def estimated_run_time(self) -> Optional[float]:
             The estimated run time in seconds.
         """
         if self.start_time:
-            return (
-                datetime.now(tz=self.start_time.tzinfo) - self.start_time
-            ).total_seconds()
+            return (datetime.utcnow() - self.start_time).total_seconds()
 
         return None
 

diff --git a/src/jobflow_remote/remote/host/base.py b/src/jobflow_remote/remote/host/base.py
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
 import abc
+import logging
+import re
 import traceback
 from typing import TYPE_CHECKING
 
@@ -10,9 +12,26 @@
     from pathlib import Path
 
 
+logger = logging.getLogger(__name__)
+# Marker echoed before and after a command so that its own output can be isolated.
+SANITIZE_KEY = r"_-_-_-_-_### JFREMOTE SANITIZE ###_-_-_-_-_"
+
+
 class BaseHost(MSONable):
     """Base Host class."""
 
+    def __init__(self, sanitize: bool = False):
+        """
+        Parameters
+        ----------
+        sanitize
+            If True, a marker string will be printed before and after the output
+            of the commands, to ease the parsing and avoid failures due to spurious
+            text coming from the host shell.
+        """
+        self.sanitize = sanitize
+        self._sanitize_regex: re.Pattern | None = None  # compiled on first use
+
     @abc.abstractmethod
     def execute(
         self,
@@ -28,7 +47,8 @@ def execute(
             Command to execute, as a str or list of str
         workdir: str or None
             path where the command will be executed.
-
+        timeout
+            Timeout for the execution of the commands.
         """
         raise NotImplementedError
 
@@ -74,8 +94,19 @@ def test(self) -> str | None:
         try:
             cmd = "echo 'test'"
             stdout, stderr, returncode = self.execute(cmd)
-            if returncode != 0 or stdout.strip() != "test":
-                msg = f"Command was executed but some error occurred.\nstdoud: {stdout}\nstderr: {stderr}"
+            if returncode != 0:
+                msg = f"Command was executed but return code was different from zero.\nstdoud: {stdout}\nstderr: {stderr}"
+            elif stdout.strip() != "test" or stderr.strip() != "":
+                msg = (
+                    "Command was executed but the output is not the expected one (i.e. a single 'test' "
+                    f"string in both stdout and stderr).\nstdoud: {stdout}\nstderr: {stderr}"
+                )
+                if not self.sanitize:
+                    msg += (
+                        "\nIf the output contains additional text the problem may be solved by setting "
+                        "the 'sanitize_command' option to True in the project configuration."
+                    )
+
         except Exception:
             exc = traceback.format_exc()
             msg = f"Error while executing command:\n {exc}"
@@ -124,6 +155,71 @@ def interactive_login(self) -> bool:
         """
         return False
 
+    @property
+    def sanitize_regex(self) -> re.Pattern:
+        """
+        Regex capturing the text between SANITIZE_KEY markers; compiled once and cached.
+        """
+        if not self._sanitize_regex:
+            escaped_key = re.escape(SANITIZE_KEY)
+            # Optionally match the newline that comes from the "echo" command.
+            # The -n option for echo to suppress the newline seems to not be
+            # supported on all systems
+            self._sanitize_regex = re.compile(
+                f"{escaped_key}\r?\n?(.*?)(?:{escaped_key}\r?\n?|$)", re.DOTALL
+            )
+
+        return self._sanitize_regex
+
+    def sanitize_command(self, cmd: str) -> str:
+        """
+        Wraps a command between two `echo` calls printing `SANITIZE_KEY` on both
+        stdout and stderr (through `tee /dev/stderr`), if sanitization is enabled.
+        The two identical markers delimit the portion of the output that
+        `sanitize_output` extracts. Otherwise the command is returned unchanged.
+
+        Parameters
+        ----------
+        cmd
+            The command string to be sanitized
+
+        Returns
+        -------
+        str
+            The sanitized command string
+        """
+        if self.sanitize:
+            echo_cmd = f'echo "{SANITIZE_KEY}" | tee /dev/stderr'
+            cmd = f"{echo_cmd};{cmd};{echo_cmd}"  # NOTE(review): exit status of cmd may be masked by the last echo - confirm
+        return cmd
+
+    def sanitize_output(self, output: str) -> str:
+        """
+        If sanitization is enabled, selects the section of the output between the
+        SANITIZE_KEY strings; otherwise the output is returned unchanged.
+        If the second instance of the key is not found, the part of the output after the key is returned.
+        If the key is not present, a warning is logged and the entire output is returned.
+
+        Parameters
+        ----------
+        output
+            The output of the command to be sanitized
+
+        Returns
+        -------
+        str
+            The sanitized output
+        """
+        if self.sanitize:
+            match = self.sanitize_regex.search(output)
+            if not match:
+                logger.warning(
+                    f"Even if sanitization was required, there was no match for the output: {output}. Returning the complete output"
+                )
+                return output
+            return match.group(1)
+        return output
+
 
 class HostError(Exception):
     pass
diff --git a/src/jobflow_remote/remote/host/local.py b/src/jobflow_remote/remote/host/local.py
@@ -12,8 +12,9 @@
 
 
 class LocalHost(BaseHost):
-    def __init__(self, timeout_execute: int = None) -> None:
+    def __init__(self, timeout_execute: int = None, sanitize: bool = False) -> None:
         self.timeout_execute = timeout_execute
+        super().__init__(sanitize=sanitize)
 
     def __eq__(self, other):
         return isinstance(other, LocalHost)
@@ -34,6 +35,10 @@ def execute(
         ----------
         command: str or list of str
             Command to execute, as a str or list of str
+        workdir: str or None
+            path where the command will be executed.
+        timeout
+            Timeout for the execution of the commands.
 
         Returns
         -------
@@ -46,13 +51,16 @@ def execute(
         """
         if isinstance(command, (list, tuple)):
             command = " ".join(command)
+        command = self.sanitize_command(command)
         workdir = str(workdir) if workdir else Path.cwd()
         timeout = timeout or self.timeout_execute
         with cd(workdir):
             proc = subprocess.run(
                 command, capture_output=True, shell=True, timeout=timeout, check=False
             )
-        return proc.stdout.decode(), proc.stderr.decode(), proc.returncode
+        stdout = self.sanitize_output(proc.stdout.decode())
+        stderr = self.sanitize_output(proc.stderr.decode())
+        return stdout, stderr, proc.returncode
 
     def mkdir(
         self, directory: str | Path, recursive: bool = True, exist_ok: bool = True

diff --git a/src/jobflow_remote/remote/host/remote.py b/src/jobflow_remote/remote/host/remote.py
@@ -42,6 +42,7 @@ def __init__(
         login_shell=True,
         retry_on_closed_connection=True,
         interactive_login=False,
+        sanitize: bool = False,
     ) -> None:
         self.host = host
         self.user = user
@@ -59,6 +60,7 @@ def __init__(
         self.retry_on_closed_connection = retry_on_closed_connection
         self._interactive_login = interactive_login
         self._create_connection()
+        super().__init__(sanitize=sanitize)
 
     def _create_connection(self) -> None:
         if self.interactive_login:
@@ -175,6 +177,8 @@ def execute(
         if isinstance(command, (list, tuple)):
             command = " ".join(command)
 
+        command = self.sanitize_command(command)
+
         # TODO: check if this works:
         if not workdir:
             workdir = "."
@@ -201,7 +205,10 @@ def execute(
                 timeout=timeout,
             )
 
-        return out.stdout, out.stderr, out.exited
+        stdout = self.sanitize_output(out.stdout)
+        stderr = self.sanitize_output(out.stderr)
+
+        return stdout, stderr, out.exited
 
     def mkdir(
         self, directory: str | Path, recursive: bool = True, exist_ok: bool = True

diff --git a/tests/db/remote/host/test_local.py b/tests/db/remote/host/test_local.py
@@ -0,0 +1,33 @@
+from unittest.mock import patch
+
+
+@patch("subprocess.run")
+def test_sanitize(mock_run):
+    from jobflow_remote.remote.host.base import SANITIZE_KEY
+    from jobflow_remote.remote.host.local import LocalHost
+
+    lh = LocalHost(sanitize=True)
+
+    cmd = "echo 'test'"
+
+    echo_cmd = f'echo "{SANITIZE_KEY}" | tee /dev/stderr'
+    expected_cmd = f"{echo_cmd};{cmd};{echo_cmd}"
+    mock_stdout = f"SOME NOISE --{SANITIZE_KEY}\ntest{SANITIZE_KEY}\nSOME appended TEXT"
+
+    # Configure the mock: noisy stdout around the markers and an empty stderr
+    mock_run.return_value.returncode = 0
+    mock_run.return_value.stdout = mock_stdout.encode()
+    mock_run.return_value.stderr = b""
+
+    stdout, stderr, _ = lh.execute(cmd)
+
+    mock_run.assert_called_once_with(
+        expected_cmd,
+        capture_output=True,
+        shell=True,  # noqa: S604
+        timeout=None,
+        check=False,
+    )
+
+    assert stdout == "test"
+    assert stderr == ""
diff --git a/tests/db/remote/host/test_remote.py b/tests/db/remote/host/test_remote.py
@@ -0,0 +1,40 @@
+from unittest.mock import MagicMock, patch
+
+
+@patch("fabric.Connection.run")
+@patch("fabric.Connection.cd")
+def test_sanitize(mock_cd, mock_run):
+    from jobflow_remote.remote.host.base import SANITIZE_KEY
+    from jobflow_remote.remote.host.remote import RemoteHost
+
+    rh = RemoteHost(
+        host="localhost",
+        retry_on_closed_connection=False,
+        sanitize=True,
+        shell_cmd=None,
+    )
+    rh._check_connected = lambda: True
+
+    cmd = "echo 'test'"
+
+    echo_cmd = f'echo "{SANITIZE_KEY}" | tee /dev/stderr'
+    expected_cmd = f"{echo_cmd};{cmd};{echo_cmd}"
+    mock_stdout = f"SOME NOISE --{SANITIZE_KEY}\ntest{SANITIZE_KEY}\nSOME appended TEXT"
+
+    # Configure the mocks: noisy stdout around the markers and an empty stderr
+    mock_cd.return_value.__enter__ = (
+        MagicMock()
+    )  # This makes the context manager do nothing
+    mock_cd.return_value.__exit__ = MagicMock()
+    mock_run.return_value.stdout = mock_stdout
+    mock_run.return_value.stderr = ""
+
+    # Call the method that relies on the patched fabric.Connection.run
+    stdout, stderr, _ = rh.execute(cmd)
+
+    # Assert that the patched fabric.Connection.run was called with the expected arguments
+    mock_run.assert_called_once_with(expected_cmd, timeout=None, hide=True, warn=True)
+
+    # Assert that the text outside the SANITIZE_KEY markers was stripped
+    assert stdout == "test"
+    assert stderr == ""
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
@@ -223,6 +223,13 @@ def write_tmp_settings(
                 work_dir=str(workdir),
                 resources={},
             ),
+            "test_sanitize_local_worker": dict(
+                type="local",
+                scheduler_type="shell",
+                work_dir=str(workdir),
+                resources={},
+                sanitize_command=True,
+            ),
             "test_remote_worker": dict(
                 type="remote",
                 host="localhost",
@@ -273,6 +280,19 @@ def write_tmp_settings(
                 resources={},
                 max_jobs=2,
             ),
+            "test_sanitize_remote_worker": dict(
+                type="remote",
+                host="localhost",
+                port=slurm_ssh_port,
+                scheduler_type="slurm",
+                work_dir="/home/jobflow/jfr",
+                user="jobflow",
+                password="jobflow",
+                pre_run="source /home/jobflow/.venv/bin/activate",
+                resources={"partition": "debug", "ntasks": 1, "time": "00:01:00"},
+                connect_kwargs={"allow_agent": False, "look_for_keys": False},
+                sanitize_command=True,
+            ),
         },
         exec_config={"test": {"export": {"TESTING_ENV_VAR": random_project_name}}},
         runner=dict(