RHSecurityCompliance · mildas · Oct 7, 2024 · Sep 18, 2024 · Sep 18, 2024 · Sep 18, 2024
diff --git a/lib/runtest.py b/lib/runtest.py
@@ -1,12 +1,10 @@
 import os
 import sys
-import re
 import runpy
 import signal
 import traceback
 import tempfile
 import urllib3
-import yaml
 from pathlib import Path
 
 from lib import util, results
@@ -24,27 +22,8 @@
 # this is not a problem for Beaker, which captures test output separately and
 # actually cares about the exit code of the test script
 def _setup_timeout_handling():
-    metadata_yaml = os.environ['TMT_TEST_METADATA']  # exception if undefined
-    with open(metadata_yaml) as f:
-        test_metadata = yaml.safe_load(f)
-
-    if 'duration' in test_metadata:
-        duration_str = test_metadata['duration']
-        match = re.fullmatch(r'([0-9]+)([a-z]+)', duration_str)
-        if not match:
-            results.report_and_exit('error', note=f"duration '{duration_str}' has invalid format")
-        length, unit = match.groups()
-        if unit == 'm':
-            duration = int(length)*60
-        elif unit == 'h':
-            duration = int(length)*60*60
-        elif unit == 'd':
-            duration = int(length)*60*60*24
-        else:
-            duration = int(length)
-    else:
-        # use TMT's default of 5m
-        duration = 300
+    metadata = util.TestMetadata()
+    duration = metadata.duration_seconds()
 
     # leave 10 seconds for our alarm timeout code
     duration -= 10

diff --git a/lib/util/__init__.py b/lib/util/__init__.py
@@ -6,14 +6,15 @@
 #   so we could add the libdir to PATH and PYTHONPATH
 libdir = Path(inspect.getfile(inspect.currentframe())).parent.parent
 
-from .content      import *  # noqa
-from .backup       import *  # noqa
-from .dedent       import *  # noqa
-from .environment  import *  # noqa
-from .httpsrv      import *  # noqa
-from .log          import *  # noqa
-from .old_content  import *  # noqa
-from .rpmpack      import *  # noqa
-from .sanitization import *  # noqa
-from .ssh          import *  # noqa
-from .subprocess   import *  # noqa
+from .content       import *  # noqa
+from .backup        import *  # noqa
+from .dedent        import *  # noqa
+from .environment   import *  # noqa
+from .httpsrv       import *  # noqa
+from .log           import *  # noqa
+from .old_content   import *  # noqa
+from .rpmpack       import *  # noqa
+from .sanitization  import *  # noqa
+from .ssh           import *  # noqa
+from .subprocess    import *  # noqa
+from .test_metadata import *  # noqa
diff --git a/lib/util/test_metadata.py b/lib/util/test_metadata.py
@@ -0,0 +1,46 @@
+import os
+import re
+import yaml
+import copy as copy_mod
+
+
+class TestMetadata(dict):
+    """
+    Dictionary holding the metadata of the current test.
+
+    The contents are loaded from the YAML file that the TMT_TEST_METADATA
+    environment variable points to.
+    """
+
+    # seconds per 'duration' unit suffix; no suffix means seconds
+    _DURATION_UNITS = {'': 1, 's': 1, 'm': 60, 'h': 60*60, 'd': 60*60*24}
+
+    def __init__(self):
+        metadata_yaml = os.environ['TMT_TEST_METADATA']  # exception if undefined
+        with open(metadata_yaml) as f:
+            test_metadata = yaml.safe_load(f)
+        self.update(test_metadata)
+
+    # return 'TestMetadata' for .copy(), not 'dict'
+    def copy(self):
+        return copy_mod.copy(self)
+
+    def duration_seconds(self):
+        """
+        Return the test 'duration' as an integer number of seconds.
+
+        The value is a number with an optional s/m/h/d suffix (seconds if
+        there is none); 300 is returned when no 'duration' is set.
+
+        Raises RuntimeError if the value has any other format.
+        """
+        if 'duration' not in self:
+            # use TMT's default of 5m
+            return 300
+        # str() because YAML loads a suffix-less number as an integer
+        duration_str = str(self['duration'])
+        match = re.fullmatch(r'([0-9]+)([a-z]*)', duration_str)
+        if not match or match.group(2) not in self._DURATION_UNITS:
+            raise RuntimeError(f"'duration' has invalid format: {duration_str}")
+        length, unit = match.groups()
+        return int(length) * self._DURATION_UNITS[unit]
diff --git a/lib/virt.py b/lib/virt.py
@@ -37,6 +37,7 @@
 
 Example using snapshots:
 
+    import subprocess
     import virt
 
     virt.Host.setup()
@@ -48,7 +49,7 @@
         g.prepare_for_snapshot()
 
     with g.snapshotted():
-        state = g.ssh('ls', '/root', capture=True)
+        state = g.ssh('ls', '/root', stdout=subprocess.PIPE)
         print(state.stdout)
         if state.returncode != 0:
             report_failure()
@@ -82,6 +83,7 @@
 import contextlib
 import tempfile
 import json
+import uuid
 import xml.etree.ElementTree as ET
 from datetime import datetime, timedelta
 from pathlib import Path
@@ -253,12 +255,15 @@ def __init__(self, template=TEMPLATE, packages=PACKAGES, partitions=None):
         self.ks = template
         self.appends = []
         self.packages = packages
-        self.partitions = partitions if partitions else []
+        self.partitions = partitions
 
     def assemble(self):
-        partitions_block = '\n'.join(
-            (f'part {mountpoint} --size={size}' for mountpoint, size in self.partitions),
-        )
+        if self.partitions:
+            partitions_block = '\n'.join(
+                (f'part {mountpoint} --size={size}' for mountpoint, size in self.partitions),
+            )
+        else:
+            partitions_block = 'part / --size=1 --grow'
         appends_block = '\n'.join(self.appends)
         packages_block = '\n'.join(self.packages)
         packages_block = f'%packages\n{packages_block}\n%end'
@@ -337,7 +342,8 @@ class Guest:
     Set a 'tag' (string) to a unique name you would like to share across tests
     that use snapshots - the .can_be_snapshotted() function will return True
     when it finds an already installed guest using the same tag.
-    Tag-less guests cannot be shared across tests.
+    Tag-less guests can be used only for snapshotting within the same test
+    and should not be shared across tests.
     """
 
     # custom post-install setup to allow smooth login and qemu-qa command execution
@@ -353,7 +359,7 @@ class Guest:
     ]
 
     def __init__(self, tag=None, *, name=GUEST_NAME):
-        self.tag = tag
+        self.tag = tag or str(uuid.uuid4())
         self.name = name
         self.ipaddr = None
         self.ssh_keyfile_path = f'{GUEST_IMG_DIR}/{name}.sshkey'
@@ -469,8 +475,7 @@ def install(self, location=None, kickstart=None, rpmpack=None, disk_format='raw'
         # installed system doesn't need as much RAM, alleviate swap pressure
         set_domain_memory(self.name, 2000)
 
-        if self.tag is not None:
-            self.install_ready_path.write_text(self.tag)
+        self.install_ready_path.write_text(self.tag)
 
         self.orig_disk_path = disk_path
         self.orig_disk_format = disk_format
@@ -553,8 +558,7 @@ def prepare_for_snapshot(self):
         # modify its built-in XML to point to a snapshot-style disk path
         set_state_image_disk(self.state_file_path, self.snapshot_path, 'qcow2')
 
-        if self.tag is not None:
-            self.snapshot_ready_path.write_text(self.tag)
+        self.snapshot_ready_path.write_text(self.tag)
 
     def _restore_snapshotted(self):
         # reused guest from another test, install() or prepare_for_snapshot()
@@ -612,9 +616,13 @@ def snapshotted(self):
                 self._destroy_snapshotted()
 
     @contextlib.contextmanager
-    def booted(self):
+    def booted(self, *, safe_shutdown=False):
         """
         Just boot the guest, ready it for communication.
+
+        With 'safe_shutdown', guarantee that the guest shuts down cleanly.
+        This is useful for setup-style use cases where the test wants to modify
+        the guest before taking a snapshot.
         """
         self.start()
         self.ipaddr = wait_for_ifaddr(self.name)
@@ -623,20 +631,21 @@ def booted(self):
         try:
             yield self
         finally:
-            if os.environ.get('CONTEST_LEAVE_GUEST_RUNNING') == '1':
-                self._log_leave_running_notice()
+            if safe_shutdown:
+                util.log(f"shutting down {self.name} (safely)")
+                self.shutdown()
             else:
-                try:
-                    util.log(f"shutting down {self.name}")
-                    self.shutdown()
-                except TimeoutError:
-                    util.log(f"shutdown timed out, destroying {self.name}")
-                    self.destroy()
-
-    def _do_ssh(self, *cmd, func=util.subprocess_run, capture=False, **run_args):
-        if capture:
-            run_args['stdout'] = PIPE
-            run_args['stderr'] = PIPE
+                if os.environ.get('CONTEST_LEAVE_GUEST_RUNNING') == '1':
+                    self._log_leave_running_notice()
+                else:
+                    try:
+                        util.log(f"shutting down {self.name}")
+                        self.shutdown()
+                    except TimeoutError:
+                        util.log(f"shutdown timed out, destroying {self.name}")
+                        self.destroy()
+
+    def _do_ssh(self, *cmd, func=util.subprocess_run, **run_args):
         ssh_cmdline = [
             'ssh', '-q', '-i', self.ssh_keyfile_path, '-o', 'BatchMode=yes',
             '-o', 'StrictHostKeyChecking=no', '-o', 'UserKnownHostsFile=/dev/null',

diff --git a/scanning/oscap-debug/helgrind.fmf b/scanning/oscap-debug/helgrind.fmf
@@ -0,0 +1,5 @@
+summary: Runs oscap via valgrind - helgrind
+test: python3 -m lib.runtest ./helgrind.py
+duration: 4h
+require+:
+  - valgrind
diff --git a/scanning/oscap-debug/helgrind.py b/scanning/oscap-debug/helgrind.py
@@ -0,0 +1,27 @@
+#!/usr/bin/python3
+
+from lib import util, results
+
+
+# debuginfo for these packages gets installed before the valgrind run
+debuginfo_pkgs = (
+    'glibc',
+    'openscap-scanner',
+    'xmlsec1',
+    'xmlsec1-openssl',
+    'libtool-ltdl',
+    'openssl-libs',
+)
+util.subprocess_run(
+    ['dnf', '-y', 'debuginfo-install', *debuginfo_pkgs],
+    check=True,
+)
+
+# evaluate one profile with oscap running under valgrind's helgrind tool
+scan_profile = 'cis_workstation_l1'
+helgrind_cmd = ['valgrind', '--tool=helgrind', '--']
+helgrind_cmd += ['oscap', 'xccdf', 'eval', '--profile', scan_profile, '--progress']
+helgrind_cmd.append(util.get_datastream())
+util.subprocess_run(helgrind_cmd)
+
+results.report_and_exit()
diff --git a/scanning/oscap-debug/main.fmf b/scanning/oscap-debug/main.fmf
@@ -0,0 +1,7 @@
+result: custom
+environment+:
+    PYTHONPATH: ../..
+# these are tools to be manually modified and executed,
+# not to be run in any kind of automation
+tag+:
+- needs-param
diff --git a/scanning/oscap-debug/sysctl-only.fmf b/scanning/oscap-debug/sysctl-only.fmf
@@ -0,0 +1,9 @@
+summary: Runs oscap many times to hopefully reproduce a freeze
+test: python3 -m lib.runtest ./sysctl-only.py
+duration: 4h
+require+:
+  - gdb
+adjust:
+  - when: distro < rhel-9.5
+    enabled: false
+    because: we need a fairly modern gdb
diff --git a/scanning/oscap-debug/sysctl-only.py b/scanning/oscap-debug/sysctl-only.py
@@ -0,0 +1,112 @@
+#!/usr/bin/python3
+
+# Run a sysctl-only oscap scan over and over; when a run does not finish
+# within oscap_timeout, attach gdb to it and report the collected debug data.
+
+import time
+import signal
+import subprocess
+
+from lib import util, results, oscap
+
+
+start_time = time.monotonic()
+
+profile = 'anssi_bp28_high'
+
+# sysctl rules only take about 1-2 seconds
+oscap_timeout = 10
+
+# unselect all rules in the specified profile, except for
+# sysctl_* rules
+ds = oscap.global_ds()
+rules = ds.profiles[profile].rules
+rules = {rule for rule in rules if not rule.startswith('sysctl_')}
+oscap.unselect_rules(util.get_datastream(), 'scan-ds.xml', rules)
+
+extra_debuginfos = [
+    'glibc',
+    'openscap-scanner',
+    'xmlsec1',
+    'xmlsec1-openssl',
+    'libtool-ltdl',
+    'openssl-libs',
+]
+
+util.subprocess_run(['dnf', '-y', 'debuginfo-install', *extra_debuginfos], check=True)
+
+with open('gdb.script', 'w') as f:
+    f.write(util.dedent('''
+        generate-core-file oscap.core
+        set logging file oscap-bt.txt
+        set logging overwrite on
+        set logging redirect on
+        set logging enabled on
+        thread apply all bt
+        set logging enabled off
+    '''))
+
+oscap_cmd = [
+    'oscap', 'xccdf', 'eval', '--profile', profile, '--progress', 'scan-ds.xml',
+]
+
+# run for all of the configured test duration, minus 600 seconds for safety
+# (running gdb, compressing corefile which takes forever, etc.)
+metadata = util.TestMetadata()
+duration = metadata.duration_seconds() - oscap_timeout - 600
+util.log(f"trying to freeze oscap for {duration} total seconds")
+
+# incremented at the top of each iteration, so that iterations ending
+# with 'continue' don't reuse their 'attempt:N' name for the next result
+attempt = 0
+
+while time.monotonic() - start_time < duration:
+    attempt += 1
+    oscap_proc = util.subprocess_Popen(oscap_cmd)
+
+    try:
+        returncode = oscap_proc.wait(oscap_timeout)
+        if returncode not in [0, 2]:
+            results.report(
+                'fail', f'attempt:{attempt}', f"oscap failed with {returncode}",
+            )
+            continue
+
+    except subprocess.TimeoutExpired:
+        # figure out the PID of the oscap process
+        pgrep = util.subprocess_run(
+            ['pgrep', '-n', 'oscap'],
+            stdout=subprocess.PIPE, universal_newlines=True,
+        )
+        if pgrep.returncode != 0:
+            results.report(
+                'warn',
+                f'attempt:{attempt}',
+                f"pgrep returned {pgrep.returncode}, oscap probably just finished "
+                "and we hit a rare race, moving on",
+            )
+            continue
+
+        oscap_pid = pgrep.stdout.strip()
+
+        # attach gdb to that PID
+        util.subprocess_run(
+            ['gdb', '-n', '-batch', '-x', 'gdb.script', '-p', oscap_pid],
+            check=True,
+        )
+
+        util.subprocess_run(['xz', '-e', '-9', 'oscap.core'], check=True)
+        results.report(
+            'fail', f'attempt:{attempt}', "oscap froze, gdb output available",
+            logs=['oscap.core.xz', 'oscap-bt.txt'],
+        )
+        break
+
+    finally:
+        # make sure oscap is gone, whichever way this iteration ends
+        oscap_proc.send_signal(signal.SIGKILL)
+        oscap_proc.wait()
+
+    results.report('pass', f'attempt:{attempt}')
+
+results.report_and_exit()