Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add oscap debug #264

Merged
merged 9 commits into from
Oct 7, 2024
25 changes: 2 additions & 23 deletions lib/runtest.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import os
import sys
import re
import runpy
import signal
import traceback
import tempfile
import urllib3
import yaml
from pathlib import Path

from lib import util, results
Expand All @@ -24,27 +22,8 @@
# this is not a problem for Beaker, which captures test output separately and
# actually cares about the exit code of the test script
def _setup_timeout_handling():
metadata_yaml = os.environ['TMT_TEST_METADATA'] # exception if undefined
with open(metadata_yaml) as f:
test_metadata = yaml.safe_load(f)

if 'duration' in test_metadata:
duration_str = test_metadata['duration']
match = re.fullmatch(r'([0-9]+)([a-z]+)', duration_str)
if not match:
results.report_and_exit('error', note=f"duration '{duration_str}' has invalid format")
length, unit = match.groups()
if unit == 'm':
duration = int(length)*60
elif unit == 'h':
duration = int(length)*60*60
elif unit == 'd':
duration = int(length)*60*60*24
else:
duration = int(length)
else:
# use TMT's default of 5m
duration = 300
metadata = util.TestMetadata()
duration = metadata.duration_seconds()

# leave 10 seconds for our alarm timeout code
duration -= 10
Expand Down
23 changes: 12 additions & 11 deletions lib/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
# so we could add the libdir to PATH and PYTHONPATH
libdir = Path(inspect.getfile(inspect.currentframe())).parent.parent

from .content import * # noqa
from .backup import * # noqa
from .dedent import * # noqa
from .environment import * # noqa
from .httpsrv import * # noqa
from .log import * # noqa
from .old_content import * # noqa
from .rpmpack import * # noqa
from .sanitization import * # noqa
from .ssh import * # noqa
from .subprocess import * # noqa
from .content import * # noqa
from .backup import * # noqa
from .dedent import * # noqa
from .environment import * # noqa
from .httpsrv import * # noqa
from .log import * # noqa
from .old_content import * # noqa
from .rpmpack import * # noqa
from .sanitization import * # noqa
from .ssh import * # noqa
from .subprocess import * # noqa
from .test_metadata import * # noqa
36 changes: 36 additions & 0 deletions lib/util/test_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import os
import re
import yaml
import copy as copy_mod


class TestMetadata(dict):
    """
    Dictionary holding the metadata of the currently running test.

    The contents are loaded from the YAML file whose path is stored in the
    TMT_TEST_METADATA environment variable.
    """

    # seconds per unit suffix accepted in 'duration'; a missing suffix
    # means seconds, same as for the sleep(1) command
    _DURATION_UNITS = {
        '': 1,
        's': 1,
        'm': 60,
        'h': 60*60,
        'd': 60*60*24,
    }

    def __init__(self):
        """
        Load the metadata YAML into this dict.

        Raises KeyError if TMT_TEST_METADATA is not set in the environment.
        """
        metadata_yaml = os.environ['TMT_TEST_METADATA']  # exception if undefined
        with open(metadata_yaml) as f:
            test_metadata = yaml.safe_load(f)
        # an empty YAML document loads as None, which dict.update() rejects
        if test_metadata is not None:
            self.update(test_metadata)

    # return 'TestMetadata' for .copy(), not 'dict'
    def copy(self):
        """Return a shallow copy that keeps the TestMetadata type."""
        return copy_mod.copy(self)

    def duration_seconds(self):
        """
        Return the test 'duration' converted to seconds (as int).

        The value is a number optionally followed by one unit suffix:
        's' (seconds), 'm' (minutes), 'h' (hours) or 'd' (days). A bare
        number is taken as seconds. When 'duration' is not present at all,
        TMT's default of 5m (300 seconds) is returned.

        Raises RuntimeError if the value does not have this format,
        including when it uses an unknown unit suffix.
        """
        if 'duration' not in self:
            # use TMT's default of 5m
            return 300

        # YAML may hand us an int if the value was written without a unit
        duration_str = str(self['duration']).strip()
        match = re.fullmatch(r'([0-9]+)([a-z]*)', duration_str)
        if not match or match.group(2) not in self._DURATION_UNITS:
            raise RuntimeError(f"'duration' has invalid format: {duration_str}")

        length, unit = match.groups()
        return int(length) * self._DURATION_UNITS[unit]
59 changes: 34 additions & 25 deletions lib/virt.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

Example using snapshots:

import subprocess
import virt

virt.Host.setup()
Expand All @@ -48,7 +49,7 @@
g.prepare_for_snapshot()

with g.snapshotted():
state = g.ssh('ls', '/root', capture=True)
state = g.ssh('ls', '/root', stdout=subprocess.PIPE)
print(state.stdout)
if state.returncode != 0:
report_failure()
Expand Down Expand Up @@ -82,6 +83,7 @@
import contextlib
import tempfile
import json
import uuid
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta
from pathlib import Path
Expand Down Expand Up @@ -253,12 +255,15 @@ def __init__(self, template=TEMPLATE, packages=PACKAGES, partitions=None):
self.ks = template
self.appends = []
self.packages = packages
self.partitions = partitions if partitions else []
self.partitions = partitions

def assemble(self):
partitions_block = '\n'.join(
(f'part {mountpoint} --size={size}' for mountpoint, size in self.partitions),
)
if self.partitions:
partitions_block = '\n'.join(
(f'part {mountpoint} --size={size}' for mountpoint, size in self.partitions),
)
else:
partitions_block = 'part / --size=1 --grow'
appends_block = '\n'.join(self.appends)
packages_block = '\n'.join(self.packages)
packages_block = f'%packages\n{packages_block}\n%end'
Expand Down Expand Up @@ -337,7 +342,8 @@ class Guest:
Set a 'tag' (string) to a unique name you would like to share across tests
that use snapshots - the .can_be_snapshotted() function will return True
when it finds an already installed guest using the same tag.
Tag-less guests cannot be shared across tests.
Tag-less guests can be used only for snapshotting within the same test
and should not be shared across tests.
"""

# custom post-install setup to allow smooth login and qemu-qa command execution
Expand All @@ -353,7 +359,7 @@ class Guest:
]

def __init__(self, tag=None, *, name=GUEST_NAME):
self.tag = tag
self.tag = tag or str(uuid.uuid4())
self.name = name
self.ipaddr = None
self.ssh_keyfile_path = f'{GUEST_IMG_DIR}/{name}.sshkey'
Expand Down Expand Up @@ -469,8 +475,7 @@ def install(self, location=None, kickstart=None, rpmpack=None, disk_format='raw'
# installed system doesn't need as much RAM, alleviate swap pressure
set_domain_memory(self.name, 2000)

if self.tag is not None:
self.install_ready_path.write_text(self.tag)
self.install_ready_path.write_text(self.tag)

self.orig_disk_path = disk_path
self.orig_disk_format = disk_format
Expand Down Expand Up @@ -553,8 +558,7 @@ def prepare_for_snapshot(self):
# modify its built-in XML to point to a snapshot-style disk path
set_state_image_disk(self.state_file_path, self.snapshot_path, 'qcow2')

if self.tag is not None:
self.snapshot_ready_path.write_text(self.tag)
self.snapshot_ready_path.write_text(self.tag)

def _restore_snapshotted(self):
# reused guest from another test, install() or prepare_for_snapshot()
Expand Down Expand Up @@ -612,9 +616,13 @@ def snapshotted(self):
self._destroy_snapshotted()

@contextlib.contextmanager
def booted(self):
def booted(self, *, safe_shutdown=False):
"""
Just boot the guest, ready it for communication.

With 'safe_shutdown', guarantee that the guest shuts down cleanly.
This is useful for setup-style use cases where the test wants to modify
the guest before taking a snapshot.
"""
self.start()
self.ipaddr = wait_for_ifaddr(self.name)
Expand All @@ -623,20 +631,21 @@ def booted(self):
try:
yield self
finally:
if os.environ.get('CONTEST_LEAVE_GUEST_RUNNING') == '1':
self._log_leave_running_notice()
if safe_shutdown:
util.log(f"shutting down {self.name} (safely)")
self.shutdown()
else:
try:
util.log(f"shutting down {self.name}")
self.shutdown()
except TimeoutError:
util.log(f"shutdown timed out, destroying {self.name}")
self.destroy()

def _do_ssh(self, *cmd, func=util.subprocess_run, capture=False, **run_args):
if capture:
run_args['stdout'] = PIPE
run_args['stderr'] = PIPE
if os.environ.get('CONTEST_LEAVE_GUEST_RUNNING') == '1':
self._log_leave_running_notice()
else:
try:
util.log(f"shutting down {self.name}")
self.shutdown()
except TimeoutError:
util.log(f"shutdown timed out, destroying {self.name}")
self.destroy()

def _do_ssh(self, *cmd, func=util.subprocess_run, **run_args):
ssh_cmdline = [
'ssh', '-q', '-i', self.ssh_keyfile_path, '-o', 'BatchMode=yes',
'-o', 'StrictHostKeyChecking=no', '-o', 'UserKnownHostsFile=/dev/null',
Expand Down
5 changes: 5 additions & 0 deletions scanning/oscap-debug/helgrind.fmf
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
summary: Runs oscap via valgrind - helgrind
test: python3 -m lib.runtest ./helgrind.py
duration: 4h
require+:
- valgrind
26 changes: 26 additions & 0 deletions scanning/oscap-debug/helgrind.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/python3

from lib import util, results


# XCCDF profile passed to 'oscap xccdf eval'
profile = 'cis_workstation_l1'

# packages handed to 'dnf debuginfo-install' before the run
extra_debuginfos = [
    'glibc',
    'openscap-scanner',
    'xmlsec1',
    'xmlsec1-openssl',
    'libtool-ltdl',
    'openssl-libs',
]

# install the debuginfo first; a failed install aborts the script (check=True)
dnf_cmd = ['dnf', '-y', 'debuginfo-install']
dnf_cmd += extra_debuginfos
util.subprocess_run(dnf_cmd, check=True)

# the oscap scan itself, wrapped by valgrind using the helgrind tool
valgrind_prefix = ['valgrind', '--tool=helgrind', '--']
oscap_eval = ['oscap', 'xccdf', 'eval', '--profile', profile, '--progress']
oscap_cmd = [*valgrind_prefix, *oscap_eval, util.get_datastream()]

# no check=True is passed here, unlike for the dnf call above
util.subprocess_run(oscap_cmd)

results.report_and_exit()
7 changes: 7 additions & 0 deletions scanning/oscap-debug/main.fmf
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
result: custom
environment+:
PYTHONPATH: ../..
# these are tools to be manually modified and executed,
# not to be run in any kind of automation
tag+:
- needs-param
9 changes: 9 additions & 0 deletions scanning/oscap-debug/sysctl-only.fmf
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
summary: Runs oscap many times to hopefully reproduce a freeze
test: python3 -m lib.runtest ./sysctl-only.py
duration: 4h
require+:
- gdb
adjust:
- when: distro < rhel-9.5
enabled: false
because: we need a fairly modern gdb
105 changes: 105 additions & 0 deletions scanning/oscap-debug/sysctl-only.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#!/usr/bin/python3

import time
import signal
import subprocess

from lib import util, results, oscap


# Repeatedly run a sysctl-only oscap scan until it either freezes (does not
# finish within oscap_timeout) or the configured test duration runs out.
# On a freeze, gdb is attached to dump a core file and thread backtraces.

start_time = time.monotonic()

profile = 'anssi_bp28_high'

# sysctl rules only take about 1-2 seconds
oscap_timeout = 10

# unselect all rules in the specified profile, except for
# sysctl_* rules
ds = oscap.global_ds()
rules = ds.profiles[profile].rules
rules = {rule for rule in rules if not rule.startswith('sysctl_')}
oscap.unselect_rules(util.get_datastream(), 'scan-ds.xml', rules)

extra_debuginfos = [
    'glibc',
    'openscap-scanner',
    'xmlsec1',
    'xmlsec1-openssl',
    'libtool-ltdl',
    'openssl-libs',
]

util.subprocess_run(['dnf', '-y', 'debuginfo-install', *extra_debuginfos], check=True)

# gdb commands executed (in batch mode) against a frozen oscap process:
# dump a core file and log backtraces of all threads to oscap-bt.txt
with open('gdb.script', 'w') as f:
    f.write(util.dedent('''
        generate-core-file oscap.core
        set logging file oscap-bt.txt
        set logging overwrite on
        set logging redirect on
        set logging enabled on
        thread apply all bt
        set logging enabled off
    '''))

oscap_cmd = [
    'oscap', 'xccdf', 'eval', '--profile', profile, '--progress', 'scan-ds.xml',
]

# run for all of the configured test duration, minus 600 seconds for safety
# (running gdb, compressing corefile which takes forever, etc.)
attempt = 1
metadata = util.TestMetadata()
duration = metadata.duration_seconds() - oscap_timeout - 600
util.log(f"trying to freeze oscap for {duration} total seconds")

while time.monotonic() - start_time < duration:
    oscap_proc = util.subprocess_Popen(oscap_cmd)

    try:
        returncode = oscap_proc.wait(oscap_timeout)

    except subprocess.TimeoutExpired:
        # oscap did not finish in time; find the PID of the newest
        # process matching 'oscap' so gdb can attach to it
        pgrep = util.subprocess_run(
            ['pgrep', '-n', 'oscap'],
            stdout=subprocess.PIPE, universal_newlines=True,
        )
        if pgrep.returncode != 0:
            results.report(
                'warn',
                f'attempt:{attempt}',
                f"pgrep returned {pgrep.returncode}, oscap probably just finished "
                "and we hit a rare race, moving on",
            )
        else:
            oscap_pid = pgrep.stdout.strip()

            # attach gdb to that PID
            util.subprocess_run(
                ['gdb', '-n', '-batch', '-x', 'gdb.script', '-p', oscap_pid],
                check=True,
            )

            util.subprocess_run(['xz', '-e', '-9', 'oscap.core'], check=True)
            results.report(
                'fail', f'attempt:{attempt}', "oscap froze, gdb output available",
                logs=['oscap.core.xz', 'oscap-bt.txt'],
            )
            # we got what we came for, no point in trying further
            break

    else:
        # oscap finished in time; only exit codes 0 and 2 count as a pass
        if returncode in (0, 2):
            results.report('pass', f'attempt:{attempt}')
        else:
            results.report(
                'fail', f'attempt:{attempt}', f"oscap failed with {returncode}",
            )

    finally:
        # make sure no oscap is left behind, whatever happened above
        oscap_proc.send_signal(signal.SIGKILL)
        oscap_proc.wait()
        # always advance the counter, so that every run gets its own
        # unique 'attempt:N' result name, even after a fail/warn
        attempt += 1

results.report_and_exit()
Loading