diff --git a/README.md b/README.md index 5972fe2..c8f47b9 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,11 @@ username associated with the process. - Source IP `saddr` - Destination IP `daddr` - Destination port `port` +- Full process tree attestation for IPv4 TCP/UDP listeners with the + same process metadata as above and + - Local bind address `laddr` + - Listening port `port` + - Network protocol `protocol` (e.g. tcp) - Optional plugin system for enriching events in userland - Included `sourceipmap` plugin for mapping source address - Included `loginuidmap` plugin for adding loginuid info to process tree @@ -44,7 +49,7 @@ username associated with the process. ## Caveats * bcc compiles your eBPF "program" to bytecode at runtime, and as such needs the appropriate kernel headers installed on the host. -* The current implementation only supports TCP and ipv4. +* The current probe implementations only support IPv4. * The userland daemon is likely susceptible to interference or denial of service, however the main aim of the project is to reduce the MTTR for "business as usual" events - that is to make so engineers spend less time @@ -70,7 +75,7 @@ pidtree-bcc to work. Pidtree-bcc implements a module probe system which allows multiple eBPF programs to be compiled and run in parallel. Probe loading is handled by the top-level keys in the configuration (see [`example_config.yml`](example_config.yml)). -Currently, only the `tcp_connect` probe is implemented. +Currently, this repository implements the `tcp_connect` and `net_listen` probes. ## Usage > CAUTION! 
The Makefile calls 'docker run' with `--priveleged`, @@ -119,7 +124,8 @@ making TCP ipv4 `connect` syscalls like this one of me connecting to Freenode in "daddr": "185.30.166.37", "saddr": "X.X.X.X", "error": "", - "port": 6697 + "port": 6697, + "probe": "tcp_connect" } ``` diff --git a/example_config.yml b/example_config.yml index f0a6485..1ecb240 100644 --- a/example_config.yml +++ b/example_config.yml @@ -3,19 +3,19 @@ tcp_connect: filters: - subnet_name: 10 network: 10.0.0.0 - network_mask : 255.0.0.0 + network_mask: 255.0.0.0 description: "all RFC 1918 10/8" - subnet_name: 17216 network: 172.16.0.0 - network_mask : 255.240.0.0 + network_mask: 255.240.0.0 description: "all RFC 1918 172.16/12" - subnet_name: 169254 network: 169.254.0.0 - network_mask : 255.255.0.0 + network_mask: 255.255.0.0 description: "all 169.254/16 loopback" - subnet_name: 127 network: 127.0.0.0 - network_mask : 255.0.0.0 + network_mask: 255.0.0.0 description: "all 127/8 loopback" plugins: sourceipmap: @@ -23,3 +23,10 @@ tcp_connect: hostfiles: - '/etc/hosts' attribute_key: "source_host" +net_listen: + protocols: [tcp] + excludeports: + - 22222 + - 30000-40000 + excludeaddress: + - 127.0.0.1 diff --git a/itest/example_config.yml b/itest/example_config.yml index 45eb84a..fda572d 100644 --- a/itest/example_config.yml +++ b/itest/example_config.yml @@ -37,3 +37,6 @@ tcp_connect: network: 127.0.0.0 network_mask: 255.255.0.0 description: "127.0/16 to get rid of the noise" +net_listen: + excludeports: + - 31337 diff --git a/itest/itest.sh b/itest/itest.sh index 2f02014..b6b0b28 100755 --- a/itest/itest.sh +++ b/itest/itest.sh @@ -1,39 +1,45 @@ #!/bin/bash -eE -export FIFO_NAME=itest/itest_output_$$ -export TEST_SERVER_FIFO_NAME=itest/itest_server_$$ -export TEST_PORT=${TEST_PORT:-31337} +export OUTPUT_NAME=itest/itest_output_$$ +export TEST_CONNECT_PORT=${TEST_CONNECT_PORT:-31337} +export TEST_LISTEN_PORT=${TEST_LISTEN_PORT:-41337} +export TEST_LISTEN_TIMEOUT=${TEST_LISTEN_TIMEOUT:-5} export 
DEBUG=${DEBUG:-false} export CONTAINER_NAME=pidtree-itest_$1_$$ export TOPLEVEL=$(git rev-parse --show-toplevel) # The container takes a while to bootstrap so we have to wait before we emit the test event -SPIN_UP_TIME=10 +SPIN_UP_TIME=5 # We also need to timout the test if the test event *isn't* caught TIMEOUT=$(( SPIN_UP_TIME + 5 )) +# Format: test_name:test_event_generator:test_flag_to_match +TEST_CASES=( + "tcp_connect:create_connect_event:nc -w 1 127.1.33.7 $TEST_CONNECT_PORT" + "net_listen:create_listen_event:nc -w $TEST_LISTEN_TIMEOUT -lnp $TEST_LISTEN_PORT" +) function is_port_used { USED_PORTS=$(ss -4lnt | awk 'FS="[[:space:]]+" { print $4 }' | cut -d: -f2 | sort) - if [ "$(echo "$USED_PORTS" | grep -E "^${TEST_PORT}\$")" = "$TEST_PORT" ]; then - echo "ERROR: TEST_PORT=$TEST_PORT already in use, please reassign and try again" + if [ "$(echo "$USED_PORTS" | grep -E "^${1}\$")" = "$1" ]; then + echo "ERROR: port $1 already in use, please reassign and try again" exit 2 fi } -function create_event { +function create_connect_event { echo "Creating test listener" - mkfifo $TEST_SERVER_FIFO_NAME - cat $TEST_SERVER_FIFO_NAME | nc -l -p $TEST_PORT & - echo "Sleeping $SPIN_UP_TIME for pidtree-bcc to start" - sleep $SPIN_UP_TIME + nc -w $TEST_LISTEN_TIMEOUT -l -p $TEST_CONNECT_PORT & + listener_pid=$! + sleep 1 echo "Making test connection" - nc 127.1.33.7 $TEST_PORT & > /dev/null - CLIENT_PID=$! 
- echo "lolz" > $TEST_SERVER_FIFO_NAME - sleep 3 - echo "Killing test connection" - kill $CLIENT_PID - pkill cat + nc -w 1 127.1.33.7 $TEST_CONNECT_PORT + wait $listener_pid +} + +function create_listen_event { + echo "Creating test listener" + sleep 1 + nc -w $TEST_LISTEN_TIMEOUT -lnp $TEST_LISTEN_PORT } function cleanup { @@ -42,23 +48,19 @@ function cleanup { echo "CLEANUP: Killing container" docker kill $CONTAINER_NAME echo "CLEANUP: Removing FIFO" - rm -f $FIFO_NAME $TEST_SERVER_FIFO_NAME + rm -f $OUTPUT_NAME } function wait_for_tame_output { - RESULTS=0 - echo "Tailing output FIFO $FIFO_NAME to catch test traffic" - while read line; do - RESULTS="$(echo "$line" | jq -r ". | select( .daddr == \"127.1.33.7\" ) | select( .port == $TEST_PORT) | .proctree[0].cmdline" 2>&1)" - if [ "$RESULTS" = "nc 127.1.33.7 $TEST_PORT" ]; then - echo "Caught test traffic on 127.1.33.7:$TEST_PORT!" - return 0 + echo "Tailing output $OUTPUT_NAME to catch test traffic '$1'" + tail -n0 -f $OUTPUT_NAME | while read line; do + if echo "$line" | grep "$1"; then + echo "Caught test traffic matching '$1'" + exit 0 elif [ "$DEBUG" = "true" ]; then - echo "DEBUG: \$RESULTS is $RESULTS" echo "DEBUG: \$line is $line" fi - done < "$FIFO_NAME" - return 1 + done } function main { @@ -67,9 +69,10 @@ function main { exit 1 fi trap cleanup EXIT - is_port_used + is_port_used $TEST_CONNECT_PORT + is_port_used $TEST_LISTEN_PORT if [ "$DEBUG" = "true" ]; then set -x; fi - mkfifo $FIFO_NAME + touch $OUTPUT_NAME if [[ "$1" = "docker" ]]; then echo "Building itest image" # Build the base image @@ -86,7 +89,7 @@ function main { docker run --name $CONTAINER_NAME -d\ --rm --privileged --cap-add sys_admin --pid host \ -v $TOPLEVEL/itest/example_config.yml:/work/config.yml \ - -v $TOPLEVEL/$FIFO_NAME:/work/outfile \ + -v $TOPLEVEL/$OUTPUT_NAME:/work/outfile \ pidtree-itest -c /work/config.yml -f /work/outfile elif [[ "$1" = "ubuntu_xenial" || "$1" = "ubuntu_bionic" ]]; then if [ -f /etc/lsb-release ]; then @@ 
-102,27 +105,35 @@ function main { docker run --name $CONTAINER_NAME -d \ --rm --privileged --cap-add sys_admin --pid host \ -v $TOPLEVEL/itest/example_config.yml:/work/config.yml \ - -v $TOPLEVEL/$FIFO_NAME:/work/outfile \ + -v $TOPLEVEL/$OUTPUT_NAME:/work/outfile \ -v $TOPLEVEL/itest/dist/$1/:/work/dist \ -v $TOPLEVEL/itest/deb_package_itest.sh:/work/deb_package_itest.sh \ pidtree-itest-$1 /work/deb_package_itest.sh run -c /work/config.yml -f /work/outfile fi + echo "Sleeping $SPIN_UP_TIME seconds for pidtree-bcc to start" + sleep $SPIN_UP_TIME export -f wait_for_tame_output export -f cleanup - timeout $TIMEOUT bash -c wait_for_tame_output & - WAIT_FOR_OUTPUT_PID=$! - create_event & - WAIT_FOR_MOCK_EVENT=$! - set +e - wait $WAIT_FOR_OUTPUT_PID - if [ $? -ne 0 ]; then - echo "FAILED! (timeout)" - EXIT_CODE=1 - else - echo "SUCCESS!" - EXIT_CODE=0 - fi - wait $WAIT_FOR_MOCK_EVENT + EXIT_CODE=0 + for test_case in "${TEST_CASES[@]}"; do + test_name=$(echo "$test_case" | cut -d: -f1) + test_event=$(echo "$test_case" | cut -d: -f2) + test_check=$(echo "$test_case" | cut -d: -f3) + timeout $TIMEOUT bash -c "wait_for_tame_output '$test_check'" & + WAIT_FOR_OUTPUT_PID=$! + $test_event & + WAIT_FOR_MOCK_EVENT=$! + set +e + wait $WAIT_FOR_OUTPUT_PID + if [ $? -ne 0 ]; then + echo "$test_name: FAILED! (timeout)" + EXIT_CODE=1 + else + echo "$test_name: SUCCESS!" + EXIT_CODE=0 + fi + wait $WAIT_FOR_MOCK_EVENT + done exit $EXIT_CODE } diff --git a/pidtree_bcc/probes/__init__.py b/pidtree_bcc/probes/__init__.py index e2c1c9c..a6ba806 100644 --- a/pidtree_bcc/probes/__init__.py +++ b/pidtree_bcc/probes/__init__.py @@ -1,6 +1,8 @@ import inspect import json +import os.path import re +from datetime import datetime from multiprocessing import SimpleQueue from typing import Any @@ -27,11 +29,14 @@ def __init__(self, output_queue: SimpleQueue, probe_config: dict = {}): all fields are passed to the template engine with the exception of "plugins". 
This behaviour can be overidden with the TEMPLATE_VARS class variable defining a list of config fields. + It is possible for a child class to define a CONFIG_DEFAULTS class + variable containing default templating variables. """ self.output_queue = output_queue self.plugins = load_plugins(probe_config.get('plugins', {})) + module_src = inspect.getsourcefile(type(self)) + self.probe_name = os.path.basename(module_src).split('.')[0] if not hasattr(self, 'BPF_TEXT'): - module_src = inspect.getsourcefile(type(self)) with open(re.sub(r'\.py$', '.j2', module_src)) as f: self.BPF_TEXT = f.read() if hasattr(self, 'TEMPLATE_VARS'): @@ -39,6 +44,8 @@ class variable defining a list of config fields. else: template_config = probe_config.copy() template_config.pop('plugins', None) + if hasattr(self, 'CONFIG_DEFAULTS'): + template_config = {**self.CONFIG_DEFAULTS, **template_config} self.expanded_bpf_text = Template(self.BPF_TEXT).render(**template_config) def _process_events(self, cpu: Any, data: Any, size: Any): @@ -50,6 +57,8 @@ def _process_events(self, cpu: Any, data: Any, size: Any): """ event = self.bpf['events'].event(data) event = self.enrich_event(event) + event['timestamp'] = datetime.utcnow().isoformat() + 'Z' + event['probe'] = self.probe_name for event_plugin in self.plugins: event = event_plugin.process(event) self.output_queue.put(json.dumps(event)) diff --git a/pidtree_bcc/probes/net_listen.j2 b/pidtree_bcc/probes/net_listen.j2 new file mode 100644 index 0000000..5f08579 --- /dev/null +++ b/pidtree_bcc/probes/net_listen.j2 @@ -0,0 +1,113 @@ +#include +#include + +BPF_HASH(currsock, u32, struct sock*); +BPF_PERF_OUTPUT(events); + +struct listen_bind_t { + u32 pid; + u32 laddr; + u16 port; + u8 protocol; +}; + +static u8 get_socket_protocol(struct sock *sk) +{ + // I'd love to be the one to have figured this out, I'm not + // https://github.com/iovisor/bcc/blob/v0.16.0/tools/tcpaccept.py#L115 + u8 protocol; + int gso_max_segs_offset = offsetof(struct sock, 
sk_gso_max_segs); + int sk_lingertime_offset = offsetof(struct sock, sk_lingertime); + if (sk_lingertime_offset - gso_max_segs_offset == 4) { + protocol = *(u8 *)((u64)&sk->sk_gso_max_segs - 3); + } else { + protocol = *(u8 *)((u64)&sk->sk_wmem_queued - 3); + } + return protocol; +} + +static void net_listen_event(struct pt_regs *ctx) +{ + u32 pid = bpf_get_current_pid_tgid(); + struct sock** skp = currsock.lookup(&pid); + if (skp == 0) return; + int ret = PT_REGS_RC(ctx); + if (ret != 0) { + currsock.delete(&pid); + return; + } + u32 laddr = 0; + u16 port = 0; + struct sock* sk = *skp; + bpf_probe_read(&laddr, sizeof(u32), &sk->__sk_common.skc_rcv_saddr); + bpf_probe_read(&port, sizeof(u16), &sk->__sk_common.skc_num); + + {% if excludeaddress or excludeports -%} + if (0 + {% for addr in excludeaddress -%} + || laddr == {{ ip_to_int(addr) }} + {% endfor -%} + {% for port in excludeports -%} + {%- set port = port | string -%} + {% if '-' in port -%} + {%- set from_port, to_port = port.split('-') -%} + || (port >= {{ from_port }} && port <= {{ to_port }}) + {% else -%} + || port == {{ port }} + {% endif -%} + {%- endfor -%} + ) { + currsock.delete(&pid); + return; + } + {% endif -%} + + struct listen_bind_t listen = {}; + listen.pid = pid; + listen.port = port; + listen.laddr = laddr; + listen.protocol = get_socket_protocol(sk); + events.perf_submit(ctx, &listen, sizeof(listen)); + currsock.delete(&pid); +} + +{% if 'udp' in protocols -%} +int kprobe__inet_bind( + struct pt_regs *ctx, + struct socket *sock, + const struct sockaddr *addr, + int addrlen) +{ + struct sock* sk = sock->sk; + u8 protocol = get_socket_protocol(sk); + if (sk->__sk_common.skc_family == AF_INET && protocol == IPPROTO_UDP) { + u32 pid = bpf_get_current_pid_tgid(); + currsock.update(&pid, &sk); + } + return 0; +} + +int kretprobe__inet_bind(struct pt_regs *ctx) +{ + net_listen_event(ctx); + return 0; +} +{% endif -%} + +{% if 'tcp' in protocols -%} +int kprobe__inet_listen(struct pt_regs *ctx, 
struct socket *sock, int backlog) +{ + struct sock* sk = sock->sk; + if (sk->__sk_common.skc_family == AF_INET) { + u32 pid = bpf_get_current_pid_tgid(); + currsock.update(&pid, &sk); + } + return 0; +} + +int kretprobe__inet_listen(struct pt_regs *ctx) +{ + net_listen_event(ctx); + return 0; +} +{% endif -%} diff --git a/pidtree_bcc/probes/net_listen.py b/pidtree_bcc/probes/net_listen.py new file mode 100644 index 0000000..122fdee --- /dev/null +++ b/pidtree_bcc/probes/net_listen.py @@ -0,0 +1,45 @@ +import inspect +import socket +import traceback +from typing import Any + +from pidtree_bcc.probes import BPFProbe +from pidtree_bcc.utils import crawl_process_tree +from pidtree_bcc.utils import int_to_ip +from pidtree_bcc.utils import ip_to_int + + +class NetListenProbe(BPFProbe): + + PROTO_MAP = { + value: name.split('_')[1].lower() + for name, value in inspect.getmembers(socket) + if name.startswith('IPPROTO_') + } + CONFIG_DEFAULTS = { + 'ip_to_int': ip_to_int, + 'protocols': ['tcp'], + 'excludeaddress': [], + 'excludeports': [], + } + + def enrich_event(self, event: Any) -> dict: + """ Parses network "listen event" and adds process tree data + + :param Any event: BPF event + :return: event dictionary with process tree + """ + error = '' + try: + proctree = list(crawl_process_tree(event.pid)) + except Exception: + error = traceback.format_exc() + proctree = [] + return { + 'pid': event.pid, + 'port': event.port, + 'proctree': proctree, + 'laddr': int_to_ip(event.laddr), + 'protocol': self.PROTO_MAP.get(event.protocol, 'unknown'), + 'error': error, + } diff --git a/pidtree_bcc/probes/tcp_connect.py b/pidtree_bcc/probes/tcp_connect.py index 039d8ac..c3c0f40 100644 --- a/pidtree_bcc/probes/tcp_connect.py +++ b/pidtree_bcc/probes/tcp_connect.py @@ -1,24 +1,19 @@ -import socket -import struct import traceback -from datetime import datetime -from multiprocessing import SimpleQueue from typing import Any -import psutil - from pidtree_bcc.probes import BPFProbe from 
pidtree_bcc.utils import crawl_process_tree +from pidtree_bcc.utils import int_to_ip from pidtree_bcc.utils import ip_to_int class TCPConnectProbe(BPFProbe): - def __init__(self, output_queue: SimpleQueue, probe_config: dict = {}): - probe_config['ip_to_int'] = ip_to_int - probe_config.setdefault('filters', []) - probe_config.setdefault('includeports', []) - super().__init__(output_queue, probe_config) + CONFIG_DEFAULTS = { + 'ip_to_int': ip_to_int, + 'filters': [], + 'includeports': [], + } def enrich_event(self, event: Any) -> dict: """ Parses TCP connect event and adds process tree data @@ -26,30 +21,21 @@ def enrich_event(self, event: Any) -> dict: :param Any event: BPF event :return: event dictionary with process tree """ - proctree_enriched = [] error = '' try: - proc = psutil.Process(event.pid) - proctree = crawl_process_tree(proc) - proctree_enriched = [ - { - 'pid': p.pid, - 'cmdline': ' '.join(p.cmdline()), - 'username': p.username(), - } for p in proctree - ] + proctree = list(crawl_process_tree(event.pid)) except Exception: error = traceback.format_exc() + proctree = [] return { - 'timestamp': datetime.utcnow().isoformat() + 'Z', 'pid': event.pid, - 'proctree': proctree_enriched, + 'proctree': proctree, # We're turning a little-endian insigned long (' List[psutil.Process]: +def crawl_process_tree(pid: int) -> Generator[dict, None, None]: """ Takes a process and returns all process ancestry until the ppid is 0 - :param psutil.Process proc: child process - :return: process tree as a list + :param int pid: child process ID + :return: yields dicts with pid, cmdline and username navigating up the tree """ - procs = [proc] while True: - ppid = procs[len(procs)-1].ppid() - if ppid == 0: + if pid == 0: break - procs.append(psutil.Process(ppid)) - return procs + proc = psutil.Process(pid) + yield { + 'pid': proc.pid, + 'cmdline': ' '.join(proc.cmdline()), + 'username': proc.username(), + } + pid = proc.ppid() def smart_open(filename: str = None, mode: str = 'r') 
-> TextIO: @@ -63,3 +66,12 @@ def ip_to_int(network: str) -> int: :return: unsigned integer encoding """ return struct.unpack('=L', socket.inet_aton(network))[0] + + +def int_to_ip(encoded_ip: int) -> str: + """ Takes IP in integer representation and makes it human readable + + :param int encoded_ip: integer encoded IP + :return: dot-notation IP + """ + return socket.inet_ntoa(struct.pack('= 1 - assert tree[0].pid == this_pid - assert tree[-1].pid == 1 # should be init + assert tree[0]['pid'] == this_pid + assert tree[-1]['pid'] == 1 # should be init -def test_smart_open(this_file): +def test_smart_open(): + this_file = os.path.abspath(__file__) assert utils.smart_open() == sys.stdout assert utils.smart_open('-') == sys.stdout assert utils.smart_open(this_file).name == this_file @@ -39,3 +22,8 @@ def test_smart_open(this_file): def test_ip_to_int(): assert utils.ip_to_int('127.0.0.1') == 16777343 assert utils.ip_to_int('10.10.10.10') == 168430090 + + +def test_int_to_ip(): + assert utils.int_to_ip(16777343) == '127.0.0.1' + assert utils.int_to_ip(168430090) == '10.10.10.10'