From e1489ccb938fa1f8ed8726b58d74f3e88df6dcbd Mon Sep 17 00:00:00 2001 From: Ofek Shaked Date: Mon, 25 Nov 2024 15:30:04 +0200 Subject: [PATCH] Detect more VMA types Golang heaps can be determined by a pattern in the address, dictated by address hints supplied to mmap while allocating memory for them. Thread stacks can be identified by tracking the stack VMA for all newly created threads. --- pkg/ebpf/c/common/memory.h | 89 ++++++++++++++++--- pkg/ebpf/c/maps.h | 10 +++ pkg/ebpf/c/tracee.bpf.c | 72 +++++++++++++-- pkg/ebpf/c/types.h | 5 ++ pkg/ebpf/c/vmlinux.h | 23 +++++ pkg/ebpf/probes/probe_group.go | 1 + pkg/ebpf/probes/probes.go | 1 + pkg/events/core.go | 9 +- .../e2e-suspicious_syscall_source.go | 17 ++-- .../scripts/suspicious_syscall_source.sh | 5 +- .../scripts/sys_src_tester.c | 41 +++++++++ 11 files changed, 244 insertions(+), 29 deletions(-) diff --git a/pkg/ebpf/c/common/memory.h b/pkg/ebpf/c/common/memory.h index 6f8e0945c8f5..bb93d1df2676 100644 --- a/pkg/ebpf/c/common/memory.h +++ b/pkg/ebpf/c/common/memory.h @@ -7,10 +7,14 @@ enum vma_type { + VMA_FILE_BACKED, VMA_STACK, VMA_HEAP, + VMA_GOLANG_HEAP, + VMA_THREAD_STACK, + VMA_VDSO, VMA_ANON, - VMA_OTHER + VMA_UNKNOWN, }; // PROTOTYPES @@ -22,11 +26,14 @@ statfunc unsigned long get_env_start_from_mm(struct mm_struct *); statfunc unsigned long get_env_end_from_mm(struct mm_struct *); statfunc unsigned long get_vma_flags(struct vm_area_struct *); statfunc struct vm_area_struct *find_vma(void *ctx, struct task_struct *task, u64 addr); -statfunc bool vma_is_stack(struct vm_area_struct *vma); -statfunc bool vma_is_heap(struct vm_area_struct *vma); +statfunc bool vma_is_file_backed(struct vm_area_struct *vma); +statfunc bool vma_is_initial_stack(struct vm_area_struct *vma); +statfunc bool vma_is_initial_heap(struct vm_area_struct *vma); statfunc bool vma_is_anon(struct vm_area_struct *vma); +statfunc bool vma_is_golang_heap(struct vm_area_struct *vma); +statfunc bool vma_is_thread_stack(struct task_struct 
*task, struct vm_area_struct *vma); statfunc bool vma_is_vdso(struct vm_area_struct *vma); -statfunc enum vma_type get_vma_type(struct vm_area_struct *vma); +statfunc enum vma_type get_vma_type(struct task_struct *task, struct vm_area_struct *vma); // FUNCTIONS @@ -121,7 +128,12 @@ statfunc struct vm_area_struct *find_vma(void *ctx, struct task_struct *task, u6 return vma; } -statfunc bool vma_is_stack(struct vm_area_struct *vma) +statfunc bool vma_is_file_backed(struct vm_area_struct *vma) +{ + return BPF_CORE_READ(vma, vm_file) != NULL; +} + +statfunc bool vma_is_initial_stack(struct vm_area_struct *vma) { struct mm_struct *vm_mm = BPF_CORE_READ(vma, vm_mm); if (vm_mm == NULL) @@ -138,7 +150,7 @@ statfunc bool vma_is_stack(struct vm_area_struct *vma) return false; } -statfunc bool vma_is_heap(struct vm_area_struct *vma) +statfunc bool vma_is_initial_heap(struct vm_area_struct *vma) { struct mm_struct *vm_mm = BPF_CORE_READ(vma, vm_mm); if (vm_mm == NULL) @@ -158,7 +170,46 @@ statfunc bool vma_is_heap(struct vm_area_struct *vma) statfunc bool vma_is_anon(struct vm_area_struct *vma) { - return BPF_CORE_READ(vma, vm_file) == NULL; + return !vma_is_file_backed(vma); +} + +// The golang heap consists of arenas which are memory regions mapped using mmap. +// When allocating arenas, golang supplies mmap with an address hint, which is an +// address that the kernel should place the mapping at. +// Hints are constant and vary between architectures, see `mallocinit()` in +// https://github.com/golang/go/blob/master/src/runtime/malloc.go +// From observation, when allocating arenas the MAP_FIXED flag is used which forces +// the kernel to use the specified address or fail the mapping, so it is safe to +// rely on the address pattern to determine if it belongs to a heap arena. 
+#define GOLANG_ARENA_HINT_MASK 0x80ff00000000UL +#if defined(bpf_target_x86) + #define GOLANG_ARENA_HINT (0xc0UL << 32) +#elif defined(bpf_target_arm64) + #define GOLANG_ARENA_HINT (0x40UL << 32) +#else + #error Unsupported architecture +#endif + +statfunc bool vma_is_golang_heap(struct vm_area_struct *vma) +{ + u64 vm_start = BPF_CORE_READ(vma, vm_start); + + return (vm_start & GOLANG_ARENA_HINT_MASK) == GOLANG_ARENA_HINT; +} + +statfunc bool vma_is_thread_stack(struct task_struct *task, struct vm_area_struct *vma) +{ + // Look up the stack VMA for this task + pid_t pid = BPF_CORE_READ(task, pid); + address_range_t *stack = bpf_map_lookup_elem(&thread_stacks, &pid); + if (stack == NULL) + // This thread's stack isn't tracked + return false; + + // Check if the VMA is **contained** in the thread stack range. + // We don't check exact address range match because a change to the permissions + // of part of the stack VMA will split it into multiple VMAs. + return BPF_CORE_READ(vma, vm_start) >= stack->start && BPF_CORE_READ(vma, vm_end) <= stack->end; } statfunc bool vma_is_vdso(struct vm_area_struct *vma) @@ -174,19 +225,33 @@ statfunc bool vma_is_vdso(struct vm_area_struct *vma) return strncmp("[vdso]", mapping_name, 7) == 0; } -statfunc enum vma_type get_vma_type(struct vm_area_struct *vma) +statfunc enum vma_type get_vma_type(struct task_struct *task, struct vm_area_struct *vma) { - if (vma_is_stack(vma)) + // The check order is a balance between how expensive the check is and how likely it is to pass + + if (vma_is_file_backed(vma)) + return VMA_FILE_BACKED; + + if (vma_is_initial_stack(vma)) return VMA_STACK; - if (vma_is_heap(vma)) + if (vma_is_initial_heap(vma)) return VMA_HEAP; - if (vma_is_anon(vma) && !vma_is_vdso(vma)) { + if (vma_is_anon(vma)) { + if (vma_is_golang_heap(vma)) + return VMA_GOLANG_HEAP; + + if (vma_is_thread_stack(task, vma)) + return VMA_THREAD_STACK; + + if (vma_is_vdso(vma)) + return VMA_VDSO; + return VMA_ANON; } - return VMA_OTHER; + 
return VMA_UNKNOWN; } #endif diff --git a/pkg/ebpf/c/maps.h b/pkg/ebpf/c/maps.h index 17f36dae37bd..908173a86246 100644 --- a/pkg/ebpf/c/maps.h +++ b/pkg/ebpf/c/maps.h @@ -395,6 +395,16 @@ struct elf_files_map { typedef struct elf_files_map elf_files_map_t; +// keep track of thread stacks +struct thread_stacks { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 16384); + __type(key, pid_t); + __type(value, address_range_t); +} thread_stacks SEC(".maps"); + +typedef struct thread_stacks thread_stacks_t; + // // versioned maps (map of maps) // diff --git a/pkg/ebpf/c/tracee.bpf.c b/pkg/ebpf/c/tracee.bpf.c index c58a1d4e7a03..93e315587794 100644 --- a/pkg/ebpf/c/tracee.bpf.c +++ b/pkg/ebpf/c/tracee.bpf.c @@ -1294,6 +1294,14 @@ int lkm_seeker_new_mod_only_tail(struct pt_regs *ctx) SEC("raw_tracepoint/sched_process_exec") int tracepoint__sched__sched_process_exec(struct bpf_raw_tracepoint_args *ctx) { + // Thread stacks map upkeeping + pid_t pid = bpf_get_current_pid_tgid(); + bpf_map_delete_elem(&thread_stacks, &pid); + pid_t old_pid = ctx->args[1]; + if (old_pid != pid) + // execve was called from a thread and it inherited the main thread's PID, remove the old PID as well + bpf_map_delete_elem(&thread_stacks, &old_pid); + program_data_t p = {}; if (!init_program_data(&p, ctx, SCHED_PROCESS_EXEC)) return 0; @@ -1432,6 +1440,10 @@ int sched_process_exec_event_submit_tail(struct bpf_raw_tracepoint_args *ctx) SEC("raw_tracepoint/sched_process_exit") int tracepoint__sched__sched_process_exit(struct bpf_raw_tracepoint_args *ctx) { + // Thread stacks map upkeeping + pid_t pid = bpf_get_current_pid_tgid(); + bpf_map_delete_elem(&thread_stacks, &pid); + program_data_t p = {}; if (!init_program_data(&p, ctx, SCHED_PROCESS_EXIT)) return 0; @@ -5184,6 +5196,40 @@ int BPF_KPROBE(trace_chmod_common) return events_perf_submit(&p, 0); } +// Keep track of new threads' stacks +SEC("kprobe/wake_up_new_task") +int BPF_KPROBE(trace_wake_up_new_task) +{ + struct task_struct 
*task = (struct task_struct *) PT_REGS_PARM1(ctx); + + if (get_task_flags(task) & PF_KTHREAD) + return 0; + + // Get user SP of new thread +#if defined(bpf_target_x86) + struct fork_frame *fork_frame = (struct fork_frame *) BPF_CORE_READ(task, thread.sp); + u64 thread_sp = BPF_CORE_READ(fork_frame, regs.sp); +#elif defined(bpf_target_arm64) + struct pt_regs *thread_regs = (struct pt_regs *) BPF_CORE_READ(task, thread.cpu_context.sp); + u64 thread_sp = BPF_CORE_READ(thread_regs, sp); +#else + #error Unsupported architecture +#endif + + // Find VMA which contains the SP + struct vm_area_struct *vma = find_vma(ctx, task, thread_sp); + if (unlikely(vma == NULL)) + return 0; + + // Add the VMA address range to the thread stacks map + pid_t pid = BPF_CORE_READ(task, pid); + address_range_t range = {.start = BPF_CORE_READ(vma, vm_start), + .end = BPF_CORE_READ(vma, vm_end)}; + bpf_map_update_elem(&thread_stacks, &pid, &range, BPF_ANY); + + return 0; +} + // // Syscall checkers // @@ -5215,11 +5261,13 @@ statfunc void check_suspicious_syscall_source(void *ctx, struct pt_regs *regs, u if (unlikely(vma == NULL)) return; - // Get VMA type and make sure it's abnormal (stack/heap/anonymous VMA) - enum vma_type vma_type = get_vma_type(vma); - if (vma_type == VMA_OTHER) + // If the VMA is file-backed, the syscall is determined to be legitimate + if (vma_is_file_backed(vma)) return; + // Get VMA type + enum vma_type vma_type = get_vma_type(task, vma); + // Build a key that identifies the combination of syscall, // source VMA and process so we don't submit it multiple times syscall_source_key_t key = {.syscall = syscall, @@ -5237,17 +5285,27 @@ statfunc void check_suspicious_syscall_source(void *ctx, struct pt_regs *regs, u switch (vma_type) { case VMA_STACK: - vma_type_str = "stack"; + vma_type_str = "main stack"; + break; + case VMA_THREAD_STACK: + vma_type_str = "thread stack"; break; case VMA_HEAP: vma_type_str = "heap"; break; + case VMA_GOLANG_HEAP: + // Goroutine stacks are 
allocated on the golang heap + vma_type_str = "golang heap/stack"; + break; case VMA_ANON: vma_type_str = "anonymous"; break; - // shouldn't happen + case VMA_VDSO: + vma_type_str = "vdso"; + break; default: - return; + vma_type_str = "unknown"; + break; } unsigned long vma_start = BPF_CORE_READ(vma, vm_start); @@ -5271,7 +5329,7 @@ int BPF_KPROBE(syscall_checker) struct pt_regs *regs = ctx; if (get_kconfig(ARCH_HAS_SYSCALL_WRAPPER)) regs = (struct pt_regs *) PT_REGS_PARM1(ctx); - + // Get syscall ID u32 syscall = get_syscall_id_from_regs(regs); diff --git a/pkg/ebpf/c/types.h b/pkg/ebpf/c/types.h index 6b5bed9c3a21..db1e68b0b937 100644 --- a/pkg/ebpf/c/types.h +++ b/pkg/ebpf/c/types.h @@ -577,4 +577,9 @@ typedef struct { u64 vma_addr; } syscall_source_key_t; +typedef struct { + u64 start; + u64 end; +} address_range_t; + #endif diff --git a/pkg/ebpf/c/vmlinux.h b/pkg/ebpf/c/vmlinux.h index 3d8196187a08..210755473808 100644 --- a/pkg/ebpf/c/vmlinux.h +++ b/pkg/ebpf/c/vmlinux.h @@ -256,6 +256,28 @@ typedef struct { uid_t val; } kuid_t; +#if defined(__TARGET_ARCH_x86) + +struct thread_struct { + unsigned long sp; +}; + +struct fork_frame { + struct pt_regs regs; +}; + +#elif defined(__TARGET_ARCH_arm64) + +struct cpu_context { + unsigned long sp; +}; + +struct thread_struct { + struct cpu_context cpu_context; +}; + +#endif + struct task_struct { struct thread_info thread_info; unsigned int flags; @@ -278,6 +300,7 @@ struct task_struct { struct signal_struct *signal; void *stack; struct sighand_struct *sighand; + struct thread_struct thread; }; typedef struct { diff --git a/pkg/ebpf/probes/probe_group.go b/pkg/ebpf/probes/probe_group.go index ee0d353c9ed9..c59433ec8d1f 100644 --- a/pkg/ebpf/probes/probe_group.go +++ b/pkg/ebpf/probes/probe_group.go @@ -257,6 +257,7 @@ func NewDefaultProbeGroup(module *bpf.Module, netEnabled bool) (*ProbeGroup, err Dup3: NewTraceProbe(SyscallEnter, "dup3", "trace_dup3"), Dup3Ret: NewTraceProbe(SyscallExit, "dup3", "trace_ret_dup3"), 
ChmodCommon: NewTraceProbe(KProbe, "chmod_common", "trace_chmod_common"), + WakeUpNewTask: NewTraceProbe(KProbe, "wake_up_new_task", "trace_wake_up_new_task"), TestUnavailableHook: NewTraceProbe(KProbe, "non_existing_func", "empty_kprobe"), ExecTest: NewTraceProbe(RawTracepoint, "raw_syscalls:sched_process_exec", "tracepoint__exec_test"), diff --git a/pkg/ebpf/probes/probes.go b/pkg/ebpf/probes/probes.go index 85ed9376b6d7..5aeb18c7b487 100644 --- a/pkg/ebpf/probes/probes.go +++ b/pkg/ebpf/probes/probes.go @@ -162,6 +162,7 @@ const ( Dup3 Dup3Ret ChmodCommon + WakeUpNewTask ) // Test probe handles diff --git a/pkg/events/core.go b/pkg/events/core.go index 0aca7e0628e2..7cb790f5d992 100644 --- a/pkg/events/core.go +++ b/pkg/events/core.go @@ -13065,7 +13065,14 @@ var CoreEvents = map[ID]Definition{ id: SuspiciousSyscallSource, id32Bit: Sys32Undefined, name: "suspicious_syscall_source", - sets: []string{}, + dependencies: Dependencies{ + probes: []Probe{ + {handle: probes.WakeUpNewTask, required: false}, // for thread stack tracking + {handle: probes.SchedProcessExec, required: false}, // for thread stack tracking + {handle: probes.SchedProcessExit, required: false}, // for thread stack tracking + }, + }, + sets: []string{}, fields: []trace.ArgMeta{ {Type: "int", Name: "syscall"}, {Type: "void*", Name: "ip"}, diff --git a/tests/e2e-inst-signatures/e2e-suspicious_syscall_source.go b/tests/e2e-inst-signatures/e2e-suspicious_syscall_source.go index ebd271021f35..5a67c3cf9f56 100644 --- a/tests/e2e-inst-signatures/e2e-suspicious_syscall_source.go +++ b/tests/e2e-inst-signatures/e2e-suspicious_syscall_source.go @@ -11,10 +11,11 @@ import ( ) type e2eSuspiciousSyscallSource struct { - cb detect.SignatureHandler - foundStack bool - foundHeap bool - foundAnonVma bool + cb detect.SignatureHandler + foundMainStack bool + foundHeap bool + foundAnonVma bool + foundThreadStack bool } func (sig *e2eSuspiciousSyscallSource) Init(ctx detect.SignatureContext) error { @@ -63,17 +64,19 
@@ func (sig *e2eSuspiciousSyscallSource) OnEvent(event protocol.Event) error { return nil } - if vmaType == "stack" { - sig.foundStack = true + if vmaType == "main stack" { + sig.foundMainStack = true } else if vmaType == "heap" { sig.foundHeap = true } else if vmaType == "anonymous" { sig.foundAnonVma = true + } else if vmaType == "thread stack" { + sig.foundThreadStack = true } else { return nil } - if !sig.foundStack || !sig.foundHeap || !sig.foundAnonVma { + if !sig.foundMainStack || !sig.foundHeap || !sig.foundAnonVma || !sig.foundThreadStack { return nil } diff --git a/tests/e2e-inst-signatures/scripts/suspicious_syscall_source.sh b/tests/e2e-inst-signatures/scripts/suspicious_syscall_source.sh index af27ae8b72f7..e54c3a2ee3b3 100755 --- a/tests/e2e-inst-signatures/scripts/suspicious_syscall_source.sh +++ b/tests/e2e-inst-signatures/scripts/suspicious_syscall_source.sh @@ -8,7 +8,8 @@ exit_err() { prog=sys_src_tester dir=tests/e2e-inst-signatures/scripts -gcc $dir/$prog.c -o $dir/$prog -z execstack || exit_err "could not compile $prog.c" +gcc $dir/$prog.c -pthread -o $dir/$prog -z execstack || exit_err "could not compile $prog.c" ./$dir/$prog stack 2>&1 > /tmp/$prog.log || exit_err "could not run $prog" ./$dir/$prog heap 2>&1 > /tmp/$prog.log || exit_err "could not run $prog" -./$dir/$prog mmap 2>&1 > /tmp/$prog.log || exit_err "could not run $prog" \ No newline at end of file +./$dir/$prog mmap 2>&1 > /tmp/$prog.log || exit_err "could not run $prog" +./$dir/$prog thread-stack 2>&1 > /tmp/$prog.log || exit_err "could not run $prog" \ No newline at end of file diff --git a/tests/e2e-inst-signatures/scripts/sys_src_tester.c b/tests/e2e-inst-signatures/scripts/sys_src_tester.c index 8de6a51c1c24..38325780680c 100644 --- a/tests/e2e-inst-signatures/scripts/sys_src_tester.c +++ b/tests/e2e-inst-signatures/scripts/sys_src_tester.c @@ -6,6 +6,7 @@ #include #include #include +#include // exit(0); #if defined(__x86_64__) @@ -24,6 +25,8 @@ char shellcode[] = 
SHELLCODE; +void *thread_func(void *); + int main(int argc, char *argv[]) { if (argc != 2) @@ -84,8 +87,46 @@ int main(int argc, char *argv[]) goto fail; } + if (strcmp(argv[1], "thread-stack") == 0) { + // spawn a new thread which will run the shellcode from its stack + pthread_t thread; + if (pthread_create(&thread, NULL, thread_func, NULL) != 0) { + perror("pthread_create failed"); + goto fail; + } + + // wait for the new thread to exit + if (pthread_join(thread, NULL) != 0) { + perror("pthread_join failed"); + goto fail; + } + + return 0; + } + usage: printf("usage: ./sys_src_tester [stack|heap|mmap]\n"); fail: exit(EXIT_FAILURE); } + +void *thread_func(void *arg) +{ + // place a private copy of the shellcode on this thread's stack + char shellcode_stack[] = SHELLCODE; + + // make the page holding the copy (and the following page) executable + if (mprotect((void *)((unsigned long long)shellcode_stack & ~(sysconf(_SC_PAGE_SIZE) - 1)), 2 * sysconf(_SC_PAGE_SIZE), PROT_READ | PROT_WRITE | PROT_EXEC) == -1) { + perror("mprotect failed"); + return NULL; + } + + // on aarch64, flush the instruction cache over exactly the copied bytes, then jump to the shellcode +#if defined(__aarch64__) + __builtin___clear_cache(shellcode_stack, shellcode_stack + sizeof(shellcode_stack)); +#endif + ((void (*)(void))shellcode_stack)(); + + // cannot be reached + return NULL; +}