Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial eBPF instrumentation to track process context switches, V1 bpftrace prototype #51

Merged
merged 2 commits into from
Feb 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions cmd/bpftracer/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package main

import (
"fmt"
"os"
"os/exec"
// "os/signal"
// "syscall"
"time"
)

// main launches the bpftrace script under sudo, lets it trace for a fixed
// window, then asks it to stop and waits until the child has actually exited.
//
// The child is stopped with an interrupt rather than a kill: sudo relays an
// interrupt to the command it runs but cannot relay SIGKILL, so killing sudo
// could leave bpftrace running with its probes attached. An interrupt also
// lets bpftrace run its END probe before exiting.
func main() {
	// Path to bpftrace script
	scriptPath := "./unvariance_bpftracer.bt"

	// Command to run the bpftrace script
	cmd := exec.Command("sudo", "bpftrace", scriptPath)

	// Stream the tracer's output straight through to our own stdout/stderr.
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr

	if err := cmd.Start(); err != nil {
		fmt.Fprintf(os.Stderr, "Failed to start bpftrace: %v\n", err)
		os.Exit(1)
	}

	// Reap the child in the background. The channel is buffered so the
	// goroutine can always deliver its result and terminate, even if main
	// has stopped listening.
	done := make(chan error, 1)
	go func() { done <- cmd.Wait() }()

	// Wait for a set time, unless bpftrace exits on its own first
	// (for example because the script failed to load).
	fmt.Println("Running bpftrace for set time...")
	select {
	case err := <-done:
		if err != nil {
			fmt.Fprintf(os.Stderr, "bpftrace exited with error: %v\n", err)
			os.Exit(1)
		}
		return
	case <-time.After(1000 * time.Millisecond):
	}

	// Ask bpftrace to stop gracefully.
	fmt.Println("Stopping bpftrace...")
	if err := cmd.Process.Signal(os.Interrupt); err != nil {
		fmt.Fprintf(os.Stderr, "Failed to interrupt bpftrace: %v\n", err)
	}

	// Give the child a bounded amount of time to shut down before falling
	// back to a hard kill, so main never returns with the child unreaped.
	select {
	case err := <-done:
		if err != nil {
			fmt.Fprintf(os.Stderr, "bpftrace exited with error: %v\n", err)
		}
	case <-time.After(5 * time.Second):
		fmt.Fprintln(os.Stderr, "bpftrace did not exit after interrupt, killing it")
		if err := cmd.Process.Kill(); err != nil {
			// Nothing more can be done from here; do not block forever on
			// a process we are not able to stop.
			fmt.Fprintf(os.Stderr, "Failed to kill bpftrace: %v\n", err)
			os.Exit(1)
		}
		<-done
	}
}
78 changes: 78 additions & 0 deletions cmd/bpftracer/unvariance_bpftracer.bt
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/usr/bin/env bpftrace

// Startup banner, printed once before any probe output.
BEGIN
{
    printf("Tracing context switches after memory events by process... Output every 1ms.\n");
}


// Memory activity per process. Three tracepoints feed one shared counter,
// keyed by (comm, pid): user-space page faults, page allocations, and the
// start of direct reclaim.
// A software attach point could be considered as an alternative, e.g.:
//   software:page-faults:100 { @[comm] = count(); }
// kernel-space
tracepoint:exceptions:page_fault_user,
tracepoint:kmem:mm_page_alloc,
tracepoint:vmscan:mm_vmscan_direct_reclaim_begin
{
    @memory_events[comm, pid] = count();
}


// Hardware counters per process, each keyed by (comm, pid).
//   - cache misses: one sample per miss, i.e. every miss is counted
//   - cpu cycles:   one sample per million cycles (~1 digit per millisecond)
//   - instructions: one sample per million instructions (~1 digit per millisecond)
// kernel-space
hardware:cache-misses:1
{
    @cache_misses[comm, pid] = count();
}

hardware:cpu-cycles:1e6
{
    @cpu_cycles[comm, pid] = count();
}

hardware:instructions:1e6
{
    @instructions[comm, pid] = count();
}


// Count context switches for processes that have recorded BOTH memory events
// and cache misses since the maps were last cleared, and record an
// approximate cycles-per-instruction figure for them (ratio of sampled
// cycle count to sampled instruction count, integer division).
//
// The lookups use the comm/pid builtins rather than args->prev_comm and
// args->prev_pid: sched_switch fires while the outgoing (prev) task is still
// current, and args->prev_pid is a thread ID, whereas the other maps are
// keyed by the pid builtin (process ID). Using the builtins keeps the keys
// consistent, so switches of non-main threads are attributed to their process.
// kernel-space
tracepoint:sched:sched_switch {
    if (@memory_events[comm, pid] && @cache_misses[comm, pid]) {
        @context_switches[comm, pid] = count();

        // Skip the ratio until at least one instruction sample exists, so
        // the division never has a zero divisor.
        if (@instructions[comm, pid]) {
            @cycles_per_instruction[comm, pid] = @cpu_cycles[comm, pid] / @instructions[comm, pid];
        }
    }
}


// Periodic report, fired every millisecond (the preferred target interval).
// Each firing:
//   1. prints a header with the elapsed time converted to milliseconds;
//   2. walks @context_switches, so only processes that passed the filter in
//      the sched_switch probe are reported, and for each one prints process
//      name, PID, context switches, cache misses, memory events, and cycles
//      per instruction;
//   3. clears all six maps so the next interval starts from zero;
//   4. prints the elapsed time again, so the two timestamps bracket the
//      read-and-print work (at millisecond resolution).
// Statement order matters: the maps must be read before they are cleared.
// user-space + kernel-space, synchronous read (expensive) coerced by type cast, asynchronous map clear
interval:ms:1 {

printf("\nMetrics at %d ms:\n", elapsed / 1000000);

// $kv.0 is the (comm, pid) key tuple; the same key is used to look up the
// matching entries in the other maps.
for ($kv : @context_switches) {
printf("Process comm: %s\nPID: %d\nContext switches: %d\nCache misses: %d\nMemory events: %d\nCycles per instruction: %d\n",
$kv.0.0, (int64)$kv.0.1,
(int64)@context_switches[$kv.0.0, $kv.0.1],
(int64)@cache_misses[$kv.0.0, $kv.0.1],
(int64)@memory_events[$kv.0.0, $kv.0.1],
(int64)@cycles_per_instruction[$kv.0.0, $kv.0.1]);
}

// Reset every map for the next interval.
clear(@cpu_cycles);
clear(@instructions);
clear(@context_switches);
clear(@cycles_per_instruction);
clear(@cache_misses);
clear(@memory_events);

printf("Operation completed at %d ms\n", elapsed / 1000000);
}

// Shutdown hook: runs when bpftrace receives an interrupt signal and exits
// gracefully, at which point any remaining maps are printed as well.
// Open question: should that final map print be switchable off through a
// config variable?
END
{
    printf("Tracing stopped.\n");
}
Loading