Skip to content

Commit

Permalink
Create python scripts to analyz KFT output
Browse files Browse the repository at this point in the history
  • Loading branch information
franciscozdo committed Jun 5, 2023
1 parent 0ac2874 commit 7c5655c
Show file tree
Hide file tree
Showing 3 changed files with 382 additions and 0 deletions.
1 change: 1 addition & 0 deletions kft/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Kernel Function Trace
196 changes: 196 additions & 0 deletions kft/analayze-thread.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
#!/usr/bin/env python3

import argparse
import pickle
import sys

from tqdm import tqdm

# Accessors for KFT event tuple
TYPE = 0
TIME = 1
PC = 2

KFT_IN = 0
KFT_OUT = 1


class KFT():
def __init__(self, events, pc2fun, fun2pc):
self.events = events
self.pc2fun = pc2fun
self.fun2pc = fun2pc

self.stats = dict()

def print_graph(self, file):
depth = 0
time_stack = []
bar = '│ '

for event in tqdm(self.events, ncols=80):
time_str = ''
fn_name = ''
if event[TYPE] == KFT_OUT:
depth -= 1
elapsed = event[TIME] - time_stack.pop()
time_str = f'(ticks: {elapsed})'

if depth < 0:
print("Errororrr!")
sys.exit(1)

if event[TYPE] == KFT_IN:
fn_name = self.pc2fun[event[PC]]

line = f'{event[TIME]:06d} {bar * depth}{fn_name}{time_str}'
file.write(line + '\n')

if event[TYPE] == KFT_IN:
depth += 1
time_stack.append(event[TIME])

# for list of function pcs get list of their invocation times
def _get_stats(self, pcs):
res = dict([(pc, Stat(pc, self.pc2fun[pc])) for pc in pcs])
last_time = dict.fromkeys(pcs, -1)

for pc in pcs:
res[pc].times = []
res[pc].count = 0

for event in self.events:
pc = event[PC]
if pc not in pcs:
continue

t_last = last_time[pc]
if event[TYPE] == KFT_IN:
assert t_last == -1
last_time[pc] = event[TIME]
else:
assert t_last != -1
tm = event[TIME] - t_last
last_time[pc] = -1
res[pc].times.append(tm)
res[pc].count += 1

return res

# display time info for given functions
# if fn_pcs is True then list contains pc values
def get_stats(self, fn_list, fn_pcs=False):
invalid = []
for fn in fn_list:
if ((fn_pcs and fn not in self.pc2fun.keys())
or (fn not in self.fun2pc.keys())):
invalid.append(fn)
for i in invalid:
fn_list.remove(i)

if not fn_pcs:
pc_list = [self.fun2pc[f] for f in fn_list]
else:
pc_list = fn_list

res = dict()
todo = []

for i in range(len(fn_list)):
fn = fn_list[i]
pc = pc_list[i]

if fn in invalid:
print(f'Function {fn} is not valid')
continue

if pc in self.stats:
res[fn] = self.stats[pc]
else:
todo.append(pc)

if len(todo) > 0:
st = self._get_stats(todo)
for pc, s in st.items():
self.stats[pc] = s
res[pc] = s

return res


class Stat:
def __init__(self, pc, name):
self.pc = pc
self.name = name
self.times = [] # dict of list of times for each thread
self.count = 0 # dict of count for each thread

def __repr__(self):
return f'Stat(pc={hex(self.pc)}, fn={self.name})'

def display(self):
if len(self.times) != 0:
av = sum(self.times) / len(self.times)
mx = max(self.times)
mi = min(self.times)
else:
av = -1
mx = -1
mi = -1

print(f'{self.name} ({hex(self.pc)}) stats:')
print(f'\tinvocations: {self.count}')
print(f'\tavg time: {av}')
print(f'\tmax time: {mx}')
print(f'\tmin time: {mi}')


def get_data(path):
with open(path, 'rb') as file:
return pickle.load(file)


# Expecting directory structure:
# - pc2fun.pickle
# - fun2pc.pickle
# - thread-i.pickle (for i \in Naturals)
def main():
parser = argparse.ArgumentParser(
description='Analyze data gathered from kernel by KFT.')

parser.add_argument('-d', '--dir', type=str,
help='Directory with KFT data preprocessed')
parser.add_argument('-g', '--graph', action='store_true',
help='Print function call graph')
parser.add_argument('-t', '--thread', type=int,
help='Id of thread to inspect')
args = parser.parse_args()

thread = args.thread
dir = args.dir

pc2fun = get_data(dir + '/pc2fun.pkl')
fun2pc = get_data(dir + '/fun2pc.pkl')

events = get_data(dir + f'/thread-{thread}.pkl')

kft = KFT(events, pc2fun, fun2pc)

if args.graph:
filename = dir + f'/thread-{thread}.graph'
with open(filename, 'w') as file:
print(f'Writing function call graph to `{filename}`')
kft.print_graph(file)

inspect_functions = ['vm_page_fault', 'do_fork', 'do_mmap', 'do_blablabla']
stats = kft.get_stats(inspect_functions)

inspect_functions.extend(['do_exec', 'do_exit', 'proc_create'])
stats = kft.get_stats(inspect_functions)

for p in stats.values():
p.display()


if __name__ == '__main__':
main()
185 changes: 185 additions & 0 deletions kft/preprocess-data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
#!/usr/bin/env python3

import os
import pickle
import sys

from tqdm import tqdm

from elftools.elf.elffile import ELFFile
from elftools.elf.sections import SymbolTableSection


PC_MASK = 0xFFFFFF # 24 bits
TIMESTAMP_MASK = 0x7FFFFFFF # 31 bits
THREAD_MASK = 0xF # 8 bits
TYPE_MASK = 0x1 # 1 bit

PC_SHIFT = 40
TIMESTAMP_SHIFT = 9
THREAD_SHIFT = 1
TYPE_SHIFT = 0

KFT_IN = 0
KFT_OUT = 1


# Returns:
# - kernel_start
# - dict mapping pc to function
# - dict mapping function to pc
def inspect_elf_file(elf_file):
def get_symbol_table_section(elf):
for section in elf.iter_sections():
if not isinstance(section, SymbolTableSection):
continue
return section

def is_function(s):
return s.entry['st_info']['type'] == 'STT_FUNC'

def read_symbol(s):
return (s.entry['st_value'], s.name)

with open(elf_file, 'rb') as elf_file:
elf = ELFFile(elf_file)

sym_table = get_symbol_table_section(elf)
if sym_table is None:
return None, None, None

kern_sym = sym_table.get_symbol_by_name('__kernel_start')
if kern_sym is None:
kern_start = None
else:
kern_start = read_symbol(kern_sym[0])[0]

pc_to_fun = [read_symbol(s)
for s in sym_table.iter_symbols() if is_function(s)]
pcs, fns = zip(*pc_to_fun)
fun_to_pc = zip(fns, pcs)
pc2fun = dict(pc_to_fun)
fun2pc = dict(fun_to_pc)

return kern_start, pc2fun, fun2pc


def decode_values(v, kern_start):
typ = (v >> TYPE_SHIFT) & TYPE_MASK
thread = (v >> THREAD_SHIFT) & THREAD_MASK
timestamp = (v >> TIMESTAMP_SHIFT) & TIMESTAMP_MASK
rel_pc = (v >> PC_SHIFT) & PC_MASK
pc = kern_start + rel_pc
return thread, typ, timestamp, pc


# Read events and split them into threads
# Return dict of events for threads
def inspect_kft_file(path, kern_start):
st = os.stat(path)
size = st.st_size
entries = int(size / 8)
print(f'Reading file of size {size/1024/1024:.2f}MB ({entries} entries)')

MAX_THREADS = 30
events = dict()
td_time = [0] * MAX_THREADS # elapsed time
cur_thread = -1
cur_time = 0
switch_time = 0
ctx_swith_count = 0

file = open(path, 'rb', buffering=800)
for i in tqdm(range(entries), ncols=80):
v = int.from_bytes(file.read(8), 'little')
thread, typ, tm, pc = decode_values(v, kern_start)

if thread != cur_thread:
# update info about prev thread
td_time[cur_thread] += tm - switch_time

if thread not in events:
events[thread] = []

# save values for current thread
switch_time = tm
cur_time = td_time[thread]
cur_thread = thread
ctx_swith_count += 1

timestamp = cur_time + (tm - switch_time)

# Append tuple in correct order (type, timestampt, pc)
events[cur_thread].append((typ, timestamp, pc))

print(f'Encountered {ctx_swith_count} context swithces')
return events


def trunc_opener(path, flags):
return os.open(path, flags | os.O_TRUNC, mode=0o664)


def dump_threads(dir, td_events):
for t in td_events.keys():
filename = dir + f'/thread-{t}.pkl'
print(f'Writing thread {t} data to {filename}.')
with open(filename, 'wb', opener=trunc_opener) as file:
pickle.dump(td_events[t], file)


def dump_functions(dir, pc2fun, fun2pc):
pc_filename = dir + '/pc2fun.pkl'
fn_filename = dir + '/fun2pc.pkl'

print(f'Writing pc2fun dict to {pc_filename}')
with open(pc_filename, 'wb', opener=trunc_opener) as file:
pickle.dump(pc2fun, file)

print(f'Writing fun2pc dict to {fn_filename}')
with open(fn_filename, 'wb', opener=trunc_opener) as file:
pickle.dump(fun2pc, file)


def main():
if len(sys.argv) < 3:
print("provide a kft file and output directory as arguments!")
sys.exit(1)

# Prepare arguments
path = sys.argv[1]
dir = sys.argv[2]
elf_file = 'sys/mimiker.elf'
print(f'inspecting file `{path}` using symbols from `{elf_file}`')

# Create output directory
try:
os.stat(dir)
choice = input(f'Path `{dir}`exists do you want to continue (Y/n) ')
if choice not in {'y', 'Y', 'yes', 'Yes'}:
print("Aborting.")
sys.exit(0)
except FileNotFoundError:
os.mkdir(dir)

# Inspect ELF file
kern_start, pc2fun, fun2pc = inspect_elf_file(elf_file)
if pc2fun is None:
print("symbols not found in elf file!")
sys.exit(1)
if kern_start is None:
print(f'`__kernel_start` symbol not found in {elf_file}')
sys.exit(1)

# print(f'kernel start: {hex(kern_start)}')

# Inspect KFT output
td_events = inspect_kft_file(path, kern_start)

# Dump data to files for later use
dump_functions(dir, pc2fun, fun2pc)
dump_threads(dir, td_events)


if __name__ == '__main__':
main()

0 comments on commit 7c5655c

Please sign in to comment.