From 7c5655cdde40958a4782746133fd444530cdd2ff Mon Sep 17 00:00:00 2001 From: franciscozdo Date: Mon, 5 Jun 2023 18:44:53 +0200 Subject: [PATCH] Create python scripts to analyz KFT output --- kft/README.md | 1 + kft/analayze-thread.py | 196 +++++++++++++++++++++++++++++++++++++++++ kft/preprocess-data.py | 185 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 382 insertions(+) create mode 100644 kft/README.md create mode 100755 kft/analayze-thread.py create mode 100755 kft/preprocess-data.py diff --git a/kft/README.md b/kft/README.md new file mode 100644 index 000000000..d3f87d998 --- /dev/null +++ b/kft/README.md @@ -0,0 +1 @@ +# Kernel Function Trace diff --git a/kft/analayze-thread.py b/kft/analayze-thread.py new file mode 100755 index 000000000..34c9cfa60 --- /dev/null +++ b/kft/analayze-thread.py @@ -0,0 +1,196 @@ +#!/usr/bin/env python3 + +import argparse +import pickle +import sys + +from tqdm import tqdm + +# Accessors for KFT event tuple +TYPE = 0 +TIME = 1 +PC = 2 + +KFT_IN = 0 +KFT_OUT = 1 + + +class KFT(): + def __init__(self, events, pc2fun, fun2pc): + self.events = events + self.pc2fun = pc2fun + self.fun2pc = fun2pc + + self.stats = dict() + + def print_graph(self, file): + depth = 0 + time_stack = [] + bar = '│ ' + + for event in tqdm(self.events, ncols=80): + time_str = '' + fn_name = '' + if event[TYPE] == KFT_OUT: + depth -= 1 + elapsed = event[TIME] - time_stack.pop() + time_str = f'(ticks: {elapsed})' + + if depth < 0: + print("Errororrr!") + sys.exit(1) + + if event[TYPE] == KFT_IN: + fn_name = self.pc2fun[event[PC]] + + line = f'{event[TIME]:06d} {bar * depth}{fn_name}{time_str}' + file.write(line + '\n') + + if event[TYPE] == KFT_IN: + depth += 1 + time_stack.append(event[TIME]) + + # for list of function pcs get list of their invocation times + def _get_stats(self, pcs): + res = dict([(pc, Stat(pc, self.pc2fun[pc])) for pc in pcs]) + last_time = dict.fromkeys(pcs, -1) + + for pc in pcs: + res[pc].times = [] + res[pc].count = 0 + + for event in self.events: + pc = event[PC] + if pc not in pcs: + continue + + t_last = last_time[pc] + if event[TYPE] == KFT_IN: + assert t_last == -1 + last_time[pc] = event[TIME] + else: + assert t_last != -1 + tm = event[TIME] - t_last + last_time[pc] = -1 + res[pc].times.append(tm) + res[pc].count += 1 + + return res + + # display time info for given functions + # if fn_pcs is True then list contains pc values + def get_stats(self, fn_list, fn_pcs=False): + invalid = [] + for fn in fn_list: + if ((fn_pcs and fn not in self.pc2fun.keys()) + or (fn not in self.fun2pc.keys())): + invalid.append(fn) + for i in invalid: + fn_list.remove(i) + + if not fn_pcs: + pc_list = [self.fun2pc[f] for f in fn_list] + else: + pc_list = fn_list + + res = dict() + todo = [] + + for i in range(len(fn_list)): + fn = fn_list[i] + pc = pc_list[i] + + if fn in invalid: + print(f'Function {fn} is not valid') + continue + + if pc in self.stats: + res[fn] = self.stats[pc] + else: + todo.append(pc) + + if len(todo) > 0: + st = self._get_stats(todo) + for pc, s in st.items(): + self.stats[pc] = s + res[pc] = s + + return res + + +class Stat: + def __init__(self, pc, name): + self.pc = pc + self.name = name + self.times = [] # dict of list of times for each thread + self.count = 0 # dict of count for each thread + + def __repr__(self): + return f'Stat(pc={hex(self.pc)}, fn={self.name})' + + def display(self): + if len(self.times) != 0: + av = sum(self.times) / len(self.times) + mx = max(self.times) + mi = min(self.times) + else: + av = -1 + mx = -1 + mi = -1 + + print(f'{self.name} ({hex(self.pc)}) stats:') + print(f'\tinvocations: {self.count}') + print(f'\tavg time: {av}') + print(f'\tmax time: {mx}') + print(f'\tmin time: {mi}') + + +def get_data(path): + with open(path, 'rb') as file: + return pickle.load(file) + + +# Expecting directory structure: +# - pc2fun.pickle +# - fun2pc.pickle +# - thread-i.pickle (for i \in Naturals) +def main(): + parser = argparse.ArgumentParser( + description='Analyze data gathered from kernel by KFT.') + + parser.add_argument('-d', '--dir', type=str, + help='Directory with KFT data preprocessed') + parser.add_argument('-g', '--graph', action='store_true', + help='Print function call graph') + parser.add_argument('-t', '--thread', type=int, + help='Id of thread to inspect') + args = parser.parse_args() + + thread = args.thread + dir = args.dir + + pc2fun = get_data(dir + '/pc2fun.pkl') + fun2pc = get_data(dir + '/fun2pc.pkl') + + events = get_data(dir + f'/thread-{thread}.pkl') + + kft = KFT(events, pc2fun, fun2pc) + + if args.graph: + filename = dir + f'/thread-{thread}.graph' + with open(filename, 'w') as file: + print(f'Writing function call graph to `{filename}`') + kft.print_graph(file) + + inspect_functions = ['vm_page_fault', 'do_fork', 'do_mmap', 'do_blablabla'] + stats = kft.get_stats(inspect_functions) + + inspect_functions.extend(['do_exec', 'do_exit', 'proc_create']) + stats = kft.get_stats(inspect_functions) + + for p in stats.values(): + p.display() + + +if __name__ == '__main__': + main() diff --git a/kft/preprocess-data.py b/kft/preprocess-data.py new file mode 100755 index 000000000..37015dde4 --- /dev/null +++ b/kft/preprocess-data.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python3 + +import os +import pickle +import sys + +from tqdm import tqdm + +from elftools.elf.elffile import ELFFile +from elftools.elf.sections import SymbolTableSection + + +PC_MASK = 0xFFFFFF # 24 bits +TIMESTAMP_MASK = 0x7FFFFFFF # 31 bits +THREAD_MASK = 0xF # 8 bits +TYPE_MASK = 0x1 # 1 bit + +PC_SHIFT = 40 +TIMESTAMP_SHIFT = 9 +THREAD_SHIFT = 1 +TYPE_SHIFT = 0 + +KFT_IN = 0 +KFT_OUT = 1 + + +# Returns: +# - kernel_start +# - dict mapping pc to function +# - dict mapping function to pc +def inspect_elf_file(elf_file): + def get_symbol_table_section(elf): + for section in elf.iter_sections(): + if not isinstance(section, SymbolTableSection): + continue + return section + + def is_function(s): + return s.entry['st_info']['type'] == 'STT_FUNC' + + def read_symbol(s): + return (s.entry['st_value'], s.name) + + with open(elf_file, 'rb') as elf_file: + elf = ELFFile(elf_file) + + sym_table = get_symbol_table_section(elf) + if sym_table is None: + return None, None, None + + kern_sym = sym_table.get_symbol_by_name('__kernel_start') + if kern_sym is None: + kern_start = None + else: + kern_start = read_symbol(kern_sym[0])[0] + + pc_to_fun = [read_symbol(s) + for s in sym_table.iter_symbols() if is_function(s)] + pcs, fns = zip(*pc_to_fun) + fun_to_pc = zip(fns, pcs) + pc2fun = dict(pc_to_fun) + fun2pc = dict(fun_to_pc) + + return kern_start, pc2fun, fun2pc + + +def decode_values(v, kern_start): + typ = (v >> TYPE_SHIFT) & TYPE_MASK + thread = (v >> THREAD_SHIFT) & THREAD_MASK + timestamp = (v >> TIMESTAMP_SHIFT) & TIMESTAMP_MASK + rel_pc = (v >> PC_SHIFT) & PC_MASK + pc = kern_start + rel_pc + return thread, typ, timestamp, pc + + +# Read events and split them into threads +# Return dict of events for threads +def inspect_kft_file(path, kern_start): + st = os.stat(path) + size = st.st_size + entries = int(size / 8) + print(f'Reading file of size {size/1024/1024:.2f}MB ({entries} entries)') + + MAX_THREADS = 30 + events = dict() + td_time = [0] * MAX_THREADS # elapsed time + cur_thread = -1 + cur_time = 0 + switch_time = 0 + ctx_swith_count = 0 + + file = open(path, 'rb', buffering=800) + for i in tqdm(range(entries), ncols=80): + v = int.from_bytes(file.read(8), 'little') + thread, typ, tm, pc = decode_values(v, kern_start) + + if thread != cur_thread: + # update info about prev thread + td_time[cur_thread] += tm - switch_time + + if thread not in events: + events[thread] = [] + + # save values for current thread + switch_time = tm + cur_time = td_time[thread] + cur_thread = thread + ctx_swith_count += 1 + + timestamp = cur_time + (tm - switch_time) + + # Append tuple in correct order (type, timestampt, pc) + events[cur_thread].append((typ, timestamp, pc)) + + print(f'Encountered {ctx_swith_count} context swithces') + return events + + +def trunc_opener(path, flags): + return os.open(path, flags | os.O_TRUNC, mode=0o664) + + +def dump_threads(dir, td_events): + for t in td_events.keys(): + filename = dir + f'/thread-{t}.pkl' + print(f'Writing thread {t} data to {filename}.') + with open(filename, 'wb', opener=trunc_opener) as file: + pickle.dump(td_events[t], file) + + +def dump_functions(dir, pc2fun, fun2pc): + pc_filename = dir + '/pc2fun.pkl' + fn_filename = dir + '/fun2pc.pkl' + + print(f'Writing pc2fun dict to {pc_filename}') + with open(pc_filename, 'wb', opener=trunc_opener) as file: + pickle.dump(pc2fun, file) + + print(f'Writing fun2pc dict to {fn_filename}') + with open(fn_filename, 'wb', opener=trunc_opener) as file: + pickle.dump(fun2pc, file) + + +def main(): + if len(sys.argv) < 3: + print("provide a kft file and output directory as arguments!") + sys.exit(1) + + # Prepare arguments + path = sys.argv[1] + dir = sys.argv[2] + elf_file = 'sys/mimiker.elf' + print(f'inspecting file `{path}` using symbols from `{elf_file}`') + + # Create output directory + try: + os.stat(dir) + choice = input(f'Path `{dir}`exists do you want to continue (Y/n) ') + if choice not in {'y', 'Y', 'yes', 'Yes'}: + print("Aborting.") + sys.exit(0) + except FileNotFoundError: + os.mkdir(dir) + + # Inspect ELF file + kern_start, pc2fun, fun2pc = inspect_elf_file(elf_file) + if pc2fun is None: + print("symbols not found in elf file!") + sys.exit(1) + if kern_start is None: + print(f'`__kernel_start` symbol not found in {elf_file}') + sys.exit(1) + + # print(f'kernel start: {hex(kern_start)}') + + # Inspect KFT output + td_events = inspect_kft_file(path, kern_start) + + # Dump data to files for later use + dump_functions(dir, pc2fun, fun2pc) + dump_threads(dir, td_events) + + +if __name__ == '__main__': + main()