-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
simutils: refactoring, support for ret_points
- Loading branch information
Showing
3 changed files
with
106 additions
and
72 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,16 +3,14 @@ | |
# | ||
# Utilities to analyze disassembly of patmos ELF binaries. | ||
# | ||
# TODO properly parse the disassembly to return function objects | ||
# on which you can iterate to return basic blocks, etc | ||
# | ||
# Author: | ||
# Daniel Prokesch <[email protected]> | ||
# | ||
############################################################################### | ||
|
||
import os | ||
import re, string | ||
import bisect | ||
from subprocess import Popen, PIPE | ||
|
||
|
||
|
@@ -27,20 +25,25 @@ def _symtab_extract(binary, pattern): | |
return [ mo.groups() for mo in mos if mo ] | ||
|
||
def func_addresses(binary): | ||
"""Dictionary of addr-funcstart pairs.""" | ||
pattern = (r'^\s*0*([{0}]+)\s+(?:g|l)\s+F [.]text\s+[{0}]{{8}}\s+(.*)\s*$')\ | ||
"""Sorted list of function info tuples (hexaddr, hexsize, name)""" | ||
pattern = (r'^\s*0*([{0}]+)\s+(?:g|l)\s+F [.]text\s+([{0}]{{8}})\s+(.*)\s*$')\ | ||
.format(string.hexdigits) | ||
return dict(_symtab_extract(binary, pattern)) | ||
return sorted(_symtab_extract(binary, pattern), | ||
key=lambda tup: int(tup[0],16) ) | ||
|
||
|
||
# bb_addresses can be used whne the binary is created with special bb symbols, | ||
# bb_addresses can be used when the binary is created with special bb symbols, | ||
# i.e., with -mpatmos-enable-bb-symbols | ||
def bb_addresses(binary): | ||
"""Dictionary of addr-bbname pairs.""" | ||
pattern = (r'^\s*0*([{0}]+)\s+[.]text\s+([{0}]{{8}})\s+([#].*)\s*$')\ | ||
.format(string.hexdigits) | ||
return dict([ (g[0], tuple(g[1:])) | ||
for g in _symtab_extract(binary, pattern) ]) | ||
def bb_addresses(binary, irbb=False): | ||
"""Sorted list of bb info tuples (hexaddr, hexsize, name). | ||
Set irbb=True if you want to extract the special bb information (if present). | ||
""" | ||
pattern = (r'^\s*0*([{0}]+)\s+[.]text\s+([{0}]{{8}})\s+({1}.*)\s*$')\ | ||
.format(string.hexdigits, "[#]" if irbb else "[^#]") | ||
# (addr, size, name) | ||
return sorted( _symtab_extract(binary, pattern), | ||
key=lambda tup: int(tup[0],16) ) | ||
|
||
############################################################################### | ||
|
||
|
@@ -56,57 +59,88 @@ def disassemble(binary): | |
objdump.kill() | ||
|
||
|
||
def disasm_enhance(binary): | ||
funcs = func_addresses(binary) | ||
# regex object | ||
ro = re.compile((r'^\s*0*(?P<addr>[{0}]+):\s*'\ | ||
r'(?P<mem>(?:[{0}]{{2}} ?){{4,8}})'\ | ||
r'\s*(?P<inst>.*)$').format(string.hexdigits)) | ||
# some helpers: | ||
def padGuard(d): # space for default guard | ||
if not d['inst'].startswith('('): | ||
d['inst'] = ' '*7+d['inst'] | ||
|
||
call_ro = re.compile(r'call\s+([0-9]+)') | ||
def patchCallTarget(d): # patch immediate call target | ||
call_mo = call_ro.match(d['inst'],7) | ||
|
||
############################################################################### | ||
|
||
class DisAsm(object): | ||
def __init__(self, binary): | ||
self.binary = binary | ||
self.funcs = func_addresses(self.binary) | ||
self.faddr = [ int(t[0],16) for t in self.funcs ] | ||
self.call_ro = re.compile(r'call\s+([0-9]+)') | ||
|
||
def func_at(self, addr): | ||
i = bisect.bisect_left(self.faddr, addr) | ||
if i != len(self.faddr) and self.faddr[i] == addr: | ||
return self.funcs[i] | ||
return None | ||
|
||
def _pad_guard(self, inst): | ||
return ' '*7+inst if not inst.startswith('(') else inst | ||
|
||
def _patch_call(self, grp): | ||
call_mo = self.call_ro.match(grp['inst'],7) | ||
if call_mo: | ||
tgt_wd = call_mo.group(1) | ||
tgt_addr = 4*int(tgt_wd) | ||
tgt_lbl = funcs.get(hex(tgt_addr)[2:], tgt_wd+' ???') | ||
d['inst'] = d['inst'].replace(tgt_wd, tgt_lbl) | ||
# list of function starts, pointing to the address of the size (base-4); | ||
# reversed, to pop items off as they match | ||
func_preview = sorted( | ||
[ (int(k,16)-4, v) for (k,v) in funcs.items()], reverse=True) | ||
|
||
# main loop | ||
next_func = func_preview.pop() | ||
for line in disassemble(binary): | ||
mo = ro.match(line) # matcher object | ||
# return: (address, line without \n) | ||
if mo: | ||
grp = mo.groupdict() | ||
# check for size before function start | ||
if int(grp['addr'],16)==next_func[0]: | ||
func_size = int(grp['mem'].replace(' ',''),16) | ||
continue | ||
# normal instruction: | ||
padGuard(grp) | ||
patchCallTarget(grp) | ||
# yield info | ||
yield grp['addr'], grp | ||
else: | ||
# check function label | ||
if line.startswith(next_func[1]+':'): | ||
yield None, '\n{}\n{}:\t(size={:#x}, {:d} words)\n'\ | ||
.format('-'*80, next_func[1], func_size, func_size/4) | ||
if len(func_preview)>0: next_func = func_preview.pop() | ||
continue | ||
yield None, line.rstrip() | ||
tgt_func = self.func_at(tgt_addr) | ||
tgt_lbl = tgt_func[2] if tgt_func else tgt_wd+' ???' | ||
grp['inst'] = grp['inst'].replace(tgt_wd, tgt_lbl) | ||
grp['call'] = tgt_addr | ||
|
||
|
||
def __iter__(self): | ||
"""Generator for enhanced disassembly""" | ||
# regex object | ||
ro = re.compile((r'^\s*0*(?P<addr>[{0}]+):\s*'\ | ||
r'(?P<mem>(?:[{0}]{{2}} ?){{4,8}})'\ | ||
r'\s*(?P<inst>.*)$').format(string.hexdigits)) | ||
|
||
# list of function starts, pointing to the address of the size (base-4); | ||
# reversed, to pop items off as they match | ||
func_preview = [ (k-4, self.func_at(k)) for k in self.faddr ] | ||
func_preview.reverse() | ||
# main loop | ||
next_size, next_func = func_preview.pop() | ||
for line in disassemble(self.binary): | ||
mo = ro.match(line) | ||
if mo: | ||
# line is an inst, provide additional info | ||
grp = mo.groupdict() | ||
# check for size before function start | ||
if int(grp['addr'],16)==next_size: | ||
func_size = int(grp['mem'].replace(' ',''),16) | ||
continue | ||
# normal instruction: | ||
grp['inst'] = self._pad_guard(grp['inst']) | ||
self._patch_call(grp) | ||
yield line, grp | ||
else: | ||
# check function label | ||
if line==(next_func[2]+':\n'): | ||
assert( func_size == int(next_func[1],16) ) | ||
yield '\n{}\n{}:\t(size={:#x}, {:d} words)\n\n' \ | ||
.format('-'*80, next_func[2], func_size, func_size/4), None | ||
if len(func_preview)>0: next_size, next_func = func_preview.pop() | ||
continue | ||
yield line, None | ||
|
||
############################################################################### | ||
|
||
|
||
def ret_points(binary): | ||
dasm = DisAsm(binary) | ||
cnt = 1 | ||
for line, inst in dasm: | ||
if inst: | ||
if cnt==0: yield inst['addr'] | ||
if 'call' in inst: | ||
cnt = -2 | ||
continue | ||
cnt = cnt + 1 | ||
|
||
|
||
############################################################################### | ||
|
||
if __name__=='__main__': | ||
raise Exception("This module is not to be executed.") |