Skip to content

Commit

Permalink
simutils: refactoring, support for ret_points
Browse files Browse the repository at this point in the history
  • Loading branch information
dlp committed Dec 11, 2012
1 parent 772a733 commit ba2e72c
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 72 deletions.
12 changes: 6 additions & 6 deletions simutils/blockmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ def find_le(a, x):
# TODO (re)use the dasm enhance generator
def dasm_bblabels(binary, observe):
ro = re.compile(r'^\s*0*(?P<addr>[{}]+):'.format(string.hexdigits))
funcs = dasmutil.func_addresses(binary)
bbs = dasmutil.bb_addresses(binary)
funcs = { t[0]:t[2] for t in dasmutil.func_addresses(binary) }
bbs = { t[0]:tuple(t[1:]) for t in dasmutil.bb_addresses(binary, True) }
capture = False
func_sizes = set([ hex(int(k,16)-4)[2:] for k in funcs ])
func_sizes = set( hex(int(k,16)-4)[2:] for k in funcs )
with open(binary+'.dis','w') as f:
for line in dasmutil.disassemble(binary):
mo = ro.match(line)
Expand Down Expand Up @@ -84,10 +84,10 @@ def usage():
# format:
# addr: (func, bbname, number, size)
# addr and size are of type int, in bytes
bbs_a = dict([ (int(k,16),tuple(v[1][1:].split('#')+[int(v[0],16)]))
for k,v in dasmutil.bb_addresses(binary).items() ])
bbs_a = { int(tup[0],16) : tuple(tup[2][1:].split('#')+[int(tup[1],16)])
for tup in dasmutil.bb_addresses(binary, True) }
bbs_lst = sorted(bbs_a.keys())
funcs_a = dasmutil.func_addresses(binary)
funcs_a = { t[0]:t[2] for t in dasmutil.func_addresses(binary) }

def update_edge(A, prev, cur):
if prev not in A: A[prev] = dict()
Expand Down
16 changes: 8 additions & 8 deletions simutils/coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def checksum(fn):

def maxidxlt(ranges, cnt):
    """Return the largest index i such that ranges[i] <= cnt.

    ranges -- sequence of threshold values; every entry is examined, so the
              sequence does not have to be sorted
    cnt    -- the value to compare the thresholds against

    Raises ValueError when no entry of ranges is <= cnt (this includes an
    empty ranges), because max() is then applied to an empty sequence.
    """
    return max(i for i, r in enumerate(ranges) if r <= cnt)



Expand Down Expand Up @@ -76,24 +76,24 @@ def objdump(self):
segwidth = 80/len(colors) - 1
seg = lambda col,q: '\033[{:d}m{:^{segwidth}}\033[0m' \
.format(col, 'p<={:0.2f}'.format(q), segwidth=segwidth)
print ' '.join( [seg(x,y) for x,y in zip(colors,quantiles)])
print ' '.join(seg(x,y) for x,y in zip(colors,quantiles))

# prepare template
tpl = '{{cnt:>{0}}} {{addr:>{1}}}: {{mem:24}} {{inst}}'\
.format( len(str(self.maxcnt)), self.maxaddrlen )
assert( None not in self.Hist )
for addr, line in dasmutil.disasm_enhance(self.binary):
if not addr: print line; continue
for line, inst in dasmutil.DisAsm(self.binary):
if not inst: print line, ; continue
# it's an instruction
if addr in self.Hist:
cnt = self.Hist[addr]
if inst['addr'] in self.Hist:
cnt = self.Hist[inst['addr']]
#heat = len(colors)*cnt / (self.maxcnt+1)
heat = maxidxlt(ranges, cnt)
print '\033[{:d}m'.format(colors[heat])+\
tpl.format(cnt=str(cnt), **line).ljust(79)+\
tpl.format(cnt=str(cnt), **inst).ljust(79)+\
'\033[0m'
else:
print tpl.format(cnt='', **line)
print tpl.format(cnt='', **inst)


###############################################################################
Expand Down
150 changes: 92 additions & 58 deletions simutils/dasmutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,14 @@
#
# Utilities to analyze disassembly of patmos ELF binaries.
#
# TODO properly parse the disassembly to return function objects
# on which you can iterate to return basic blocks, etc
#
# Author:
# Daniel Prokesch <[email protected]>
#
###############################################################################

import os
import re, string
import bisect
from subprocess import Popen, PIPE


Expand All @@ -27,20 +25,25 @@ def _symtab_extract(binary, pattern):
return [ mo.groups() for mo in mos if mo ]

def func_addresses(binary):
"""Dictionary of addr-funcstart pairs."""
pattern = (r'^\s*0*([{0}]+)\s+(?:g|l)\s+F [.]text\s+[{0}]{{8}}\s+(.*)\s*$')\
"""Sorted list of function info tuples (hexaddr, hexsize, name)"""
pattern = (r'^\s*0*([{0}]+)\s+(?:g|l)\s+F [.]text\s+([{0}]{{8}})\s+(.*)\s*$')\
.format(string.hexdigits)
return dict(_symtab_extract(binary, pattern))
return sorted(_symtab_extract(binary, pattern),
key=lambda tup: int(tup[0],16) )


# bb_addresses can be used whne the binary is created with special bb symbols,
# bb_addresses can be used when the binary is created with special bb symbols,
# i.e., with -mpatmos-enable-bb-symbols
def bb_addresses(binary):
"""Dictionary of addr-bbname pairs."""
pattern = (r'^\s*0*([{0}]+)\s+[.]text\s+([{0}]{{8}})\s+([#].*)\s*$')\
.format(string.hexdigits)
return dict([ (g[0], tuple(g[1:]))
for g in _symtab_extract(binary, pattern) ])
def bb_addresses(binary, irbb=False):
"""Sorted list of bb info tuples (hexaddr, hexsize, name).
Set irbb=True if you want to extract the special bb information (if present).
"""
pattern = (r'^\s*0*([{0}]+)\s+[.]text\s+([{0}]{{8}})\s+({1}.*)\s*$')\
.format(string.hexdigits, "[#]" if irbb else "[^#]")
# (addr, size, name)
return sorted( _symtab_extract(binary, pattern),
key=lambda tup: int(tup[0],16) )

###############################################################################

Expand All @@ -56,57 +59,88 @@ def disassemble(binary):
objdump.kill()


def disasm_enhance(binary):
funcs = func_addresses(binary)
# regex object
ro = re.compile((r'^\s*0*(?P<addr>[{0}]+):\s*'\
r'(?P<mem>(?:[{0}]{{2}} ?){{4,8}})'\
r'\s*(?P<inst>.*)$').format(string.hexdigits))
# some helpers:
def padGuard(d): # space for default guard
if not d['inst'].startswith('('):
d['inst'] = ' '*7+d['inst']

call_ro = re.compile(r'call\s+([0-9]+)')
def patchCallTarget(d): # patch immediate call target
call_mo = call_ro.match(d['inst'],7)

###############################################################################

class DisAsm(object):
def __init__(self, binary):
    """Gather the function symbol info of binary and precompile the call matcher.

    Attributes set:
      binary  -- the binary argument as passed in
      funcs   -- result of func_addresses(binary)
      faddr   -- first element of every funcs entry, parsed as a base-16 int
      call_ro -- compiled regex matching 'call' followed by a decimal number
    """
    self.binary = binary
    self.funcs = func_addresses(binary)
    self.faddr = [int(entry[0], 16) for entry in self.funcs]
    self.call_ro = re.compile(r'call\s+([0-9]+)')

def func_at(self, addr):
    """Look up the function info entry whose start address equals addr.

    self.faddr is binary-searched for addr; on an exact hit the entry of
    self.funcs at the same position is returned, otherwise None.
    """
    starts = self.faddr
    pos = bisect.bisect_left(starts, addr)
    hit = pos < len(starts) and starts[pos] == addr
    return self.funcs[pos] if hit else None

def _pad_guard(self, inst):
return ' '*7+inst if not inst.startswith('(') else inst

def _patch_call(self, grp):
call_mo = self.call_ro.match(grp['inst'],7)
if call_mo:
tgt_wd = call_mo.group(1)
tgt_addr = 4*int(tgt_wd)
tgt_lbl = funcs.get(hex(tgt_addr)[2:], tgt_wd+' ???')
d['inst'] = d['inst'].replace(tgt_wd, tgt_lbl)
# list of function starts, pointing to the address of the size (base-4);
# reversed, to pop items off as they match
func_preview = sorted(
[ (int(k,16)-4, v) for (k,v) in funcs.items()], reverse=True)

# main loop
next_func = func_preview.pop()
for line in disassemble(binary):
mo = ro.match(line) # matcher object
# return: (address, line without \n)
if mo:
grp = mo.groupdict()
# check for size before function start
if int(grp['addr'],16)==next_func[0]:
func_size = int(grp['mem'].replace(' ',''),16)
continue
# normal instruction:
padGuard(grp)
patchCallTarget(grp)
# yield info
yield grp['addr'], grp
else:
# check function label
if line.startswith(next_func[1]+':'):
yield None, '\n{}\n{}:\t(size={:#x}, {:d} words)\n'\
.format('-'*80, next_func[1], func_size, func_size/4)
if len(func_preview)>0: next_func = func_preview.pop()
continue
yield None, line.rstrip()
tgt_func = self.func_at(tgt_addr)
tgt_lbl = tgt_func[2] if tgt_func else tgt_wd+' ???'
grp['inst'] = grp['inst'].replace(tgt_wd, tgt_lbl)
grp['call'] = tgt_addr


def __iter__(self):
    """Generator for enhanced disassembly.

    Iterates over the lines produced by disassemble(self.binary) and yields
    (text, info) pairs:
      * (line, grp)    for a line matching the instruction pattern; grp is
                       the regex group dict (keys 'addr', 'mem', 'inst')
                       after _pad_guard and _patch_call were applied to it
      * (header, None) in place of a line equal to '<name>:\n' of the next
                       expected function; header is a separator followed by
                       the function name and its size
      * (line, None)   for every other line, unchanged
    The matching line located 4 below the next expected function start is
    read as that function's size and is not yielded.
    """
    # regex object
    ro = re.compile((r'^\s*0*(?P<addr>[{0}]+):\s*'\
        r'(?P<mem>(?:[{0}]{{2}} ?){{4,8}})'\
        r'\s*(?P<inst>.*)$').format(string.hexdigits))

    # list of function starts, pointing to the address of the size (base-4);
    # reversed, to pop items off as they match
    func_preview = [ (k-4, self.func_at(k)) for k in self.faddr ]
    func_preview.reverse()
    # main loop
    # NOTE(review): pop() raises IndexError when self.faddr is empty.
    next_size, next_func = func_preview.pop()
    for line in disassemble(self.binary):
        mo = ro.match(line)
        if mo:
            # line is an inst, provide additional info
            grp = mo.groupdict()
            # check for size before function start
            if int(grp['addr'],16)==next_size:
                # the 'mem' bytes, with spaces removed, are parsed as one hex number
                func_size = int(grp['mem'].replace(' ',''),16)
                continue
            # normal instruction:
            grp['inst'] = self._pad_guard(grp['inst'])
            self._patch_call(grp)
            yield line, grp
        else:
            # check function label
            if line==(next_func[2]+':\n'):
                # NOTE(review): func_size is only bound once a size line has
                # been seen; a label arriving first would raise NameError.
                assert( func_size == int(next_func[1],16) )
                # NOTE(review): '{:d}' needs an int, so func_size/4 presumably
                # relies on Python 2 integer division -- confirm before porting.
                yield '\n{}\n{}:\t(size={:#x}, {:d} words)\n\n' \
                    .format('-'*80, next_func[2], func_size, func_size/4), None
                # advance to the next expected function, if any is left
                if len(func_preview)>0: next_size, next_func = func_preview.pop()
                continue
            yield line, None

###############################################################################


def ret_points(binary):
    """Generate the 'addr' field of instructions that follow a call.

    Walks the instruction entries of DisAsm(binary); non-instruction lines
    are ignored. An entry carrying a 'call' key resets the counter to -2, and
    every other entry increments it, so the address yielded is that of the
    third instruction entry after the call (unless another call resets the
    counter first).
    NOTE(review): presumably the two skipped entries are call delay slots --
    confirm against the target ISA.
    """
    steps = 1  # starts positive so nothing is yielded before the first call
    for _line, inst in DisAsm(binary):
        if not inst:
            continue
        if steps == 0:
            yield inst['addr']
        steps = -2 if 'call' in inst else steps + 1


###############################################################################

if __name__=='__main__':
raise Exception("This module is not to be executed.")

0 comments on commit ba2e72c

Please sign in to comment.