From 0386d170d8db682f103a5d8520e17dc0c1bd6955 Mon Sep 17 00:00:00 2001 From: Tim Blazytko Date: Mon, 1 Apr 2019 20:27:41 +0200 Subject: [PATCH 1/2] stateful mem commit --- .../simplifications_stateful_memory.py | 130 +++++++++++++++++ miasm/ir/ir.py | 21 ++- miasm/ir/translators/z3_ir.py | 138 +++++++++++++++++- 3 files changed, 277 insertions(+), 12 deletions(-) create mode 100644 miasm/expression/simplifications_stateful_memory.py diff --git a/miasm/expression/simplifications_stateful_memory.py b/miasm/expression/simplifications_stateful_memory.py new file mode 100644 index 000000000..e71495313 --- /dev/null +++ b/miasm/expression/simplifications_stateful_memory.py @@ -0,0 +1,130 @@ +from miasm.expression.expression import * + + +def dissect_test(expr, expr_type, result): + if isinstance(expr, expr_type): + result.add(expr) + return False + return True + + +def dissect_visit(expr, expr_type): + result = set() + expr.visit(lambda expr: expr, + lambda expr: dissect_test(expr, expr_type, result)) + return result + + +def gen_smt_mem_read(m, arch_size): + """ + Generates a expression which will + return a value from memory of the form + mem_read(M, addr, size), + where @M is the memory, @addr the + value's address and @size the size + of the value to be read. + The value will be the slice of the + read value of size m.size. + :param m: memory expression + :param mem_name: string, name of memory variable + :return: ExprSlice of size m.size + """ + mem = ExprId("M", arch_size) + addr = zero_padding(m.ptr, arch_size) + size = ExprInt(m.size, arch_size) + op = ExprOp("mem_read", mem, addr, size) + + return ExprSlice(op, 0, m.size) + + +def gen_smt_mem_write(e, arch_size): + """ + Generates an expression of the form + M = mem_write(M, addr, val, size), + where @val is a value of size @size + which will be written in memory @M at + address @addr. + :param e: ExprAssign + :param mem_name: string, name of memory variable + :return: ExprAssign + """ + dst = e.dst + src = e.src + + mem = ExprId("M", arch_size) + addr = zero_padding(dst.ptr, arch_size) + val = zero_padding(src, arch_size) + size = ExprInt(src.size, arch_size) + op = ExprOp("mem_write", mem, addr, val, size) + return ExprAssign(mem, op) + + +def zero_padding(v, arch_size): + """ + Paddes a value to the architecture's bit size with zero + :param v: parameter to be padded + :return: padded parameter + """ + # v is smaller than architecture's bit size + if v.size < arch_size: + i = ExprInt(0, arch_size) + slice = ExprSlice(i, 0, i.size - v.size) + return ExprCompose(v, slice) + + return v + + +def rewrite_memory_read(e, arch_size): + """ + Rewrites all memory read expressions + in an expression to + mem_read(M, address, size) + :param e: expression + :return: rewritten expression + """ + # parse all memory expressions + e_new = e + memory = dissect_visit(e, ExprMem) + # iterate memory expressions + for m in memory: + # create mem_read expression + mem_read = gen_smt_mem_read(m, arch_size) + # replace memory expression with mem_read expression + e_new = e_new.replace_expr({m: mem_read}) + + return e_new + + +def rewrite_memory(e, arch_size): + """ + Rewrites memory expressions in an ExprAff to + + - mem_read(M, address, size) + - M = mem_write(M, address, value, size) + + :param e: ExprAssign + :return: ExprAssign with transformed memory expressions + """ + dst = e.dst.copy() + src = e.src.copy() + + # memory expression on LHS: create mem_write expressions + if isinstance(dst, ExprMem): + # rewrite all memory read expressions + src_new = rewrite_memory_read(src, arch_size) + + # generate mem_write expression + mem_write = ExprAssign(dst, src_new) + + # recreate expression + e_new = gen_smt_mem_write(mem_write, arch_size) + + # no memory expression on LHS: create mem_read expressions + else: + # rewrite all memory read expressions + src_new = rewrite_memory_read(src, arch_size) + + # recreate expression + e_new = ExprAssign(dst, src_new) + + return e_new diff --git a/miasm/ir/ir.py b/miasm/ir/ir.py index eb9857b11..5d28829ca 100644 --- a/miasm/ir/ir.py +++ b/miasm/ir/ir.py @@ -265,7 +265,7 @@ def dst2ExprAssign(self, dst): @dst: Expr instance""" return m2_expr.ExprAssign(dst, self[dst]) - def simplify(self, simplifier): + def simplify(self, simplifier, rewrite_mem=False, arch_size=0): """ Return a new AssignBlock with expression simplified @@ -273,10 +273,17 @@ def simplify(self, simplifier): """ new_assignblk = {} for dst, src in viewitems(self): - if dst == src: - continue - new_src = simplifier(src) - new_dst = simplifier(dst) + if rewrite_mem: + e = self.dst2ExprAssign(dst) + rewritten = simplifier(e, arch_size) + new_src = rewritten.src + new_dst = rewritten.dst + else: + if dst == src: + continue + new_src = simplifier(src) + new_dst = simplifier(dst) + new_assignblk[new_dst] = new_src return AssignBlock(irs=new_assignblk, instr=self.instr) @@ -623,7 +630,7 @@ def getby_offset(self, offset): return out - def simplify(self, simplifier): + def simplify(self, simplifier, rewrite_mem=False): """ Simplify expressions in each irblocks @simplifier: ExpressionSimplifier instance @@ -632,7 +639,7 @@ def simplify(self, simplifier): for loc_key, block in list(viewitems(self.blocks)): assignblks = [] for assignblk in block: - new_assignblk = assignblk.simplify(simplifier) + new_assignblk = assignblk.simplify(simplifier, rewrite_mem, self._irdst.size) if assignblk != new_assignblk: modified = True assignblks.append(new_assignblk) diff --git a/miasm/ir/translators/z3_ir.py b/miasm/ir/translators/z3_ir.py index 7dc77cfc5..1d16d9a4b 100644 --- a/miasm/ir/translators/z3_ir.py +++ b/miasm/ir/translators/z3_ir.py @@ -101,6 +101,110 @@ def is_big_endian(self): return not self.is_little_endian() +class Z3MemStateful(object): + import z3 + """ + Global memory modelling as one large z3 array with low level reads and writes. + Memory access is modelled as 8 byte access per read/write. Addresses have size @size. + + Thus, memory access is a map from
-> 8 byte. + + The structure of that class is adapted/based on class "Z3Mem" from + miasm2/ir/translators/z3_ir.py. + Especially the structure (or methods) of endianess (is_little_endian, is_big_endian), alignment and + read access (Z3Mem.get) are used. + """ + + def __init__(self, size, endianness="<"): + """ + Initialises global memory + :param size: address size of memory entries + :param endianness: "<" for little endian, ">" for big endian + """ + # Import z3 only on demand + global z3 + import z3 + self.endianness = endianness + self.size = size + + def is_little_endian(self): + """True if this memory is little endian.""" + return self.endianness == "<" + + def is_big_endian(self): + """True if this memory is big endian.""" + return not self.is_little_endian() + + def gen_mem(self, mem="M", size=None): + """ + Generates a z3 array that maps @self.size bitvecs to 8 byte bitvecs + :param mem: name of memory as string + :param size: memory address size + :return: z3 array + """ + if not size: + size = self.size + return z3.Array(mem, z3.BitVecSort(size), z3.BitVecSort(8)) + + def read(self, mem, addr, size): + """ + Reads a value in 8 byte steps from a z3 array + :param mem: current z3 memory array + :param addr: memory address as z3 bitvec + :param size: size of the read value + :return: value in memory at address @addr + """ + size = size.as_long() + original_size = size + + if original_size % 8 != 0: + # Size not aligned on 8bits -> read more than size and extract after + size = ((original_size / 8) + 1) * 8 + + res = z3.Select(mem, addr) + + if self.is_little_endian(): + for i in xrange(1, size / 8): + res = z3.Concat(z3.Select(mem, addr + i), res) + else: + for i in xrange(1, size / 8): + res = z3.Concat(res, z3.Select(mem, addr + i)) + + if size == original_size: + return z3.simplify(res) + else: + return z3.Extract(original_size - 1, 0, res) + + def write(self, mem, addr, v, size): + """ + Writes a value in 8 byte steps into a z3 array + :param mem: current z3 memory array + :param addr: memory address as z3 bitvec + :param v: value to be written + :param size: size of value + :return: updated z3 memory array + """ + size = size.as_long() + original_size = size + + if original_size % 8 != 0: + # align size to 8 bits + size = ((original_size / 8) + 1) * 8 + + if self.is_little_endian(): + mem = z3.Store(mem, addr, z3.Extract(7, 0, v)) + for i in xrange(2, size / 8 + 1): + mem = z3.Store(mem, addr + i - 1, z3.Extract(8 * i - 1, 8 * (i - 1), v)) + else: + mem = z3.Store(mem, addr, z3.Extract(self.size - 1, self.size - 8, v)) + for i in xrange((size / 8) - 1, 0, -1): + mem = z3.Store(mem, addr + (-i % (size / 8)), z3.Extract(8 * i - 1, 8 * (i - 1), v)) + + return z3.simplify(mem) + + + + class TranslatorZ3(Translator): """Translate a Miasm expression to an equivalent z3 python binding expression. Memory is abstracted via z3.Array (see Z3Mem). @@ -116,7 +220,7 @@ class TranslatorZ3(Translator): # Operations translation trivial_ops = ["+", "-", "/", "%", "&", "^", "|", "*", "<<"] - def __init__(self, endianness="<", loc_db=None, **kwargs): + def __init__(self, endianness="<", loc_db=None, stateful_mem=False, mem_size=64, **kwargs): """Instance a Z3 translator @endianness: (optional) memory endianness """ @@ -125,14 +229,21 @@ def __init__(self, endianness="<", loc_db=None, **kwargs): import z3 super(TranslatorZ3, self).__init__(**kwargs) - self._mem = Z3Mem(endianness) + self.stateful_mem = stateful_mem + if self.stateful_mem: + self._mem = Z3MemStateful(mem_size, endianness) + else: + self._mem = Z3Mem(endianness) self.loc_db = loc_db def from_ExprInt(self, expr): return z3.BitVecVal(expr.arg.arg, expr.size) def from_ExprId(self, expr): - return z3.BitVec(str(expr), expr.size) + if self.stateful_mem and isinstance(expr.name, str) and expr.name.split(".")[0] == "M": + return self._mem.gen_mem(str(expr.name)) + else: + return z3.BitVec(str(expr), expr.size) def from_ExprLoc(self, expr): if self.loc_db is None: @@ -146,8 +257,14 @@ def from_ExprLoc(self, expr): return z3.BitVec(str(loc_key), expr.size) def from_ExprMem(self, expr): - addr = self.from_expr(expr.ptr) - return self._mem.get(addr, expr.size) + if self.stateful_mem: + addr = self.from_expr(expr.ptr) + size = z3.IntVal(expr.size) + mem = self._mem.gen_mem() + return self._mem.read(mem, addr, size) + else: + addr = self.from_expr(expr.ptr) + return self._mem.get(addr, expr.size) def from_ExprSlice(self, expr): res = self.from_expr(expr.arg) @@ -237,6 +354,17 @@ def from_ExprOp(self, expr): z3.BitVecVal(1, 1), z3.BitVecVal(0, 1) ) + elif expr.op == "mem_read": + mem = self._mem.gen_mem(str(args[0])) + addr = args[1] + size = args[2] + res = self._mem.read(mem, addr, size) + elif expr.op == "mem_write": + mem = self._mem.gen_mem(str(args[0])) + addr = args[1] + val = args[2] + size = args[3] + res = self._mem.write(mem, addr, val, size) else: raise NotImplementedError("Unsupported OP yet: %s" % expr.op) elif expr.op == 'parity': From 75af17b8e77ace68fb3d7fcbf5d88f7f58ba2ffa Mon Sep 17 00:00:00 2001 From: Tim Blazytko Date: Tue, 2 Apr 2019 17:03:10 +0200 Subject: [PATCH 2/2] fixed typo --- miasm/ir/translators/z3_ir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/miasm/ir/translators/z3_ir.py b/miasm/ir/translators/z3_ir.py index 1d16d9a4b..c55ad9ef5 100644 --- a/miasm/ir/translators/z3_ir.py +++ b/miasm/ir/translators/z3_ir.py @@ -111,7 +111,7 @@ class Z3MemStateful(object): The structure of that class is adapted/based on class "Z3Mem" from miasm2/ir/translators/z3_ir.py. - Especially the structure (or methods) of endianess (is_little_endian, is_big_endian), alignment and + Especially the structure (or methods) of endianness (is_little_endian, is_big_endian), alignment and read access (Z3Mem.get) are used. """