diff --git a/.coveragerc b/.coveragerc index f1bd1a4e9..032b95d78 100644 --- a/.coveragerc +++ b/.coveragerc @@ -17,3 +17,5 @@ exclude_lines = # We don't bother testing code that's explicitly unimplemented raise NotImplementedError + raise AssertionError + raise Aarch64InvalidInstruction diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ef849e2a5..9863063ed 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,7 +3,7 @@ name: CI on: push: branches: - - master + - chess pull_request: schedule: # run CI every day even if no PRs/merges occur diff --git a/manticore/__main__.py b/manticore/__main__.py index 8f3bba29d..2f7aa8583 100644 --- a/manticore/__main__.py +++ b/manticore/__main__.py @@ -29,6 +29,9 @@ def main() -> None: """ Dispatches execution into one of Manticore's engines: evm or native. """ + # Only print with Manticore's logger + logging.getLogger().handlers = [] + log.init_logging() args = parse_arguments() if args.no_colors: @@ -101,13 +104,13 @@ def positive(value): help=("A folder name for temporaries and results." 
"(default mcore_?????)"), ) - current_version = pkg_resources.get_distribution("manticore").version - parser.add_argument( - "--version", - action="version", - version=f"Manticore {current_version}", - help="Show program version information", - ) + # current_version = pkg_resources.get_distribution("manticore").version + # parser.add_argument( + # "--version", + # action="version", + # version=f"Manticore {current_version}", + # help="Show program version information", + # ) parser.add_argument( "--config", type=str, diff --git a/manticore/core/smtlib/solver.py b/manticore/core/smtlib/solver.py index b2e54a5dc..1affa3b1e 100644 --- a/manticore/core/smtlib/solver.py +++ b/manticore/core/smtlib/solver.py @@ -231,8 +231,16 @@ def send(self, cmd: str) -> None: """ if self._debug: logger.debug(">%s", cmd) - self._proc.stdout.flush() # type: ignore - self._proc.stdin.write(f"{cmd}\n") # type: ignore + try: + self._proc.stdout.flush() # type: ignore + self._proc.stdin.write(f"{cmd}\n") # type: ignore + except (BrokenPipeError, IOError) as e: + logger.critical( + f"Solver encountered an error trying to send commands: {e}.\n" + f"\tOutput: {self._proc.stdout}\n\n" + f"\tStderr: {self._proc.stderr}" + ) + raise e def recv(self) -> str: """Reads the response from the smtlib solver""" diff --git a/manticore/core/state.py b/manticore/core/state.py index d6ae24608..58a7dd357 100644 --- a/manticore/core/state.py +++ b/manticore/core/state.py @@ -235,6 +235,7 @@ def __enter__(self): new_state._input_symbols = list(self._input_symbols) new_state._context = copy.copy(self._context) new_state._id = None + new_state.manticore = self.manticore new_state._total_exec = self._total_exec self.copy_eventful_state(new_state) @@ -593,3 +594,38 @@ def symbolicate_buffer( else: assert b != 0 return data + + def constrain_and_symbolicate_buffer( + self, data, label="INPUT", constraints={}, string=False, taint=frozenset() + ): + # NOTE(sonya): this constraint dict should expanded to accommodate 
regex + # constraints is of the form {'wildcard char': [list of 'not' chars]} + # this function should be very useful for constraining input patterns in polling loops and other program arguments + + if constraints: + size = len(data) + symb = self._constraints.new_array( + name=label, index_max=size, taint=taint, avoid_collisions=True + ) + self._input_symbols.append(symb) + + tmp = [] + for i in range(size): + if data[i] in constraints: + tmp.append(symb[i]) + logger.error(f"Constraint{constraints[data[i]]}") + for c in constraints[data[i]]: + logger.error(f"Constraint {c}") + self._constraints.add(tmp[i] != c) + else: + tmp.append(data[i]) + + data = tmp + + if string: + for b in data: + if issymbolic(b): + self._constraints.add(b != 0) + else: + assert b != 0 + return data diff --git a/manticore/native/cpu/aarch64.py b/manticore/native/cpu/aarch64.py index 2957c5fb3..a18304c0b 100644 --- a/manticore/native/cpu/aarch64.py +++ b/manticore/native/cpu/aarch64.py @@ -1,4 +1,5 @@ -import warnings +from typing import NamedTuple +from inspect import signature as inspect_signature import capstone as cs import collections @@ -16,7 +17,6 @@ Operand, instruction, ) -from .arm import HighBit, Armv7Operand from .bitwise import SInt, UInt, ASR, LSL, LSR, ROR, Mask, GetNBits from .register import Register from ...core.smtlib import Operators @@ -299,45 +299,7 @@ def canonicalize_instruction_name(insn): # work for B.cond. Instead of being set to something like 'b.eq', # it just returns 'b'. name = insn.mnemonic.upper() - name = OP_NAME_MAP.get(name, name) - ops = insn.operands - name_list = name.split(".") - - # Make sure MOV (bitmask immediate) and MOV (register) go through 'MOV'. - if ( - name == "ORR" - and len(ops) == 3 - and ops[1].type == cs.arm64.ARM64_OP_REG - and ops[1].reg in ["WZR", "XZR"] - and not ops[2].is_shifted() - ): - name = "MOV" - insn._raw.mnemonic = name.lower().encode("ascii") - del ops[1] - - # Map all B.cond variants to a single implementation. 
- elif len(name_list) == 2 and name_list[0] == "B" and insn.cc != cs.arm64.ARM64_CC_INVALID: - name = "B_cond" - - # XXX: BFI is only valid when Rn != 11111: - # https://github.com/aquynh/capstone/issues/1441 - elif ( - name == "BFI" - and len(ops) == 4 - and ops[1].type == cs.arm64.ARM64_OP_REG - and ops[1].reg in ["WZR", "XZR"] - ): - name = "BFC" - insn._raw.mnemonic = name.lower().encode("ascii") - del ops[1] - - # XXX: CMEQ incorrectly sets the type to 'ARM64_OP_FP' for - # 'cmeq v0.16b, v1.16b, #0': - # https://github.com/aquynh/capstone/issues/1443 - elif name == "CMEQ" and len(ops) == 3 and ops[2].type == cs.arm64.ARM64_OP_FP: - ops[2]._type = cs.arm64.ARM64_OP_IMM - - return name + return OP_NAME_MAP.get(name, name) @property def insn_bit_str(self): @@ -2366,13 +2328,15 @@ def _CMEQ_zero(cpu, res_op, reg_op, imm_op): cpu._cmeq(res_op, reg_op, imm_op, register=False) @instruction - def CMEQ(cpu, res_op, reg_op, reg_imm_op): + def CMEQ(cpu, res_op, reg_op, reg_imm_op, _bug=0): """ Combines CMEQ (register) and CMEQ (zero). :param res_op: destination register. :param reg_op: source register. :param reg_imm_op: source register or immediate (zero). + + :param bug: Buggy extra operand https://github.com/aquynh/capstone/issues/1629 """ assert res_op.type is cs.arm64.ARM64_OP_REG assert reg_op.type is cs.arm64.ARM64_OP_REG @@ -3648,17 +3612,6 @@ def _MOV_to_general(cpu, res_op, reg_op): # XXX: Check if trapped. - # XXX: Capstone doesn't set 'vess' for this alias: - # https://github.com/aquynh/capstone/issues/1452 - if res_op.size == 32: - reg_op.op.vess = cs.arm64.ARM64_VESS_S - - elif res_op.size == 64: - reg_op.op.vess = cs.arm64.ARM64_VESS_D - - else: - raise Aarch64InvalidInstruction - # The 'instruction' decorator advances PC, so call the original # method. cpu.UMOV.__wrapped__(cpu, res_op, reg_op) @@ -3851,7 +3804,7 @@ def MRS(cpu, res_op, reg_op): :param reg_op: source system register. 
""" assert res_op.type is cs.arm64.ARM64_OP_REG - assert reg_op.type is cs.arm64.ARM64_OP_REG_MRS + assert reg_op.type is cs.arm64.ARM64_OP_SYS insn_rx = "1101010100" insn_rx += "1" # L @@ -3877,7 +3830,7 @@ def MSR(cpu, res_op, reg_op): :param res_op: destination system register. :param reg_op: source register. """ - assert res_op.type is cs.arm64.ARM64_OP_REG_MSR + assert res_op.type is cs.arm64.ARM64_OP_SYS assert reg_op.type is cs.arm64.ARM64_OP_REG insn_rx = "1101010100" @@ -5168,18 +5121,18 @@ def UMOV(cpu, res_op, reg_op): reg = reg_op.read() index = reg_op.op.vector_index - vess = reg_op.op.vess + vas = reg_op.op.vas - if vess == cs.arm64.ARM64_VESS_B: + if vas == cs.arm64.ARM64_VAS_1B: elem_size = 8 - elif vess == cs.arm64.ARM64_VESS_H: + elif vas == cs.arm64.ARM64_VAS_1H: elem_size = 16 - elif vess == cs.arm64.ARM64_VESS_S: + elif vas == cs.arm64.ARM64_VAS_1S: elem_size = 32 - elif vess == cs.arm64.ARM64_VESS_D: + elif vas == cs.arm64.ARM64_VAS_1D: elem_size = 64 else: @@ -5302,6 +5255,9 @@ def get_arguments(self): for address in self.values_from(self._cpu.STACK): yield address + def get_return_reg(self): + return "X0" + def write_result(self, result): self._cpu.X0 = result @@ -5339,6 +5295,7 @@ def __init__(self, cpu, op, **kwargs): cs.arm64.ARM64_OP_MEM, cs.arm64.ARM64_OP_IMM, cs.arm64.ARM64_OP_FP, + cs.arm64.ARM64_OP_SYS, cs.arm64.ARM64_OP_BARRIER, ): raise NotImplementedError(f"Unsupported operand type: '{self.op.type}'") @@ -5386,7 +5343,7 @@ def is_extended(self): def read(self): if self.type == cs.arm64.ARM64_OP_REG: return self.cpu.regfile.read(self.reg) - elif self.type == cs.arm64.ARM64_OP_REG_MRS: + elif self.type == cs.arm64.ARM64_OP_REG_MRS or self.type == cs.arm64.ARM64_OP_SYS: name = SYS_REG_MAP.get(self.op.sys) if not name: raise NotImplementedError(f"Unsupported system register: '0x{self.op.sys:x}'") @@ -5399,7 +5356,7 @@ def read(self): def write(self, value): if self.type == cs.arm64.ARM64_OP_REG: self.cpu.regfile.write(self.reg, value) 
- elif self.type == cs.arm64.ARM64_OP_REG_MSR: + elif self.type == cs.arm64.ARM64_OP_REG_MSR or cs.arm64.ARM64_OP_SYS: name = SYS_REG_MAP.get(self.op.sys) if not name: raise NotImplementedError(f"Unsupported system register: '0x{self.op.sys:x}'") diff --git a/manticore/native/cpu/abstractcpu.py b/manticore/native/cpu/abstractcpu.py index cccc2173d..cf4438e27 100644 --- a/manticore/native/cpu/abstractcpu.py +++ b/manticore/native/cpu/abstractcpu.py @@ -294,6 +294,15 @@ def get_arguments(self): """ raise NotImplementedError + def get_return_reg(self): + """ + Extract the location a return value will be written to. Produces + a string describing a register where the return value is written to. + :return: return register name + :rtype: string + """ + raise NotImplementedError + def write_result(self, result): """ Write the result of a model back to the environment. @@ -521,6 +530,7 @@ def __init__(self, regfile: RegisterFile, memory: Memory, **kwargs): self._instruction_cache: Dict[int, Any] = {} self._icount = 0 self._last_pc = None + self._last_executed_pc = None self._concrete = kwargs.pop("concrete", False) self.emu = None self._break_unicorn_at: Optional[int] = None @@ -537,6 +547,7 @@ def __getstate__(self): state["memory"] = self._memory state["icount"] = self._icount state["last_pc"] = self._last_pc + state["last_executed_pc"] = self._last_executed_pc state["disassembler"] = self._disasm state["concrete"] = self._concrete state["break_unicorn_at"] = self._break_unicorn_at @@ -553,6 +564,7 @@ def __setstate__(self, state): ) self._icount = state["icount"] self._last_pc = state["last_pc"] + self._last_executed_pc = state["last_executed_pc"] self._disasm = state["disassembler"] self._concrete = state["concrete"] self._break_unicorn_at = state["break_unicorn_at"] @@ -563,6 +575,14 @@ def __setstate__(self, state): def icount(self): return self._icount + @property + def last_executed_pc(self) -> Optional[int]: + return self._last_executed_pc + + @property + def 
last_executed_insn(self): + return self.decode_instruction(self.last_executed_pc) + ############################## # Register access @property @@ -655,6 +675,8 @@ def emulate_until(self, target: int): self._concrete = True self._break_unicorn_at = target if self.emu: + self.emu.write_backs_disabled = False + self.emu.load_state_from_manticore() self.emu._stop_at = target ############################# @@ -685,12 +707,13 @@ def write_int(self, where, expression, size=None, force=False): self._publish("did_write_memory", where, expression, size) - def _raw_read(self, where: int, size=1) -> bytes: + def _raw_read(self, where: int, size: int = 1, force: bool = False) -> bytes: """ Selects bytes from memory. Attempts to do so faster than via read_bytes. :param where: address to read from :param size: number of bytes to read + :param force: whether to ignore memory permissions :return: the bytes in memory """ map = self.memory.map_containing(where) @@ -716,11 +739,11 @@ def _raw_read(self, where: int, size=1) -> bytes: elif isinstance(map, AnonMap): data = bytes(map._data[start : start + size]) else: - data = b"".join(self.memory[where : where + size]) + data = b"".join(self.memory.read(where, size, force=force)) assert len(data) == size, "Raw read resulted in wrong data read which should never happen" return data - def read_int(self, where, size=None, force=False): + def read_int(self, where, size=None, force=False, publish=True): """ Reads int from memory @@ -733,13 +756,15 @@ def read_int(self, where, size=None, force=False): if size is None: size = self.address_bit_size assert size in SANE_SIZES - self._publish("will_read_memory", where, size) + if publish: + self._publish("will_read_memory", where, size) data = self._memory.read(where, size // 8, force) assert (8 * len(data)) == size value = Operators.CONCAT(size, *map(Operators.ORD, reversed(data))) - self._publish("did_read_memory", where, value, size) + if publish: + self._publish("did_read_memory", where, value, 
size) return value def write_bytes(self, where: int, data, force: bool = False) -> None: @@ -774,7 +799,7 @@ def write_bytes(self, where: int, data, force: bool = False) -> None: for i in range(len(data)): self.write_int(where + i, Operators.ORD(data[i]), 8, force) - def read_bytes(self, where: int, size: int, force: bool = False): + def read_bytes(self, where: int, size: int, force: bool = False, publish=True): """ Read from memory. @@ -786,7 +811,7 @@ def read_bytes(self, where: int, size: int, force: bool = False): """ result = [] for i in range(size): - result.append(Operators.CHR(self.read_int(where + i, 8, force))) + result.append(Operators.CHR(self.read_int(where + i, 8, force, publish=publish))) return result def write_string( @@ -973,6 +998,7 @@ def execute(self): """ curpc = self.PC if self._delayed_event: + self._last_executed_pc = self._last_pc self._icount += 1 self._publish( "did_execute_instruction", @@ -997,6 +1023,7 @@ def execute(self): # FIXME (theo) why just return here? # hook changed PC, so we trust that there is nothing more to do if insn.address != self.PC: + self._last_executed_pc = self.PC return name = self.canonicalize_instruction_name(insn) @@ -1044,6 +1071,7 @@ def _publish_instruction_as_executed(self, insn): """ Notify listeners that an instruction has been executed. 
""" + self._last_executed_pc = self._last_pc self._icount += 1 self._publish("did_execute_instruction", self._last_pc, self.PC, insn) @@ -1068,7 +1096,10 @@ def concrete_emulate(self, insn): if not self.emu: self.emu = ConcreteUnicornEmulator(self) + if self.emu._stop_at is None: + self.emu.write_backs_disabled = False self.emu._stop_at = self._break_unicorn_at + self.emu.load_state_from_manticore() try: self.emu.emulate(insn) except unicorn.UcError as e: diff --git a/manticore/native/cpu/arm.py b/manticore/native/cpu/arm.py index 6d87aea84..9d0ca6a96 100644 --- a/manticore/native/cpu/arm.py +++ b/manticore/native/cpu/arm.py @@ -555,6 +555,9 @@ def get_arguments(self): for i in range(6): yield f"R{i}" + def get_return_reg(self): + return "R0" + def write_result(self, result): self._cpu.R0 = result @@ -754,14 +757,11 @@ def set_arm_tls(self, data): @staticmethod def canonicalize_instruction_name(instr): name = instr.insn_name().upper() - # XXX bypass a capstone bug that incorrectly labels some insns as mov - if name == "MOV": - if instr.mnemonic.startswith("lsr"): - return "LSR" - elif instr.mnemonic.startswith("lsl"): - return "LSL" - elif instr.mnemonic.startswith("asr"): - return "ASR" + # FIXME: Workaround https://github.com/aquynh/capstone/issues/1630 + if instr.mnemonic == "addw": + return "ADDW" + elif instr.mnemonic == "subw": + return "SUBW" return OP_NAME_MAP.get(name, name) def _wrap_operands(self, operands): diff --git a/manticore/native/cpu/x86.py b/manticore/native/cpu/x86.py index e915eaef1..e92bbf16e 100644 --- a/manticore/native/cpu/x86.py +++ b/manticore/native/cpu/x86.py @@ -497,6 +497,16 @@ class AMD64RegFile(RegisterFile): "FPTAG", ) + def __copy__(self): + """Custom shallow copy to create new dictionaries for concrete register + values lookups (snapshot). 
Should be read-only""" + cls = self.__class__ + result = cls.__new__(cls) + result.__dict__.update(self.__dict__) + result._cache = self._cache.copy() + result._registers = self._registers.copy() + return result + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -827,7 +837,7 @@ def __setstate__(self, state): # Segments def set_descriptor(self, selector, base, limit, perms): - assert selector > 0 and selector < 0xFFFF + assert selector >= 0 and selector < 0xFFFF assert base >= 0 and base < (1 << self.address_bit_size) assert limit >= 0 and limit < 0xFFFF or limit & 0xFFF == 0 # perms ? not used yet Also is not really perms but rather a bunch of attributes @@ -942,8 +952,11 @@ def CPUID(cpu): """ # FIXME Choose conservative values and consider returning some default when eax not here conf = { - 0x0: (0x0000000D, 0x756E6547, 0x6C65746E, 0x49656E69), - 0x1: (0x000306C3, 0x05100800, 0x7FFAFBFF, 0xBFEBFBFF), + # Taken from comparison against Unicorn@v1.0.2 + 0x0: (0x00000004, 0x68747541, 0x444D4163, 0x69746E65), + # Taken from comparison against Unicorn@v1.0.2 + 0x1: (0x663, 0x800, 0x2182200, 0x7088100), + # TODO: Check against Unicorn 0x2: (0x76035A01, 0x00F0B5FF, 0x00000000, 0x00C10000), 0x4: { 0x0: (0x1C004121, 0x01C0003F, 0x0000003F, 0x00000000), @@ -1061,6 +1074,7 @@ def TEST(cpu, src1, src2): cpu.PF = cpu._calculate_parity_flag(temp) cpu.CF = False cpu.OF = False + cpu.AF = False # Undefined, but ends up being `0` in emulator @instruction def NOT(cpu, dest): @@ -1099,11 +1113,7 @@ def XOR(cpu, dest, src): :param dest: destination operand. :param src: source operand. """ - if dest == src: - # if the operands are the same write zero - res = dest.write(0) - else: - res = dest.write(dest.read() ^ src.read()) + res = dest.write(dest.read() ^ src.read()) # Defined Flags: szp cpu._calculate_logic_flags(dest.size, res) @@ -1158,7 +1168,7 @@ def AAA(cpu): This instruction executes as described in compatibility mode and legacy mode. 
It is not valid in 64-bit mode. :: - IF ((AL AND 0FH) > 9) Operators.OR(AF = 1) + IF ((AL AND 0FH) > 9) OR (AF = 1) THEN AL = (AL + 6); AH = AH + 1; @@ -1175,20 +1185,10 @@ def AAA(cpu): cpu.CF = cpu.AF cpu.AH = Operators.ITEBV(8, cpu.AF, cpu.AH + 1, cpu.AH) cpu.AL = Operators.ITEBV(8, cpu.AF, cpu.AL + 6, cpu.AL) - """ - if (cpu.AL & 0x0F > 9) or cpu.AF == 1: - cpu.AL = cpu.AL + 6 - cpu.AH = cpu.AH + 1 - cpu.AF = True - cpu.CF = True - else: - cpu.AF = False - cpu.CF = False - """ cpu.AL = cpu.AL & 0x0F @instruction - def AAD(cpu, imm=None): + def AAD(cpu, imm): """ ASCII adjust AX before division. @@ -1214,12 +1214,7 @@ def AAD(cpu, imm=None): :param cpu: current CPU. """ - if imm is None: - imm = 10 - else: - imm = imm.read() - - cpu.AL += cpu.AH * imm + cpu.AL += cpu.AH * imm.read() cpu.AH = 0 # Defined flags: ...sz.p. @@ -1249,11 +1244,7 @@ def AAM(cpu, imm=None): :param cpu: current CPU. """ - if imm is None: - imm = 10 - else: - imm = imm.read() - + imm = imm.read() cpu.AH = Operators.UDIV(cpu.AL, imm) cpu.AL = Operators.UREM(cpu.AL, imm) @@ -5502,6 +5493,21 @@ def NOP(cpu, arg0=None): :param cpu: current CPU. :param arg0: this argument is ignored. """ + pass + + @instruction + def ENDBR64(cpu): + """ + The ENDBRANCH is a new instruction that is used to mark valid jump target + addresses of indirect calls and jumps in the program. This instruction + opcode is selected to be one that is a NOP on legacy machines such that + programs compiled with ENDBRANCH new instruction continue to function on + old machines without the CET enforcement. On processors that support CET + the ENDBRANCH is still a NOP and is primarily used as a marker instruction + by the processor pipeline to detect control flow violations. + :param cpu: current CPU. 
+ """ + pass @instruction def MOVD(cpu, op0, op1): @@ -5656,6 +5662,14 @@ def FNSTCW(cpu, dest): """ cpu.write_int(dest.address(), cpu.FPCW, 16) + def sem_SYSCALL(cpu): + """ + Syscall semantics without @instruction for use in emulator + """ + cpu.RCX = cpu.RIP + cpu.R11 = cpu.RFLAGS + raise Syscall() + @instruction def SYSCALL(cpu): """ @@ -5670,9 +5684,7 @@ def SYSCALL(cpu): :param cpu: current CPU. """ - cpu.RCX = cpu.RIP - cpu.R11 = cpu.RFLAGS - raise Syscall() + cpu.sem_SYSCALL() @instruction def MOVLPD(cpu, dest, src): @@ -6439,6 +6451,9 @@ def get_arguments(self): self._arguments += 1 yield address + def get_return_reg(self): + return "EAX" + def write_result(self, result): self._cpu.EAX = result @@ -6469,6 +6484,9 @@ def get_arguments(self): for address in self.values_from(self._cpu.RSP + word_bytes): yield address + def get_return_reg(self): + return "RAX" + def write_result(self, result): # XXX(yan): Can also return in rdx for wide values. self._cpu.RAX = result diff --git a/manticore/native/heap_tracking/heap_syscalls.py b/manticore/native/heap_tracking/heap_syscalls.py new file mode 100644 index 000000000..1729cfbcb --- /dev/null +++ b/manticore/native/heap_tracking/heap_syscalls.py @@ -0,0 +1,20 @@ +i386 = { + "brk": 45, + "mmap": 192, # sys_mmap_pgoff + "munmap": 91, +} +amd64 = { + "brk": 12, + "mmap": 9, + "munmap": 11, +} +armv7 = { + "brk": 45, + "mmap": 192, # sys_mmap2 + "munmap": 91, +} +aarch64 = { + "brk": 214, + "mmap": 222, + "munmap": 215, +} diff --git a/manticore/native/heap_tracking/hook_malloc_library.py b/manticore/native/heap_tracking/hook_malloc_library.py new file mode 100644 index 000000000..5d45c1758 --- /dev/null +++ b/manticore/native/heap_tracking/hook_malloc_library.py @@ -0,0 +1,360 @@ +from manticore.native.state import State +from manticore.native import Manticore +from manticore.native.heap_tracking.malloc_lib_data import MallocLibData + +import logging +from typing import Callable, Optional, Union + +logger = 
logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + + +HOOK_BRK_INFO: bool +HOOK_MMAP_INFO: bool +HOOK_MALLOC_RETURN: bool +HOOK_FREE_RETURN: bool +HOOK_CALLOC_RETURN: bool +HOOK_REALLOC_RETURN: bool + +BRK_SYS_NUM: int +MMAP_SYS_NUM: int +MUNMAP_SYS_NUM: int + + +def read_arg(cpu, arg: Union[str, int]): + if isinstance(arg, int): + return cpu.read_int(arg) + else: + return cpu.read_register(arg) + + +def load_ret_addr(state: State) -> int: + """Loads the return address of a function from the stack + (Assuming the next instruction to be executed is the start of a function call) + """ + stack_location = state.cpu.read_register("STACK") + ret_addr = state.cpu.read_int(stack_location, state.cpu.address_bit_size) + return ret_addr + + +def add_ret_hook(func: str, state: State, ret_hook: Callable[[State], None]) -> None: + ret_addr = load_ret_addr(state) + state.add_hook(ret_addr, ret_hook, after=False) + + +def add_sys_freeing_hooks(state: State): + if HOOK_MMAP_INFO: + state.add_hook(MUNMAP_SYS_NUM, hook_munmap, after=False, syscall=True) + + +def remove_sys_freeing_hooks(state: State): + if HOOK_MMAP_INFO: + state.remove_hook(MUNMAP_SYS_NUM, hook_munmap, syscall=True) + + +def add_sys_allocing_hooks(state: State): + if HOOK_BRK_INFO: + state.add_hook(BRK_SYS_NUM, hook_brk, after=False, syscall=True) + + if HOOK_MMAP_INFO: + state.add_hook(MMAP_SYS_NUM, hook_mmap, after=False, syscall=True) + + +def remove_sys_allocing_hooks(state: State): + if HOOK_BRK_INFO: + state.remove_hook(BRK_SYS_NUM, hook_brk, syscall=True) + + if HOOK_MMAP_INFO: + state.remove_hook(MMAP_SYS_NUM, hook_mmap, syscall=True) + + +def hook_malloc_lib( + initial_state: State, + malloc: int = 0x0, + free: int = 0x0, + calloc: int = 0x0, + realloc: int = 0x0, + hook_brk_info: bool = True, + hook_mmap_info: bool = True, + hook_malloc_ret_info: bool = True, + hook_free_ret_info: bool = True, + hook_calloc_ret_info: bool = True, + hook_realloc_ret_info: bool = True, + workspace: Optional[str] = 
None, +): + """Function to add malloc hooks and do prep work + - TODO(Sonya): would like this to eventially be __init__() method for a class + once manticore hook callbacks have been debugged. + (from Eric) See: https://github.com/trailofbits/manticore/blob/master/tests/native/test_state.py#L163-L218 + & https://github.com/trailofbits/manticore/blob/master/tests/native/test_state.py#L274-L278 to work on debugging this + """ + # This features use on platforms besides amd64 is entirely untested + assert initial_state.platform.current.machine == "amd64", ( + "This feature's use on platforms besides amd64 is " "entirely untested." + ) + + initial_state.context["malloc_lib"] = MallocLibData(workspace) + + global HOOK_BRK_INFO, HOOK_MMAP_INFO, HOOK_MALLOC_RETURN, HOOK_FREE_RETURN, HOOK_CALLOC_RETURN, HOOK_REALLOC_RETURN + HOOK_BRK_INFO = hook_brk_info + HOOK_MMAP_INFO = hook_mmap_info + HOOK_MALLOC_RETURN = hook_malloc_ret_info + HOOK_FREE_RETURN = hook_free_ret_info + HOOK_CALLOC_RETURN = hook_calloc_ret_info + HOOK_REALLOC_RETURN = hook_realloc_ret_info + + # Add requested malloc lib hooks + if malloc: + initial_state.add_hook(malloc, hook_malloc, after=False) + if free: + initial_state.add_hook(free, hook_free, after=False) + if calloc: + initial_state.add_hook(calloc, hook_calloc, after=False) + if realloc: + initial_state.add_hook(realloc, hook_realloc, after=False) + + # Import syscall numbers for current architecture + global BRK_SYS_NUM, MMAP_SYS_NUM, MUNMAP_SYS_NUM + from . import heap_syscalls + + table = getattr(heap_syscalls, initial_state.platform.current.machine) + BRK_SYS_NUM = table["brk"] + MMAP_SYS_NUM = table["mmap"] + MUNMAP_SYS_NUM = table["munmap"] + + +def hook_mmap_return(state: State): + """Hook to process munmap information and add a function hook to the callsite of munmap (which should + be inside malloc or another function inside of malloc which calls munmap), post execution of the + munmap call. 
+ mmap() returns a pointer to the mapped area + """ + ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg()) + logger.info(f"mmap ret val: {hex(ret_val)}, state: {state.id}") + + state.context["malloc_lib"].process_mmap(ret_val, state.context["mmap_args"]) + del state.context["mmap_args"] + + state.remove_hook(state.cpu.read_register("PC"), hook_mmap_return) + + +def hook_mmap(state: State): + """Hook to process mmap information and add a function hook to the callsite of mmap (which should + be inside the free or another function inside of free which calls mmap), post execution of the + mmap call. + void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset); + """ + args = [] + args_gen = state._platform._function_abi.get_arguments() + args.append(read_arg(state.cpu, next(args_gen))) # void *addr + args.append(read_arg(state.cpu, next(args_gen))) # size_t length + args.append(read_arg(state.cpu, next(args_gen))) # int prot + args.append(read_arg(state.cpu, next(args_gen))) # int flags + args.append(read_arg(state.cpu, next(args_gen))) # int fd + args.append(read_arg(state.cpu, next(args_gen))) # off_t offset + logger.info(f"Invoking mmap in malloc. Args {args}, state: {state.id}") + state.context["mmap_args"] = args + + add_ret_hook("mmap", state, hook_mmap_return) + + +def hook_brk_return(state: State): + """Hook to process brk return information and remove the hook to itself at the callsite to brk, + post execution of the brk function. 
+ brk() returns 0 - on error, -1 is returned + """ + ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg()) + logger.info(f"brk ret val: {hex(ret_val)}, state: {state.id}") + + state.context["malloc_lib"].process_brk(ret_val, state.context["brk_increment"]) + del state.context["brk_increment"] + + state.remove_hook(state.cpu.read_register("PC"), hook_brk_return) + + +def hook_brk(state: State): + """Hook to process brk information and add a function hook to the callsite of brk (which should + be inside malloc or another function inside of malloc which calls brk), post execution of the + brk call. + Note (Sonya): Reminder that any call to sbrk with a val of 0 will never reach brk + Note (Sonya): See https://code.woboq.org/userspace/glibc/misc/sbrk.c.html for approximate + sbrk implementation + void *sbrk(intptr_t increment); + int brk(void *addr); + """ + # Get request size from arg1 + addr = read_arg(state.cpu, next(state._platform._function_abi.get_arguments())) + increment = addr - state.platform.brk + logger.info( + f"Invoking brk. Request address: {addr} for an increment of {increment}. Old brk: {state.platform.brk}, state: {state.id}" + ) + state.context["brk_increment"] = increment + + # Pull return address off the stack and add a hook for it + add_ret_hook("brk", state, hook_brk_return) + + +def hook_malloc_return(state: State): + """Hook to process malloc information and remove function hooks at the return address, + post execution of the malloc function. 
+ malloc() returns a pointer to the allocated memory + """ + ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg()) + logger.info(f"malloc ret val: {hex(ret_val)}, state: {state.id}") + state.context["malloc_lib"].process_malloc(ret_val, state.context["malloc_size"]) + del state.context["malloc_size"] + + remove_sys_allocing_hooks(state) + + state.remove_hook(state.cpu.read_register("PC"), hook_malloc_return) + #logger.debug(f"Remaining hooks in state {state.id}: {state._hooks}") + + +def hook_malloc(state: State): + """Hook to process malloc information and add function hooks at malloc function start, + pre-execution of the malloc function. + void *malloc(size_t size); + """ + # Get request size + malloc_size = read_arg(state.cpu, next(state._platform._function_abi.get_arguments())) + logger.info(f"Invoking malloc for size: {malloc_size}, state: {state.id}") + state.context["malloc_size"] = malloc_size + + add_sys_allocing_hooks(state) + + # Hook Return Address + if HOOK_MALLOC_RETURN: + add_ret_hook("malloc", state, hook_malloc_return) + + +def hook_munmap_return(state: State): + """Hook to process munmap information and add a function hook to the callsite of munmap (which should + be inside malloc or another function inside of malloc which calls munmap), post execution of the + munmap call. + munmap() returns 0, on failure -1 + """ + ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg()) + logger.info(f"munmap ret val: {hex(ret_val)}, state: {state.id}") + + state.remove_hook(state.cpu.read_register("PC"), hook_munmap_return) + + +def hook_munmap(state: State): + """Hook to process munmap information and add a function hook to the callsite of munmap (which should + be inside the free or another function inside of free which calls munmap), post execution of the + munmap call. 
+ int munmap(void *addr, size_t length); + """ + args_gen = state._platform._function_abi.get_arguments() + addr = read_arg(state.cpu, next(args_gen)) # void *addr + length = read_arg(state.cpu, next(args_gen)) # size_t length + logger.info(f"Invoking munmap in malloc. Args {addr}, {length}. State: {state.id}") + + state.context["malloc_lib"].process_munmap(addr, length) + + add_ret_hook("munmap", state, hook_munmap_return) + + +def hook_free_return(state: State): + """Hook to process free information and remove function hooks at the callsite, + post execution of the free function. + free() has no return value + """ + logger.info(f"Free has no return value, state: {state.id}") + + remove_sys_freeing_hooks(state) + state.remove_hook(state.cpu.read_register("PC"), hook_free_return) + #logger.debug(f"Remaining hooks in state {state.id}: {state._hooks}") + + +def hook_free(state: State): + """Hook to process free information and add function hooks at free function start, + pre-execution of the free function. + void free(void *ptr); + """ + # Get free address + free_address = read_arg(state.cpu, next(state._platform._function_abi.get_arguments())) + logger.info(f"Attempting to free: {hex(free_address)}, state: {state.id}") + state.context["malloc_lib"].process_free(free_address) + + add_sys_freeing_hooks(state) + + # Hook free return address + if HOOK_FREE_RETURN: + add_ret_hook("free", state, hook_free_return) + + +def hook_calloc_return(state: State): + """Hook to process calloc information and remove function hooks at the callsite, + post execution of the calloc function. 
def hook_calloc(state: State):
    """Entry hook for ``void *calloc(size_t nmemb, size_t size)``.

    Reads the first two ABI arguments, logs them and stores the pair under
    ``state.context["calloc_request"]`` as ``(nmemb, elem_size)``. It then
    calls ``add_sys_allocing_hooks`` and, when ``HOOK_CALLOC_RETURN`` is
    truthy, registers ``hook_calloc_return`` through ``add_ret_hook``.
    """
    arguments = state._platform._function_abi.get_arguments()
    # Consume the argument descriptors in order: nmemb first, then size.
    nmemb, elem_size = (read_arg(state.cpu, next(arguments)) for _ in range(2))
    logger.info(f"Invoking calloc for {nmemb} element(s) of size: {elem_size}, state: {state.id}")
    state.context["calloc_request"] = (nmemb, elem_size)

    add_sys_allocing_hooks(state)

    # The return-site hook is optional and controlled by a module-level switch.
    if HOOK_CALLOC_RETURN:
        add_ret_hook("calloc", state, hook_calloc_return)
def hook_realloc(state: State):
    """Entry hook for ``void *realloc(void *ptr, size_t size)``.

    Reads the first two ABI arguments, logs them and stores the pair under
    ``state.context["realloc_request"]`` as ``(ptr, new_size)``. It calls both
    ``add_sys_allocing_hooks`` and ``add_sys_freeing_hooks`` and, when
    ``HOOK_REALLOC_RETURN`` is truthy, registers ``hook_realloc_return``
    through ``add_ret_hook``.
    """
    arguments = state._platform._function_abi.get_arguments()
    # Consume the argument descriptors in order: ptr first, then size.
    ptr, new_size = (read_arg(state.cpu, next(arguments)) for _ in range(2))
    logger.info(f"Attempting to realloc: {hex(ptr)} to a requested size of {new_size}, state: {state.id}")
    state.context["realloc_request"] = (ptr, new_size)

    add_sys_allocing_hooks(state)
    add_sys_freeing_hooks(state)

    # The return-site hook is optional and controlled by a module-level switch.
    if HOOK_REALLOC_RETURN:
        add_ret_hook("realloc", state, hook_realloc_return)
@dataclass
class MallocLibData:
    """This class holds the malloc library data in a specific state (or on specific program path)."""

    workspace: Optional[str]
    # (returned address, requested size) for every malloc call observed
    malloc_calls: List[Tuple[int, int]] = field(default_factory=list)
    # pointer passed to every free call observed
    free_calls: List[int] = field(default_factory=list)
    # (returned address, size) for every brk/sbrk observed
    sbrk_chunks: List[Tuple[int, int]] = field(default_factory=list)
    # returned address -> the argument list handed to process_mmap
    mmap_chunks: Dict[int, List] = field(default_factory=dict)
    # unmapped address -> unmapped length
    munmap_chunks: Dict[int, int] = field(default_factory=dict)
    # [addr, addr + size) -> AllocationInformation for each tracked allocation
    malloc_lib_tree: IntervalTree = field(default_factory=IntervalTree)
    system_heap_tree: IntervalTree = field(
        default_factory=IntervalTree
    )  # TODO(sonya): this needs support

    def __str__(self):
        # TODO(Sonya): This does not print address information in hexadecimal
        return (
            f"malloc calls: {self.malloc_calls}\n"
            f"free calls: {self.free_calls}\n"
            f"sbrk chunks: {self.sbrk_chunks}\n"
            f"mmap chunks: {self.mmap_chunks}\n"
        )

    # TODO(Sonya): Add some more methods here for helpful semantics of recording/retrieving information
    # Might want to annotate all this with instruction address information
    def process_malloc(self, ret_addr: int, size: int):
        """Record a malloc call and, when it produced a real chunk, track its range.

        :param ret_addr: pointer returned by malloc (0 when the allocation failed)
        :param size: number of bytes requested
        """
        self.malloc_calls.append((ret_addr, size))
        # IntervalTree raises ValueError on an empty interval, so malloc(0)
        # cannot be stored as a range. A NULL return is a failed allocation and
        # must not be tracked as a live chunk at address 0.
        if ret_addr == 0 or size <= 0:
            return
        self.malloc_lib_tree[ret_addr : ret_addr + size] = AllocationInformation(
            ret_addr, size, False
        )

    def process_free(self, free_addr: int):
        """Record a free call and flag every tracked allocation containing ``free_addr``.

        :param free_addr: pointer passed to free
        """
        # Maybe remove from malloc list and add to a used_and_free list
        self.free_calls.append(free_addr)
        # Point lookup: returns the intervals whose range contains free_addr.
        for allocation in self.malloc_lib_tree[free_addr]:
            allocation.data.is_freed = True

    def process_calloc(self, nmemb: int, elem_size: int, ret_addr: int):
        """Placeholder: calloc calls are not recorded yet."""
        # TODO(Sonya)
        pass

    def process_realloc(self, old_addr: int, new_addr: int, size: int):
        """Placeholder: realloc calls are not recorded yet."""
        # TODO(Sonya)
        pass

    def process_brk(self, ret_addr: int, size: int):
        """Record a brk/sbrk chunk as ``(ret_addr, size)``."""
        # check last chunk added to list
        # if size + address == new starting address of chunk -> add new chunk size to last allocated chunk
        # else -> add a new chunk to the list
        self.sbrk_chunks.append((ret_addr, size))

    def process_mmap(self, ret_addr: int, args: List):
        """Record an mmap call: maps the returned address to the call's argument list."""
        # add new chunk to the mmap_list
        self.mmap_chunks[ret_addr] = args

    def process_munmap(self, addr: int, length: int):
        """Record a munmap call: maps the unmapped address to its length."""
        # remove from mmap list and add to the munmaped list
        self.munmap_chunks[addr] = length
platform.input.write(initial_state.symbolicate_buffer("+" * stdin_size, label="STDIN")) diff --git a/manticore/native/memory.py b/manticore/native/memory.py index f85d0d2e9..7b9b41977 100644 --- a/manticore/native/memory.py +++ b/manticore/native/memory.py @@ -26,12 +26,13 @@ logger = logging.getLogger(__name__) consts = config.get_group("native") -consts.add( - "fast_crash", - default=False, - description="If True, throws a memory safety error if ANY concretization of a pointer is" - " out of bounds. Otherwise, forks into valid and invalid memory access states.", -) +if "fast_crash" not in consts: + consts.add( + "fast_crash", + default=False, + description="If True, throws a memory safety error if ANY concretization of a pointer is" + " out of bounds. Otherwise, forks into valid and invalid memory access states.", + ) class MemoryException(Exception): @@ -750,7 +751,7 @@ def mmapFile(self, addr, size, perms, filename, offset=0): # Okay, ready to alloc self._add(m) - logger.debug("New file-memory map @%x size:%x", addr, size) + logger.debug(f"New file-memory map @{addr:#x} size:{size:#x}") self.cpu._publish("did_map_memory", addr, size, perms, filename, offset, addr) return addr @@ -798,7 +799,7 @@ def mmap(self, addr, size, perms, data_init=None, name=None): # Okay, ready to alloc self._add(m) - logger.debug("New memory map @%x size:%x", addr, size) + logger.debug(f"New memory map @{addr:#x} size:{size:#x}") self.cpu._publish("did_map_memory", addr, size, perms, None, None, addr) return addr @@ -938,7 +939,7 @@ def munmap(self, start, size): self._add(tail) self.cpu._publish("did_unmap_memory", start, size) - logger.debug(f"Unmap memory @{start:x} size:{size:x}") + logger.debug(f"Unmapped memory @{start:#x} size:{size:#x}") def mprotect(self, start, size, perms): assert size > 0 @@ -1204,7 +1205,7 @@ def read(self, address, size, force=False): except TooManySolutions as e: m, M = self._solver.minmax(self.constraints, address) logger.debug( - f"Got 
def sys_pread64(self, fd: int, buf: int, count: int, offset: int) -> int:
    """
    read from a file descriptor at a given offset

    :param fd: file descriptor to read from
    :param buf: address of the destination buffer
    :param count: maximum number of bytes to read
    :param offset: position in the file to read from
    :return: number of bytes read, or a negated errno value on failure
    """
    data: bytes = bytes()
    if count != 0:
        if buf not in self.current.memory:  # or not self.current.memory.isValid(buf+count):
            logger.info("sys_pread: buf points to invalid address. Returning -errno.EFAULT")
            return -errno.EFAULT

        # off_t is a signed 64-bit value. A negative offset (possibly seen here as
        # a huge unsigned number) makes the host os.pread raise instead of
        # returning an error, so reject it the way pread(2) does.
        if not 0 <= offset <= 0x7FFFFFFFFFFFFFFF:
            logger.info(f"sys_pread: invalid offset ({offset}). Returning -errno.EINVAL")
            return -errno.EINVAL

        try:
            # Read the data and put it in memory
            target_file = self._get_fdlike(fd)
            if isinstance(target_file, File):
                data = target_file.pread(count, offset)
            else:
                logger.error(f"Unsupported pread on {type(target_file)} at fd {fd}")
        except FdError as e:
            logger.info(
                f"sys_pread: Not valid file descriptor ({fd}). Returning -{errorcode(e.err)}"
            )
            return -e.err
        except OSError as e:
            # A host-side read failure is reported to the emulated program as an
            # errno rather than aborting the emulator.
            err = e.errno if e.errno else errno.EIO
            logger.info(f"sys_pread: host pread failed on fd {fd} ({e}). Returning -{err}")
            return -err
        self.syscall_trace.append(("_pread", fd, data))
        self.current.write_bytes(buf, data)

    return len(data)
@staticmethod
def implemented_syscalls() -> Iterable[str]:
    """Yield the name, without the ``sys_`` prefix, of every ``sys_*`` function found on ``Linux``."""
    import inspect

    prefix = "sys_"
    return (
        name[len(prefix):]
        for name, _member in inspect.getmembers(Linux, predicate=inspect.isfunction)
        if name.startswith(prefix)
    )


@staticmethod
def print_implemented_syscalls() -> None:
    """Print each name produced by ``Linux.implemented_syscalls()`` on its own line."""
    for syscall_name in Linux.implemented_syscalls():
        print(syscall_name)
###################################################################### # Abstract classes for capstone/unicorn based cpus # no emulator by default -from unicorn import * +from intervaltree import IntervalTree, Interval +from unicorn.unicorn_const import ( + UC_ARCH_X86, + UC_MODE_64, + UC_MODE_32, + UC_PROT_NONE, + UC_PROT_READ, + UC_PROT_WRITE, + UC_PROT_EXEC, + UC_HOOK_MEM_READ_UNMAPPED, + UC_HOOK_MEM_WRITE_UNMAPPED, + UC_HOOK_MEM_FETCH_UNMAPPED, + UC_HOOK_MEM_WRITE, + UC_HOOK_INTR, + UC_HOOK_INSN, +) +from unicorn import Uc, UcError from unicorn.arm_const import * from unicorn.x86_const import * @@ -16,7 +33,7 @@ logger = logging.getLogger(__name__) -def convert_permissions(m_perms): +def convert_permissions(m_perms: str): """ Converts a Manticore permission string into a Unicorn permission :param m_perms: Manticore perm string ('rwx') @@ -73,6 +90,9 @@ def __init__(self, cpu): self.flag_registers = {"CF", "PF", "AF", "ZF", "SF", "IF", "DF", "OF"} self.write_backs_disabled = False self._stop_at = None + # Holds key of range (addr, addr + size) and value of permissions + # Key doesn't include permissions because unmap doesn't care about permissions + self.already_mapped: IntervalTree = IntervalTree() cpu.subscribe("did_write_memory", self.write_back_memory) cpu.subscribe("did_write_register", self.write_back_register) @@ -99,35 +119,15 @@ def __init__(self, cpu): self.registers = set(self._cpu.canonical_registers) # The last 8 canonical registers of x86 are individual flags; replace with the eflags self.registers -= self.flag_registers - # TODO(eric_k): unicorn@1.0.2rc1 doesn't like writing to - # the FS register, and it will segfault or hang. 
def load_state_from_manticore(self) -> None:
    """Copy Manticore's register values and memory maps into the Unicorn instance.

    Every register in ``self.registers`` is read from the Manticore CPU.
    FS/GS go through ``update_segment`` or ``msr_write``; all other registers
    are written with ``reg_write``. Afterwards every map in the CPU's memory is
    replayed through ``map_memory_callback``.

    :raises ConcretizeRegister: if a register currently holds a symbolic value
    """
    for reg in self.registers:
        val = self._cpu.read_register(reg)
        if issymbolic(val):
            from ..native.cpu.abstractcpu import ConcretizeRegister

            raise ConcretizeRegister(
                self._cpu, reg, "Concretizing for emulation.", policy="ONE"
            )

        if reg in {"FS", "GS"}:
            # When FS holds a selector known to the CPU's segment table, load the
            # whole descriptor. Otherwise write the raw value through the MSR.
            if reg == "FS" and val in self._cpu._segments:
                base, limit, perms = self._cpu._segments[val]
                self.update_segment(val, base, limit, perms)
                continue
            logger.debug("Writing %s into %s", val, reg)
            self.msr_write(reg, val)
            continue

        logger.debug("Writing %s into %s", val, reg)
        self._emu.reg_write(self._to_unicorn_id(reg), val)

    for m in self._cpu.memory.maps:
        self.map_memory_callback(m.start, len(m), m.perms, m.name, 0, m.start)
def protect_memory_callback(self, start, size, perms):
    """Apply a Manticore permission string to ``[start, start + size)`` in Unicorn."""
    end = start + size
    logger.debug(f"Changing permissions on {start:#x}:{end:#x} to '{perms}'")
    unicorn_perms = convert_permissions(perms)
    self._emu.mem_protect(start, size, unicorn_perms)


def _hook_write_mem(self, uc, _access, address: int, size: int, value: int, _data) -> bool:
    """Unicorn write hook: remember ``(value, size)`` for ``address`` in ``self._mem_delta``."""
    written = (value, size)
    self._mem_delta[address] = written
    return True
def _interrupt(self, uc, number: int, _data) -> bool:
    """Unicorn interrupt hook (SVC/INT): log the interrupt and stage an ``Interruption`` in ``self._to_raise``."""
    logger.info(f"Caught interrupt: {number}")

    # Imported here rather than at module level to prevent circular imports.
    from ..native.cpu.abstractcpu import Interruption

    pending = Interruption(number)
    self._to_raise = pending
    return True
@@ -280,7 +348,7 @@ def emulate(self, instruction): if not self._should_try_again: break - def _step(self, instruction, chunksize=0): + def _step(self, instruction, chunksize: int = 0) -> None: """ Execute a chunk fo instructions starting from instruction :param instruction: Where to start @@ -291,7 +359,7 @@ def _step(self, instruction, chunksize=0): pc = self._cpu.PC m = self._cpu.memory.map_containing(pc) if self._stop_at: - logger.info(f"Emulating from {hex(pc)} to {hex(self._stop_at)}") + logger.info(f"Emulating from {pc:#x} to {self._stop_at:#x}") self._emu.emu_start(pc, m.end if not self._stop_at else self._stop_at, count=chunksize) except UcError: # We request re-execution by signaling error; if we we didn't set @@ -308,16 +376,21 @@ def _step(self, instruction, chunksize=0): logger.info("Reached emulation target, switching to Manticore mode") self.sync_unicorn_to_manticore() self._stop_at = None + self.write_backs_disabled = True # Raise the exception from a hook that Unicorn would have eaten if self._to_raise: from ..native.cpu.abstractcpu import Syscall - if type(self._to_raise) is not Syscall: - logger.info("Raising %s", self._to_raise) + if type(self._to_raise) is Syscall: + # NOTE: raises Syscall within sem_SYSCALL + # NOTE: Need to call syscall semantic function due to + # @instruction around SYSCALL + self._cpu.sem_SYSCALL() + logger.info(f"Raising {self._to_raise}") raise self._to_raise - logger.info(f"Exiting Unicorn Mode at {hex(self._cpu.PC)}") + logger.info(f"Exiting Unicorn Mode at {self._cpu.PC:#x}") return def sync_unicorn_to_manticore(self): @@ -377,25 +450,21 @@ def write_back_register(self, reg, val): if reg in self.flag_registers: self._emu.reg_write(self._to_unicorn_id("EFLAGS"), self._cpu.read_register("EFLAGS")) return - # TODO(eric_k): unicorn@1.0.2rc1 doesn't like writing to - # the FS register, and it will segfault or hang. - if reg in {"FS"}: - logger.warning(f"Skipping {reg} write. 
def register_log_callback(cb):
    """Forward formatted Manticore log output to ``cb``.

    A single stream handler wrapping ``CallbackStream(cb)`` is attached to the
    top-level ``"manticore"`` logger. This relies on ``manticore.*`` loggers
    propagating to it (the ``logging`` default), so each record reaches ``cb``
    once. Attaching a handler to every ``manticore.*`` logger would deliver a
    record once per ancestor.

    :param cb: callable handed to ``CallbackStream``
    """
    callback_handler = logging.StreamHandler(CallbackStream(cb))
    callback_handler.setFormatter(formatter)
    logging.getLogger("manticore").addHandler(callback_handler)
def default_handler() -> logging.Handler:
    """Build the default Manticore log handler.

    The handler writes to stdout, uses the module-level ``formatter`` and has a
    fresh ``ManticoreContextFilter`` attached.
    """
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.addFilter(ManticoreContextFilter())
    stdout_handler.setFormatter(formatter)
    return stdout_handler


def init_logging(handler: Optional[logging.Handler] = None) -> None:
    """
    Initialize logging for Manticore with the given handler, or with
    `default_handler()` when none is supplied.
    """
    manticore_logger = logging.getLogger("manticore")
    # Detach from the root logger.
    manticore_logger.parent = None

    # Set the level explicitly so the root logger's level is never inherited.
    # With root at DEBUG, a user who forgot to configure the "manticore" logger
    # would otherwise get a flood of output.
    manticore_logger.setLevel(DEFAULT_LOG_LEVEL)

    # Finally attach the chosen handler to Manticore's logger.
    manticore_logger.addHandler(default_handler() if handler is None else handler)
def test_AAA_0(self):
    """ASCII Adjust AL after addition: a low nibble above 9 triggers the adjustment."""

    cs = ConstraintSet()
    mem = SMemory32(cs)
    cpu = I386Cpu(mem)

    # alloc/map a little mem
    code = mem.mmap(0x1000, 0x1000, "rwx")
    mem.mmap(0xF000, 0x1000, "rw")  # stack mapping; its address is not needed

    # 37 AAA
    mem[code] = BitVecConstant(size=8, value=0x37)
    cpu.EIP = code
    AL = 10
    AH = 0x41
    cpu.AL = AL
    cpu.AH = AH
    cpu.AF = False
    cpu.execute()

    # Low nibble of AL is > 9, so AAA adds 6 to AL, increments AH, sets AF and
    # CF, then keeps only the low nibble of AL.
    self.assertEqual(cpu.AL, (AL + 6) & 0xF)
    self.assertEqual(cpu.AH, AH + 1)
    self.assertEqual(cpu.AF, True)
    self.assertEqual(cpu.CF, True)
mem[code] = BitVecConstant(size=8, value=0x37) + cpu.EIP = code + AL = 18 + AH = 0x41 + AF = False + cpu.AL = AL + cpu.AH = AH + cpu.AF = False + cpu.execute() + + self.assertEqual(cpu.AL, AL & 0xF) + self.assertEqual(cpu.AF, False) + self.assertEqual(cpu.CF, False) + + def test_AAS_0(self): + """ASCII Adjust AL after subtraction.""" + + cs = ConstraintSet() + mem = SMemory32(cs) + cpu = I386Cpu(mem) + + # alloc/map a little mem + code = mem.mmap(0x1000, 0x1000, "rwx") + stack = mem.mmap(0xF000, 0x1000, "rw") + + # 3F AAS + mem[code] = BitVecConstant(size=8, value=0x3F) + cpu.EIP = code + AL = 10 + AH = 0x41 + AF = False + cpu.AL = AL + cpu.AH = AH + cpu.AF = False + cpu.execute() + + self.assertEqual(cpu.AL, (AL - 6) & 0xF) + self.assertEqual(cpu.AH, AH - 1) + self.assertEqual(cpu.AF, True) + self.assertEqual(cpu.CF, True) + + def test_AAS_1(self): + """ASCII Adjust AL after subtraction.""" + cs = ConstraintSet() + mem = SMemory32(cs) + cpu = I386Cpu(mem) + # alloc/map a little mem + code = mem.mmap(0x1000, 0x1000, "rwx") + stack = mem.mmap(0xF000, 0x1000, "rw") + + # 3F AAS + mem[code] = BitVecConstant(size=8, value=0x3F) + cpu.EIP = code + AL = 18 + AH = 0x41 + AF = False + cpu.AL = AL + cpu.AH = AH + cpu.AF = False + cpu.execute() + + self.assertEqual(cpu.AL, AL & 0xF) + self.assertEqual(cpu.AF, False) + self.assertEqual(cpu.CF, False) + + def test_DAA_0(self): + """Decimal Adjust AL after Addition.""" + + cs = ConstraintSet() + mem = SMemory32(cs) + cpu = I386Cpu(mem) + # alloc/map a little mem + code = mem.mmap(0x1000, 0x1000, "rwx") + stack = mem.mmap(0xF000, 0x1000, "rw") + + # 27 DAA + mem[code] = BitVecConstant(size=8, value=0x27) + cpu.EIP = code + + cpu.AL = 0xAE + cpu.BL = 0x35 + cpu.OF = True + cpu.SF = True + cpu.ZF = False + cpu.AF = False + cpu.PF = False + cpu.CF = False + + cpu.execute() + self.assertEqual(cpu.AL, 0x14) + self.assertEqual(cpu.BL, 0x35) + self.assertEqual(cpu.SF, False) + self.assertEqual(cpu.ZF, False) + self.assertEqual(cpu.AF, 
True) + self.assertEqual(cpu.PF, True) + self.assertEqual(cpu.CF, True) + + def test_DAS_0(self): + """Decimal Adjust AL after Subtraction.""" + + cs = ConstraintSet() + mem = SMemory32(cs) + cpu = I386Cpu(mem) + # alloc/map a little mem + code = mem.mmap(0x1000, 0x1000, "rwx") + stack = mem.mmap(0xF000, 0x1000, "rw") + + # 2F DAS + mem[code] = BitVecConstant(size=8, value=0x2F) + cpu.EIP = code + + cpu.AL = 0xAE + cpu.OF = True + cpu.SF = True + cpu.ZF = False + cpu.AF = False + cpu.PF = False + cpu.CF = False + + cpu.execute() + self.assertEqual(cpu.AL, 0x48) + self.assertEqual(cpu.SF, False) + self.assertEqual(cpu.ZF, False) + self.assertEqual(cpu.AF, True) + self.assertEqual(cpu.PF, True) + self.assertEqual(cpu.CF, True) + if __name__ == "__main__": unittest.main() diff --git a/tests/native/test_unicorn_concrete.py b/tests/native/test_unicorn_concrete.py index ec8971ec6..4f5fff9ef 100644 --- a/tests/native/test_unicorn_concrete.py +++ b/tests/native/test_unicorn_concrete.py @@ -1,7 +1,10 @@ import unittest import os +import io +import contextlib from manticore.native import Manticore +from manticore.native.state import State from manticore.core.plugin import Plugin @@ -45,6 +48,7 @@ def test_register_comparison(self): should_match = { "RAX", + "RCX", "RDX", "RBX", "RSP", @@ -97,12 +101,10 @@ def test_register_comparison(self): for reg in should_match: concrete_regs[reg] = getattr(st.platform.current, reg) - for reg in should_match: - self.assertEqual( - concrete_regs[reg], - normal_regs[reg], - f"Mismatch in {reg}: {concrete_regs[reg]} != {normal_regs[reg]}", - ) + concrete_regs_vals = {reg: val for reg, val in concrete_regs.items() if reg in should_match} + normal_regs_vals = {reg: val for reg, val in normal_regs.items() if reg in should_match} + self.maxDiff = None + self.assertDictEqual(concrete_regs_vals, normal_regs_vals) def test_integration_basic_stdout(self): self.m.run() @@ -117,3 +119,61 @@ def test_integration_basic_stdout(self): right = 
f.read().strip() self.assertEqual(left, right) + + +class ResumeUnicornPlugin(Plugin): + def will_run_callback(self, ready_states): + for state in ready_states: + state.cpu.emulate_until(UnicornResumeTest.MAIN) + + +class UnicornResumeTest(unittest.TestCase): + _multiprocess_can_split_ = True + MAIN = 0x402180 + PRE_LOOP = 0x4022EE + POST_LOOP = 0x402346 + DONE = 0x4024D3 + FAIL = 0x40247C + + def hook_main(self, state: State): + print("Reached main!!") + + def hook_pre_loop(self, state: State): + print("Resuming emulation") + state.cpu.emulate_until(self.POST_LOOP) + + def hook_ret_good(self, state: State): + print("We made it!") + + def hook_ret_fail(self, state: State): + self.assertTrue(False, "Target binary called `lose`!") + + def setUp(self): + dirname = os.path.dirname(__file__) + self.concrete_instance = Manticore(os.path.join(dirname, "binaries", "rusticorn")) + self.concrete_instance.register_plugin(ResumeUnicornPlugin()) + self.concrete_instance.add_hook(self.MAIN, callback=self.hook_main) + self.concrete_instance.add_hook(self.PRE_LOOP, callback=self.hook_pre_loop) + self.concrete_instance.add_hook(self.DONE, callback=self.hook_ret_good) + self.concrete_instance.add_hook(self.FAIL, callback=self.hook_ret_fail) + + def test_integration_resume(self): + f = io.StringIO() + with contextlib.redirect_stdout(f): + self.concrete_instance.run() + self.concrete_instance.finalize() + + output = f.getvalue() + print(output) + self.assertIn("Reached main!!", output) + self.assertIn("Resuming emulation", output) + self.assertIn("We made it!", output) + + path = self.concrete_instance.workspace + "/test_00000000.stdout" + with open(path) as stdoutf: + stdout = stdoutf.read() + self.assertIn( + "If we were running under Python, that would have taken a really long time!", stdout + ) + self.assertIn("You win!", stdout) + self.assertIn("8031989549026", stdout) diff --git a/tests/other/test_smtlibv2.py b/tests/other/test_smtlibv2.py index 2eba476f4..9ba50020b 100644 --- 
a/tests/other/test_smtlibv2.py +++ b/tests/other/test_smtlibv2.py @@ -366,6 +366,19 @@ def testBool4(self): cs.add(Operators.OR(bt, bt, False)) self.assertTrue(self.solver.check(cs)) + def testBool5(self): + cs = ConstraintSet() + bf = BoolConstant(value=False) + bt = BoolConstant(value=True) + x = cs.new_bool() + y = cs.new_bool() + self.assertRaises(Exception, bool, x == y) + cs.add(x == bf) + cs.add(y == bt) + cs.add(Operators.OR(True, x)) + cs.add(Operators.OR(y, y, False)) + self.assertTrue(self.solver.check(cs)) + def testBasicArray(self): cs = ConstraintSet() # make array of 32->8 bits