From 8a9856ada39a7345099eb0d68fc4db6986c88fe1 Mon Sep 17 00:00:00 2001 From: Peter Goodman Date: Sat, 13 Jan 2018 23:47:55 -0500 Subject: [PATCH] Klee maze example (#369) * In progress. Working on an example of using KLEE on a Maze, but with the maze program being compiled to x86, amd64, and aarch64. * Making lots of progress on getting lifting and running an aarch64 maze program on amd64, but using --explicit_args. The key thing I'm working through right now is a jump offset table, but where the offset is a block pc, rather than a table base. Also adding various bits of code here and there to make running with KLEE more directly doable, and working on a debugging facility to track down when the emulated program counter gets out of sync with the original program. * Fixed a subtle @PAGE and @PAGEOFF-related reference bug on AArch64. Partially disabled the special jump offset table handling I had in table.py, as it doesn't (yet) handle the shifted table values. However, I still have the code there, so that it can recognize that a basic block address is used as a possible offset, so that I can remove the block address as a reference, which permits a new heuristic on the C++ side to work. On the C++ side, when there's a jump instruction that isn't associated with a cross-reference flow, I try to auto-augment it with additional switch cases, targeting blocks with no predecessors (as present in the CFG). This seems to work reasonably well. * Improved the scripts and updated the READMEs. 
* Minor rephrase * Minor rephrase --- .gdbinit | 274 +++++++++-------------------- CMakeLists.txt | 3 + README.md | 5 +- examples/CMakeLists.txt | 18 ++ examples/Maze/.gitignore | 2 + examples/Maze/Maze.c | 157 +++++++++++++++++ examples/Maze/README.md | 156 ++++++++++++++++ examples/Maze/bin/maze.aarch64 | Bin 0 -> 14104 bytes examples/Maze/bin/maze.amd64 | Bin 0 -> 9080 bytes examples/Maze/bin/maze.x86 | Bin 0 -> 7744 bytes examples/Maze/cfg/maze.aarch64.cfg | Bin 0 -> 14862 bytes examples/Maze/cfg/maze.amd64.cfg | Bin 0 -> 12401 bytes examples/Maze/cfg/maze.x86.cfg | Bin 0 -> 12818 bytes examples/Maze/scripts/disass.sh | 92 ++++++++++ examples/Maze/scripts/lift.sh | 62 +++++++ mcsema/Arch/ABI.cpp | 114 +++++++----- mcsema/Arch/ABI.h | 2 + mcsema/BC/Callback.cpp | 49 ++++-- mcsema/BC/External.cpp | 6 +- mcsema/BC/Function.cpp | 113 ++++++++++-- mcsema/BC/Lift.cpp | 70 ++++++++ mcsema/BC/Optimize.cpp | 40 +++-- tools/mcsema_disass/ida/refs.py | 84 +++++++-- tools/mcsema_disass/ida/table.py | 92 +++++++++- tools/mcsema_lift/Lift.cpp | 32 +++- 25 files changed, 1074 insertions(+), 297 deletions(-) create mode 100644 examples/CMakeLists.txt create mode 100644 examples/Maze/.gitignore create mode 100644 examples/Maze/Maze.c create mode 100644 examples/Maze/README.md create mode 100755 examples/Maze/bin/maze.aarch64 create mode 100755 examples/Maze/bin/maze.amd64 create mode 100755 examples/Maze/bin/maze.x86 create mode 100644 examples/Maze/cfg/maze.aarch64.cfg create mode 100644 examples/Maze/cfg/maze.amd64.cfg create mode 100644 examples/Maze/cfg/maze.x86.cfg create mode 100755 examples/Maze/scripts/disass.sh create mode 100755 examples/Maze/scripts/lift.sh diff --git a/.gdbinit b/.gdbinit index c832f3562..8788a5553 100644 --- a/.gdbinit +++ b/.gdbinit @@ -6,128 +6,37 @@ define print-rip dont-repeat end -define print-reg-state-64 - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf " emulated native\n" - printf "rip 0x%016lx 
0x%016lx\n", *((unsigned long long *)($rptr + 2408)), $rip - printf "rax 0x%016lx 0x%016lx\n", *((unsigned long long *)($rptr + 2152)), $rax - printf "rbx 0x%016lx 0x%016lx\n", *((unsigned long long *)($rptr + 2168)), $rbx - printf "rcx 0x%016lx 0x%016lx\n", *((unsigned long long *)($rptr + 2184)), $rcx - printf "rdx 0x%016lx 0x%016lx\n", *((unsigned long long *)($rptr + 2200)), $rdx - printf "rsi 0x%016lx 0x%016lx\n", *((unsigned long long *)($rptr + 2216)), $rsi - printf "rdi 0x%016lx 0x%016lx\n", *((unsigned long long *)($rptr + 2232)), $rdi - printf "rbp 0x%016lx 0x%016lx\n", *((unsigned long long *)($rptr + 2264)), $rbp - printf "rsp 0x%016lx 0x%016lx\n", *((unsigned long long *)($rptr + 2248)), $rsp - printf "r8 0x%016lx 0x%016lx\n", *((unsigned long long *)($rptr + 2280)), $r8 - printf "r9 0x%016lx 0x%016lx\n", *((unsigned long long *)($rptr + 2296)), $r9 - printf "r10 0x%016lx 0x%016lx\n", *((unsigned long long *)($rptr + 2312)), $r10 - printf "r11 0x%016lx 0x%016lx\n", *((unsigned long long *)($rptr + 2328)), $r11 - printf "r12 0x%016lx 0x%016lx\n", *((unsigned long long *)($rptr + 2344)), $r12 - printf "r13 0x%016lx 0x%016lx\n", *((unsigned long long *)($rptr + 2360)), $r13 - printf "r14 0x%016lx 0x%016lx\n", *((unsigned long long *)($rptr + 2376)), $r14 - printf "r15 0x%016lx 0x%016lx\n", *((unsigned long long *)($rptr + 2392)), $r15 - dont-repeat -end - -define addr-of-xmm0-64 - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&(RegState::xmm0) = 0x%016lx\n", $rptr + 16 - dont-repeat -end - -define addr-of-xmm1-64 - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&(RegState::xmm1) = 0x%016lx\n", $rptr + 80 - dont-repeat -end - -define addr-of-xmm2-64 - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&(RegState::xmm2) = 0x%016lx\n", $rptr + 144 - dont-repeat -end - -define addr-of-xmm3-64 - set $rptr = ((unsigned long long 
(*)(void))__mcsema_debug_get_reg_state)() - printf "&(RegState::xmm3) = 0x%016lx\n", $rptr + 208 - dont-repeat -end - -define addr-of-xmm4-64 - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&(RegState::xmm4) = 0x%016lx\n", $rptr + 272 - dont-repeat -end - -define addr-of-xmm5-64 - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&(RegState::xmm5) = 0x%016lx\n", $rptr + 336 - dont-repeat -end - -define addr-of-xmm6-64 - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&(RegState::xmm6) = 0x%016lx\n", $rptr + 400 - dont-repeat -end - -define addr-of-xmm7-64 - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&(RegState::xmm7) = 0x%016lx\n", $rptr + 464 - dont-repeat -end - -define addr-of-xmm8-64 - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&(RegState::xmm8) = 0x%016lx\n", $rptr + 528 - dont-repeat -end - -define addr-of-xmm9-64 - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&(RegState::xmm9) = 0x%016lx\n", $rptr + 592 - dont-repeat -end - -define addr-of-xmm10-64 - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&(RegState::xmm10) = 0x%016lx\n", $rptr + 656 - dont-repeat -end - -define addr-of-xmm11-64 - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&(RegState::xmm11) = 0x%016lx\n", $rptr + 720 - dont-repeat -end - -define addr-of-xmm12-64 - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&(RegState::xmm12) = 0x%016lx\n", $rptr + 784 - dont-repeat -end - -define addr-of-xmm13-64 - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&(RegState::xmm13) = 0x%016lx\n", $rptr + 848 - dont-repeat -end - -define addr-of-xmm14-64 - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - 
printf "&(RegState::xmm14) = 0x%016lx\n", $rptr + 912 - dont-repeat -end +set $__rax_offset = 2216 +set $__flags_offset = 2064 +set $__xmm0_offset = 16 -define addr-of-xmm15-64 +define print-reg-state-amd64 set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&(RegState::xmm15) = 0x%016lx\n", $rptr + 976 - dont-repeat -end - -define print-flags-64 - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - set $flptr = (char *) ($rptr + 0x810) + printf " emulated native\n" + set $__rax_ptr = $rptr + $__rax_offset + printf "rip 0x%016lx 0x%016lx\n", *((unsigned long long *)($__rax_ptr + 16 * 16)), $rip + printf "rax 0x%016lx 0x%016lx\n", *((unsigned long long *)($__rax_ptr + 0 * 16)), $rax + printf "rbx 0x%016lx 0x%016lx\n", *((unsigned long long *)($__rax_ptr + 1 * 16)), $rbx + printf "rcx 0x%016lx 0x%016lx\n", *((unsigned long long *)($__rax_ptr + 2 * 16)), $rcx + printf "rdx 0x%016lx 0x%016lx\n", *((unsigned long long *)($__rax_ptr + 3 * 16)), $rdx + printf "rsi 0x%016lx 0x%016lx\n", *((unsigned long long *)($__rax_ptr + 4 * 16)), $rsi + printf "rdi 0x%016lx 0x%016lx\n", *((unsigned long long *)($__rax_ptr + 5 * 16)), $rdi + printf "rsp 0x%016lx 0x%016lx\n", *((unsigned long long *)($__rax_ptr + 6 * 16)), $rsp + printf "rbp 0x%016lx 0x%016lx\n", *((unsigned long long *)($__rax_ptr + 7 * 16)), $rbp + printf "r8 0x%016lx 0x%016lx\n", *((unsigned long long *)($__rax_ptr + 8 * 16)), $r8 + printf "r9 0x%016lx 0x%016lx\n", *((unsigned long long *)($__rax_ptr + 9 * 16)), $r9 + printf "r10 0x%016lx 0x%016lx\n", *((unsigned long long *)($__rax_ptr + 10 * 16)), $r10 + printf "r11 0x%016lx 0x%016lx\n", *((unsigned long long *)($__rax_ptr + 11 * 16)), $r11 + printf "r12 0x%016lx 0x%016lx\n", *((unsigned long long *)($__rax_ptr + 12 * 16)), $r12 + printf "r13 0x%016lx 0x%016lx\n", *((unsigned long long *)($__rax_ptr + 13 * 16)), $r13 + printf "r14 0x%016lx 0x%016lx\n", *((unsigned long long *)($__rax_ptr + 14 * 16)), $r14 + 
printf "r15 0x%016lx 0x%016lx\n", *((unsigned long long *)($__rax_ptr + 15 * 16)), $r15 + dont-repeat +end + +define print-flags-amd64 + set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() + set $flptr = (char *) ($rptr + $__flags_offset) printf "eflags [" if $flptr[1] printf "CF " @@ -154,122 +63,105 @@ define print-flags-64 dont-repeat end -define print-reg-state-32 - set $rptr = ((unsigned (*)(void))__mcsema_debug_get_reg_state)() - printf " emulated native\n" - printf "eip 0x%08lx 0x%016lx\n", *((unsigned long long *)($rptr + 2408)), $eip - printf "eax 0x%08lx 0x%016lx\n", *((unsigned long long *)($rptr + 2152)), $eax - printf "ebx 0x%08lx 0x%016lx\n", *((unsigned long long *)($rptr + 2168)), $ebx - printf "ecx 0x%08lx 0x%016lx\n", *((unsigned long long *)($rptr + 2184)), $ecx - printf "edx 0x%08lx 0x%016lx\n", *((unsigned long long *)($rptr + 2200)), $edx - printf "esi 0x%08lx 0x%016lx\n", *((unsigned long long *)($rptr + 2216)), $esi - printf "edi 0x%08lx 0x%016lx\n", *((unsigned long long *)($rptr + 2232)), $edi - printf "ebp 0x%08lx 0x%016lx\n", *((unsigned long long *)($rptr + 2264)), $ebp - printf "esp 0x%08lx 0x%016lx\n", *((unsigned long long *)($rptr + 2248)), $esp - dont-repeat +define print-flags-x86 + print-flags-amd64 end - -define addr-of-rip +define print-reg-state-x86 set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&rip = 0x%016lx\n", $rptr + 2408 - dont-repeat -end - -define addr-of-rax - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&rax = 0x%016lx\n", $rptr + 2152 - dont-repeat -end - -define addr-of-rbx - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&rbx = 0x%016lx\n", $rptr + 2168 - dont-repeat -end - -define addr-of-rcx - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&rcx = 0x%016lx\n", $rptr + 2184 - dont-repeat -end - -define addr-of-rdx - set $rptr = ((unsigned 
long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&rdx = 0x%016lx\n", $rptr + 2200 + printf " emulated native\n" + set $__rax_ptr = $rptr + $__rax_offset + printf "eip 0x%08x 0x%08x\n", *((unsigned *)($__rax_ptr + 16 * 16)), (unsigned) $pc + printf "eax 0x%08x 0x%08x\n", *((unsigned *)($__rax_ptr + 0 * 16)), $eax + printf "ebx 0x%08x 0x%08x\n", *((unsigned *)($__rax_ptr + 1 * 16)), $ebx + printf "ecx 0x%08x 0x%08x\n", *((unsigned *)($__rax_ptr + 2 * 16)), $ecx + printf "edx 0x%08x 0x%08x\n", *((unsigned *)($__rax_ptr + 3 * 16)), $edx + printf "esi 0x%08x 0x%08x\n", *((unsigned *)($__rax_ptr + 4 * 16)), $esi + printf "edi 0x%08x 0x%08x\n", *((unsigned *)($__rax_ptr + 5 * 16)), $edi + printf "esp 0x%08x 0x%08x\n", *((unsigned *)($__rax_ptr + 6 * 16)), $esp + printf "ebp 0x%08x 0x%08x\n", *((unsigned *)($__rax_ptr + 7 * 16)), $ebp dont-repeat end -define addr-of-rsi - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&rsi = 0x%016lx\n", $rptr + 2216 - dont-repeat -end +set $__x0_offset = 544 -define addr-of-rdi +define print-reg-state-aarch64 set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&rdi = 0x%016lx\n", $rptr + 2232 + printf "\temulated\n" + set $__x0_ptr = $rptr + $__x0_offset + set $__i = 0 + while $__i < 31 + printf "x%d\t0x%016lx\t&x%d = 0x%lx\n", $__i, *((unsigned long long *)($__x0_ptr + $__i * 16)), $__i, $__x0_ptr + $__i * 16 + set $__i = $__i + 1 + end + + printf "sp\t0x%016lx\n", *((unsigned long long *)($__x0_ptr + $__i * 16)) + set $__i = $__i + 1 + printf "pc\t0x%016lx\t&pc = 0x%lx\n", *((unsigned long long *)($__x0_ptr + $__i * 16)), $__x0_ptr + $__i * 16 dont-repeat end -define addr-of-rbp - set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&rbp = 0x%016lx\n", $rptr + 2264 - dont-repeat -end -define addr-of-rsp +define addr-of-rip set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&rsp = 
0x%016lx\n", $rptr + 2248 + set $__rax_ptr = $rptr + $__rax_offset + printf "&rip = 0x%016lx\n", $__rax_ptr + 16 * 16 dont-repeat end - -define addr-of-r8 +define addr-of-rax set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&r8 = 0x%016lx\n", $rptr + 2280 + set $__rax_ptr = $rptr + $__rax_offset + printf "&rax = 0x%016lx\n", $__rax_ptr + 0 * 16 dont-repeat end -define addr-of-r9 +define addr-of-rbx set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&r9 = 0x%016lx\n", $rptr + 2296 + set $__rax_ptr = $rptr + $__rax_offset + printf "&rbx = 0x%016lx\n", $__rax_ptr + 1 * 16 dont-repeat end -define addr-of-r10 +define addr-of-rcx set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&r10 = 0x%016lx\n", $rptr + 2312 + set $__rax_ptr = $rptr + $__rax_offset + printf "&rcx = 0x%016lx\n", $__rax_ptr + 2 * 16 dont-repeat end -define addr-of-r11 +define addr-of-rdx set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&r11 = 0x%016lx\n", $rptr + 2328 + set $__rax_ptr = $rptr + $__rax_offset + printf "&rdx = 0x%016lx\n", $__rax_ptr + 3 * 16 dont-repeat end -define addr-of-r12 +define addr-of-rsi set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&r12 = 0x%016lx\n", $rptr + 2344 + set $__rax_ptr = $rptr + $__rax_offset + printf "&rsi = 0x%016lx\n", $__rax_ptr + 4 * 16 dont-repeat end -define addr-of-r13 +define addr-of-rdi set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&r13 = 0x%016lx\n", $rptr + 2360 + set $__rax_ptr = $rptr + $__rax_offset + printf "&rdi = 0x%016lx\n", $__rax_ptr + 5 * 16 dont-repeat end -define addr-of-r14 +define addr-of-rsp set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&r14 = 0x%016lx\n", $rptr + 2376 + set $__rax_ptr = $rptr + $__rax_offset + printf "&rsp = 0x%016lx\n", $__rax_ptr + 6 * 16 dont-repeat end -define 
addr-of-r15 +define addr-of-rbp set $rptr = ((unsigned long long (*)(void))__mcsema_debug_get_reg_state)() - printf "&r15 = 0x%016lx\n", $rptr + 2392 + set $__rax_ptr = $rptr + $__rax_offset + printf "&rbp = 0x%016lx\n", $__rax_ptr + 7 * 16 dont-repeat end diff --git a/CMakeLists.txt b/CMakeLists.txt index 365c23835..fc136fb3a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,3 +105,6 @@ install( install(CODE "execute_process(COMMAND python ${PROJECT_SOURCE_DIR}/tools/setup.py install -f --prefix=${CMAKE_INSTALL_PREFIX})") + +add_subdirectory(examples) + \ No newline at end of file diff --git a/README.md b/README.md index a11e6a84c..e10666efa 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ Why would anyone translate binaries *back* to bitcode? * **Binary Patching And Modification**. Lifting to LLVM IR lets you cleanly modify the target program. You can run obfuscation or hardening passes, add features, remove features, rewrite features, or even fix that pesky typo, grammatical error, or insane logic. When done, your new creation can be recompiled to a new binary sporting all those changes. In the [Cyber Grand Challenge](https://blog.trailofbits.com/2015/07/15/how-we-fared-in-the-cyber-grand-challenge/), we were able to use McSema to translate challenge binaries to bitcode, insert memory safety checks, and then re-emit working binaries. -* **Symbolic Execution with KLEE**. [KLEE](https://klee.github.io/) operates on LLVM bitcode, usually generated by providing source to the LLVM toolchain. McSema can lift a binary to LLVM bitcode, [permitting KLEE to operate on previously unavailable targets](https://blog.trailofbits.com/2014/12/04/close-encounters-with-symbolic-execution-part-2/). +* **Symbolic Execution with KLEE**. [KLEE](https://klee.github.io/) operates on LLVM bitcode, usually generated by providing source to the LLVM toolchain. 
McSema can lift a binary to LLVM bitcode, [permitting KLEE to operate on previously unavailable targets](https://blog.trailofbits.com/2014/12/04/close-encounters-with-symbolic-execution-part-2/). See our [walkthrough](examples/Maze/README.md) showing how to run KLEE on a symbolic maze. * **Re-use existing LLVM-based tools**. KLEE is not the only tool that becomes available for use on bitcode. It is possible to run LLVM optimization passes and other LLVM-based tools like [libFuzzer](http://llvm.org/docs/LibFuzzer.html) on [lifted bitcode](docs/UsingLibFuzzer.md). @@ -71,8 +71,11 @@ sudo apt-get install \ git \ cmake \ python2.7 python-pip \ + wget \ build-essential \ gcc-multilib g++-multilib \ + libtinfo-dev \ + lsb-release \ realpath sudo pip install --upgrade pip diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 000000000..77fa7981c --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1,18 @@ +# Copyright (c) 2017 Trail of Bits, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +project(examples) +cmake_minimum_required (VERSION 3.2) + +add_executable(maze Maze/Maze.c) diff --git a/examples/Maze/.gitignore b/examples/Maze/.gitignore new file mode 100644 index 000000000..413cd2a0a --- /dev/null +++ b/examples/Maze/.gitignore @@ -0,0 +1,2 @@ +bc/* +klee-* \ No newline at end of file diff --git a/examples/Maze/Maze.c b/examples/Maze/Maze.c new file mode 100644 index 000000000..92808d54e --- /dev/null +++ b/examples/Maze/Maze.c @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2018 Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * It's a maze! + * Use a,s,d,w to move "through" it. + */ + +#include +#include +#include +#include + +/* Dimensions of the Maze */ +enum { + kWidth = 11, + kHeight = 7 +}; + +/* Hard-coded maze */ +char maze[kHeight][kWidth] = { + {'+', '-', '+', '-', '-', '-', '+', '-', '-', '-', '+'}, + {'|', ' ', '|', ' ', ' ', ' ', ' ', ' ', '|', '#', '|'}, + {'|', ' ', '|', ' ', '-', '-', '+', ' ', '|', ' ', '|'}, + {'|', ' ', '|', ' ', ' ', ' ', '|', ' ', '|', ' ', '|'}, + {'|', ' ', '+', '-', '-', ' ', '|', ' ', '|', ' ', '|'}, + {'|', ' ', ' ', ' ', ' ', ' ', '|', ' ', ' ', ' ', '|'}, + {'+', '-', '-', '-', '-', '-', '+', '-', '-', '-', '+'}, +}; + +/** + * Draw the maze state in the screen! 
+ */ +void draw(void) { + int i, j; + for (i = 0; i < kHeight; i++) { + for (j = 0; j < kWidth; j++) { + printf("%c", maze[i][j]); + } + printf("\n"); + } + printf("\n"); +} + +enum { + kMaxNumPlayerMoves = 28 +}; + +/** + * The main function + */ +int main(int argc, char *argv[]) { + int x, y; /* Player position */ + int ox, oy; /* Old player position */ + int i = 0; /* Iteration number */ + + char program[kMaxNumPlayerMoves]; + + /* Initial position */ + x = 1; + y = 1; + maze[y][x] = 'X'; + + /* Print some info. */ + printf("Maze dimensions: %dx%d\n", kWidth, kHeight); + printf("Player position: %dx%d\n", x, y); + printf("Iteration no. %d\n", i); + printf("Program the player moves with a sequence of 'w', 's', 'a' and 'd'\n"); + printf("Try to reach the price(#)!\n"); + + /* Draw the maze */ + draw(); + + /* Read the directions 'program' to execute... */ + read(STDIN_FILENO, program, kMaxNumPlayerMoves); + + /* Iterate and run 'program'. */ + while (i < kMaxNumPlayerMoves) { + /* Save old player position */ + ox = x; + oy = y; + + /* Move player position depending on the actual command */ + switch (program[i]) { + case 'w': + y--; + break; + case 's': + y++; + break; + case 'a': + x--; + break; + case 'd': + x++; + break; + default: + printf("Wrong command, only w,s,a,d are accepted!)\n"); + printf("You lose!\n"); + exit(EXIT_FAILURE); + } + + /* If hit the price, You Win!! */ + if (maze[y][x] == '#') { + printf("You win!\n"); + printf("Your solution <%42s>\n", program); + exit(EXIT_SUCCESS); + } + + /* If something is wrong do not advance. */ + if (maze[y][x] != ' ' + && !((y == 2 && maze[y][x] == '|' && x > 0 && x < kWidth))) { + x = ox; + y = oy; + } + + /* Print new maze state and info... */ + printf("Player position: %dx%d\n", x, y); + printf("Iteration no. %d. Action: %c. %s\n", i, program[i], + ((ox == x && oy == y) ? "Blocked!" : "")); + + /* If crashed to a wall! 
Exit, you lose */ + if (ox == x && oy == y) { + printf("You lose\n"); + exit(EXIT_FAILURE); + } + + /* put the player on the maze... */ + maze[y][x] = 'X'; + + /* draw it */ + draw(); + + /* increment iteration */ + i++; + + /* me wait to human */ + sleep(1); + } + + /* You couldn't make it! You lose! */ + printf("You lose\n"); + return EXIT_FAILURE; +} diff --git a/examples/Maze/README.md b/examples/Maze/README.md new file mode 100644 index 000000000..905ba40c8 --- /dev/null +++ b/examples/Maze/README.md @@ -0,0 +1,156 @@ +# Solving a Maze with KLEE and McSema + +This walkthrough describes how to run KLEE on a simple Maze program. The instructions here have been tested on Ubuntu 16.04. Your mileage may vary if you are using another operating system. What is unique about this walkthrough is that it will show that KLEE runs equally well on lifted bitcode produced from the same program compiled to x86-64 (amd64) and AArch64 (64-bit ARMv8). + +The program, [Maze.c](Maze.c), can be found in this directory, along with the [binaries](bin) and [control-flow graph files](cfg) for the Maze program. + +## The Maze + +The Maze program presents its user with the following challenge: type in a sequence of `w`, `s`, `a`, or `d` characters to guide the `X` through the maze and reach the destination denoted by `#`. + +The characters `w`, `s`, `a`, and `d` are used in place of the keyboard's arrow keys, that is: + +``` + +---+ +---+ + | w | | ^ | ++---+---+---+ <=> +---+---+---+ +| a | s | d | | < | v | > | ++---+---+---+ +---+---+---+ +``` + +Initially, the program displays the maze, and asks the user to type in their directions for how to complete it. + +``` +Maze dimensions: 11x7 +Player position: 1x1 +Iteration no. 0 +Program the player moves with a sequence of 'w', 's', 'a' and 'd' +Try to reach the price(#)! ++-+---+---+ +|X| |#| +| | --+ | | +| | | | | +| +-- | | | +| | | ++-----+---+ +``` + +The winning directions for the maze are `ssssddddwwaawwddddssssddwwww`. 
The winning output looks like this: + +``` +Player position: 9x2 +Iteration no. 26. Action: w. ++-+---+---+ +|X|XXXXX|#| +|X|X--+X|X| +|X|XXX|X|X| +|X+--X|X|X| +|XXXXX|XXX| ++-----+---+ + +You win! +Your solution < ssssddddwwaawwddddssssddwwww> +``` + +Let's jump in and see if KLEE, acting as the user, can win this maze! + +## Running KLEE + +### Step 1: Get dependencies + +The first step is to make sure that we have all the dependencies that we need. + +```bash +sudo apt-get update +sudo apt-get upgrade + +sudo apt-get install \ + git \ + cmake \ + python2.7 python-pip \ + wget \ + build-essential \ + gcc-multilib g++-multilib \ + libtinfo-dev \ + lsb-release \ + realpath \ + z3 + +sudo pip install --upgrade pip +sudo pip install 'protobuf==3.2.0' +``` + +Now that we have the dependencies we need, we should clone [Remill](https://github.com/trailofbits/remill). + +```bash +cd /data +git clone git@github.com:trailofbits/remill.git +``` + +### Step 2: Build Remill, McSema, and KLEE + +We have provided a convenient [script](https://github.com/trailofbits/remill/blob/master/scripts/build_klee.sh) for this walkthrough. The script will clone the latest version of McSema into the Remill checkout, as well as clone a version of KLEE that is compatible with Remill. You might already have McSema installed, and it might be a version that is not compatible with KLEE. That is not a problem. The script will ensure that the proper toolchain is built *within* the directory in which you invoke the script. + +```bash +mkdir /tmp/klee_ws +cd /tmp/klee_ws +``` + +Now to invoke our build script within `/tmp/klee_ws`, which is where Remill, McSema, and KLEE will be compiled. + +```bash +/data/remill/scripts/build_klee.sh +``` + +### Step 3: Lift the Maze binaries + +From within the KLEE workspace `/tmp/klee_ws`, run the [lifting script](scripts/lift.sh). This script invokes `mcsema-lift-3.9` on the provided [CFG files](cfg). 
If you have IDA Pro, then you can reproduce these steps manually by invoking the [disassembly script](scripts/disass.sh). + +```bash +/data/remill/tools/mcsema/examples/Maze/scripts/lift.sh +``` + +This script will likely print out some error messages. That is okay. McSema will always try to produce bitcode, and it will warn you when something seems erroneous in the CFG file. + +### Step 4: Run KLEE + +The build script from step 2 will have compiled KLEE into the `/tmp/klee_ws/klee-build/` directory. We can run KLEE using the following commands. If things work, then there will be a lot of funny-looking output. + +```bash +./klee-build/bin/klee -posix-runtime -libc=uclibc -allow-external-sym-calls /data/remill/tools/mcsema/examples/Maze/bc/maze.amd64.bc -sym-stdin 28 +``` + +```bash +./klee-build/bin/klee -posix-runtime -libc=uclibc -allow-external-sym-calls /data/remill/tools/mcsema/examples/Maze/bc/maze.aarch64.bc -sym-stdin 28 +``` + +### Step 5: Example output + +We know that the answer to the maze is `ssssddddwwaawwddddssssddwwww`, so we can check to see if KLEE found it by running `ktest-tool` on all of the `.ktest`-suffixed files in KLEE's output directory (`klee-last` is a symlink to the most recently produced output directory). + +```bash +for f in /data/remill/tools/mcsema/examples/Maze/bc/klee-last/*.ktest ; do + ./klee-build/bin/ktest-tool $f | grep ssssddddwwaawwddddssssddwwww &>/dev/null ; + if [[ $? 
-eq 0 ]] ; then + FOUND_TEST=$f + fi +done +./klee-build/bin/ktest-tool $FOUND_TEST +``` + +The output we get should be something like the following: + +``` +ktest file : '/data/remill/tools/mcsema/examples/Maze/bc/klee-last/test000301.ktest' +args : ['/data/remill/tools/mcsema/examples/Maze/bc/maze.aarch64.bc', '-sym-stdin', '28'] +num objects: 3 +object 0: name: 'stdin' +object 0: size: 28 +object 0: data: 'ssssddddwwaawwddddssssddwwww' +object 1: name: 'stdin-stat' +object 1: size: 144 +object 1: data: '\x02\x08\x00\x00\x00\x00\x00\x00\x01\x01\x01\x01\x01\x01\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\xa4\x81\x00\x00\xe8\x03\x00\x00\xe8\x03\x00\x00\x01\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x01\x01\x01\x01\x01\x01\x01\x01\x00\x10\x00\x00\x00\x00\x00\x00\x01\x01\x01\x01\x01\x01\x01\x01R\x06XZ\x00\x00\x00\x00\x01\x01\x01\x01\x01\x01\x01\x01\xbb\x1dXZ\x00\x00\x00\x00\x01\x01\x01\x01\x01\x01\x01\x01\xbb\x1dXZ\x00\x00\x00\x00\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01' +object 2: name: 'model_version' +object 2: size: 4 +object 2: data: '\x01\x00\x00\x00' +``` \ No newline at end of file diff --git a/examples/Maze/bin/maze.aarch64 b/examples/Maze/bin/maze.aarch64 new file mode 100755 index 0000000000000000000000000000000000000000..92a481f0950eb160683192c9d5178eec4fe35c7a GIT binary patch literal 14104 zcmeHOeQ;dWbw9i7FYMSzj=`=!&=VgK#P&)C+maFrPm(_{v5m0>HA&mm)9$mhc(p6- zuCRq%5QNFllBC|TLzqr7vvOt%c0w}qztPZ!>vdw%zvbI(2Zyt}J=uOHpMdq*%3ASprmJ~21Cz*dm{B)U|u zSc5guBC=^6t)>M)=Hszd3sYSaI%-S{U1a1!p!2vMG(gbS6+_o(j)aPbOu4_=Vw_SOb% zaLn6GXq)<+r79M{?)^nJ#{GTNQe_19Lyv7#8lPqS5GwJkPsBUx6S0knc(VWKM#o7< zySHu*XHwx!+JCmsvOD+Qr+VUfTc~QfeFf@_aru~E zxB|Yp0{*EA_>KzrH5G8ed`5Zxwvu}^o~7RYY=$C{OxB4Wjzqf;M-Dmh1f$VzCrwUg zDh-)*JefU2Y1fHSCgHlhP+&8xmCij z@TNxi%b5_uQ~aN5Kz;=1AgUaH3(wbcPa|DI^w+3!;dag;$#>lzEVPf+=y?p_C15!z zGQa#XaxNB%nfD-Y&PyqaJUHj3lqwHyUM1DE%7b$*NU=P)*Poj`xV%TK)8N7RJ(kks 
z!M*FA?ZG8?S?4Yfew7#jogTb!dOAcozZ(3HJp6Uw z$36V@;DdqL{yzoⅆt+p`Tob=NBP5^H#{78oz;Zzr;94i4MMvXCBh(r!zBOAIHPv zeQuG>ggQQ<=_Nyl5CfXrIR*3_e(2cCmIem?J#3YbjSt^xBr6@PGeX6Bsafkk(| z!Fh1Ww2Qye--R_ag74lH7iwsv8WeUfhK2&UQ#C^)!)t~{#zKKJbotSr;%$41e)kB| z*E)!v!aI(~Zhjj6up56=gWbGV+S0?2_2^)$jU$=4aWo<<6O*TL`g|GnRVB}4lI zM9oBWoW^d;jl-4$UnB5EVfNOzBiBY;(K9*;j5YFx#K=mJL0l*mapbGVU6Ig z@Z*%@-oj}Swi`A42+dog^A^P;N(z8eIBsz@ut$kSOYyL6V)pnY4_#VpFBfbz=gvZw=jrK>8B59ad6A`;}e7{T&{y@FA?|mV7`fFI5-aQaF zeG>GA#~X7ePvHq(^V$c|(CPO>!84Z;!w;~o+y@&cue6;&!k_yj-s^8YoVh$6x)$sE z$}?l3RiDl*pj-#muSbI`epNd&)Bf&(mecQjq4o3!(YDjmgFjY%^ExVPo%j1LfL|Y6 z_Bao_RxIA*CNuF=GV>Yhrr6P&VvA{C!WnSWR&Od3&!VA#@5;JqhY>583PV_a8|+J` zy3$UMmF;$|Uai}cddSUKN8;IT%ds+UU%#7-x>o9tRd=LrgH@N|52wy@k}<0;R;Ro{ z;L-zDHf7;AShQOkOUI+`%{Saqs|Effol16D(Ns?l^fp+jWMaTNvLUm<*$}gwv}-xh zsN0)$W3{(v`3F+{Rw9*gjXE)ZB%TzO`LvZuCHmEvZog^st(iM`Os$Dj^f1gAw+h5R z{;<^&H35nu9vO*;^vjC;yQIoIpYYTL`>2$E4|!$5&Xu_c%tc@>0&@|Vi@;n2<{~f` zfw>6GMPM!ha}k(}z+4185xBU}R$SgtX$qK%pYj!2$pcSAh?W-agLq&>VHwGKP>f$qlV+VL#;fT3 zGTRHOdWfoVZ=t0OzMN$J@HU3%lQdaYeg*Dnl(TaUT{7!Hv7Kv4)`wzzCG9iksKxl~ zeSuN5o8HFwNcV=c0UE8KeXUsfVLubFz<_OffUvDOW(&%Wxo{H^0R)5vBw}^ZXP8*9TnnJRo0)Y z>9Q|Ro~&SB;w|fl%3Lb z>=h&LUnk#G@}=vWjzj5s{to1qBj4oS!#s8aC4e6)F}KUekNV_W6uz>=&VMTZOV^*fD%fdP{TBPqT)G1Ogz4A6{^u&l ze+Rf#YMy=wyxje=HyIC>t`9ihFB|{gD?796yv)~C_<>t)zE%UTrj;b`*B;|rjgntl zxc73@;4dPcJYW9!|CREm^!unDcFM^&8r(ncZKhwjcXJLrm%9|M>)y`?fLmU`)O&z) z+|K&)WT1lli3;}rwgP^O!b`vJsJ=gwR+l8ZqS1PDKhoTm>`M06Z>bN5&Hts1(R6lG zI7K(c=*FXesG&FvMV&+R9Ti!b?y+Do#%bT*UiKXfP@#l5||+xACx?>f*PiBQZ< zyIt`Nj=Uq;o=7y2O1gZBFqVpRB~qPEA`;7{(wT_Uf0S?{-J5W6@EmSx*xXcJC~_#C zj7OYw+8Kzr$!vOn4&lh$jl}wUdIq54;UbXC7K!pfJ^XVMhcpS=<+1N} zWs!Y#1G5N0HSOR3BVyWRQ? 
z&@wvtwT#psjKV1FIX&?xI!nO>KElebwt;9d z?Dv>fB`bBNH{e-_NA%_IXt@t>q0L+5drh!SsCf0|?`{4a!R1Cx3ZH)cZD@HPdoI#{ zN6-es`%Q|_ZcS*up|ZbYSlWvU?=gwK{QZ8`02T)aWdRdcx?pf^w04waZ3-Qo}0#*P`y%|0*z!EBnVq_8})shw^u- zZq>d@o#|iV@#@n8o8f}NQN_%DRGF&KlidQXK&+(I- zi}a(pQvdU4V10?d?59i`ef{@J*1B>#?QjE$qn zG)n(6FPG5f?O*nfF5$u`7m?H4?0Ccu|Gnwemwl!Qqu(uxhKiohU;6ar?~#3$R`@fc zCq(pw|1~O%$@t}W^YfOLsN}zH#br)Z*#8Cr3>E#)`fw9YrNd3tc%*Ny=+DBM`@I)L rUGxQJUUFYBrW2Otnigxi7UXkyWc;%J@cw+c{;l;|??qpuzyJRNLzgV* literal 0 HcmV?d00001 diff --git a/examples/Maze/bin/maze.amd64 b/examples/Maze/bin/maze.amd64 new file mode 100755 index 0000000000000000000000000000000000000000..b6c61941c0f1d6441dbc69e9367c6e47fd3f2ca2 GIT binary patch literal 9080 zcmdT~eQaCR6~DHdkEX4YmH_F8=!1Txr7>wqv(Of>lem4Ul2Y0Pibi;OwO?XWe`NM^ zlXPwAFjRP1S+$|ju@A9t*qFwoi3L`Ot9DJ1Ul!>3dD>$EySLfMIowet52 zM_r^{ALw38FPqOr;7*<(zSV#kak9G;>@dzNr7EGgTa;I_e20EWojUXdG4h{V?-vOchvc ziGMC???+{zbY&0cI7Z8O%6QdO_3#8@=#NOs!NnBfS+zMhjvu1>9GvC^<~8Nux$A~A z2hW{1at=;&8U5_a!J%SVlH(th%fltVtdN-ea8fTxeFw7S_}TI^hDv7B4c{Y| zT<(Mv%0A4ToX7yJdIDvv`s6s5zkxDVc{0i6ucC}qoqU$d52B1!oP3hY2T{hVO`hO# z1ZAw!TDZJj#oIDJa)Q=v$0^A zlcgR9!_Rsvm$$-q;B9D#F1cf^r<-`XEmX4q+tp398QOw9pfbv7!fA<-zb+3cu zmoUFe{>optgVUF7;6P4H%coPf%a(KUYnSz_v)<;Bx60+L`;1Zk(Oa*9_*raW7Z|2O z5LoxQ5*t94PkBkB5-iYi{B{;AnffN!Wu1M32AT2h$%9~nY3RACF?SlHoZ2(yzQhxa z!|qG6=lrlcEqli0#JGHV!YW(FWlNI3&&wr8A^yteSmjH}#8}F4Ig^RIFInP;FI5~l zJ6ZugQcF0N1}t5AFn+X6lA`I;zi$rw3v+@nF(2RUd~6POXavPg$l`#-Ywr*NvXQ zQZHPB@_&b^2~1%lq*g<5SJV>FGh0{0){Xui?VkjD3>~BX0{}*&{2aaVN{mD5ioHtu zj`-oEHP&>+7KdW<$T8aRJ+5zuOaNGje2zHJ<97}uUn;ZB@h6QjlWV{S>(U||PyIps=5*b&Q ztF!(1s_)w*nEc1ZP{yf^v?~(si>N_K_i0kUVH*q| z(4x|SU-wC>6xF^I(?T9i3inED2G*>X)$dus*t8UGJ6Dh$g8XkJhhi-s{)0d%`g(5RPis z^VEPp#1j!I8VeJk!I}eM&wemL|K@r*B$r22+ynKDR%G46_jsvoOB2}O z$E5Y|nGE*m`@lPa{}H{u|&8Ff4F?hh#apn^^{{EKBE?m7lPb zRUwWm$s^!r9V!Satk2j=$VdH=G_0VgYp>d7uh~&KZ=n1ryM4))bvLiP0b$f>g=aOy 
zHGqK2UUkgcvY^x&fs&1UFTjT(e+q{L<=Z~Aq5|4J1kWy5X*A%h*HPM(6zAE!6qO_`3^F3uVWZJWhSPhxxC+~aE$1fpLjlmf1^K%I|RGi>*jzoQxYy^CVzK5_BoaHBlSwg@^4e zEt{pl^EDDHc~QZi7+ZWOe33 zJdLNK&Vsm&=>_pQ+3}p~%m$--q@aE&qw$`vU&acJ6C2DP8ejSPbK%6`%!PQ`#wxPo zIXBY=vxnw&K0crI6zE?YTfn9Z;&5qVT1U{Ija4#QC-VJQu?ew$~ zGhT3=n8QBF%pgVz*I^5+Zsz!L{k1YXt2ebjdPcn8gyVe&;^uX~mHVHGlh&&}9G~5u z2Zf(;lmBB}e|Eq81n@%jDpucTxSvJLx4;Rh8F~Sdh1vl%3i&_BO|P0Y9^U4DX7~Gq z(5LSi7vc3OPs|>lrBDG&f$^{m@I}zhc*;hi@6LLTb13iS0$j>N4B<~4pFOU+i}(q1 z+|DkFexdKA4tN!Ot50)DC#BywJLs z`AqYu1R8cBc%t=Wj%dSbjxT0-j7RF=Z`iK@j{VXj`i1WSA@6l~s@M{S*A=<+9+Zpv zw7$_^b}jfpeRF$m68`Z%1&QxLfo}#}Lcl~jxxNVmu?KLhuQ{GT5kJR@_*ud60_(gd zqDS>uZ?D4xH-tM{b}Q{|dpZ?`d9{ev=a0g%NYR6eClC&4cme4RD}90RJ~g0t^>8Gr zsIfr?rlnbLaCPFF`;3uxu!@Y_&;;1XJK%ZlMmZr#n4?d^9o zyV{jIwr$(v?o>Km&FyY1f#0EeqA?yl-L^WNba#q3rb=69hceSxogFRMRGsR+fW|Lv z^R>MZb$}_FSJhQ;pMRS>quC%XbY~DO32%4vky)3#=4>p-xbcP3@~(fOC!=A-2Zv6G zQ)#;cw7mY15{qhH7KG-`tvt3M$oryE;mf#B-f=+LErv)-K#fLWeBf2JIRM?98 z8dZ4>#qX^14Vfc46x7vy!1aj1eUyUBZY|Q!9HFqTIb6+cb-LOoq`pwhu`lL_-zofF z=0M4(Mt#iT9SVUzgX<9^=YST$%Yi&WfxL(oP|-l7`U5(1@HTKj($N= z!n`{jnoo>kpBJ2vWSAPm*|4DW{T@(*V$`7D12KekbPouQ4F@zfbKtcAOqSWJ!het8 zJ^+@m?DvQExAc1)oyW*C>gLi45X#_zWrP#$gX#A!N#Mk2{Uvz=By#O(zpM*~D@jps zvwbVz7vaoA<)5@Vx#md$Q2ZqvojF$uk?n*8o)O8O_TM7{m==nqz_IS;^W9y5;Te+b zX`lX-I9Ss66;wEx+wVR=@eD}zw4awmMXQA!#ZP*~_d`CODM_Y%f0MAU7xw1(1;DksqJ6WWmOV!t73C)dj`rxwZ2u@=*d`bs5}h}kqCmPI zFmjEuD2+S;Pp&{+SP$PyKLU9#l=wde<=TDu!Y?dpF_a1=Qa8r+8nQ0eiLjebL{C{SA$N0%=%{e z&!B@Lis_m?KS@_;{~_37SCc)R6Y2j1=>Gv|d{g-}4o;fNr{71@SScXVH>4Sv@V`Pv zZv5GErzl`A846Bz#M35w+Hdy=`=H5QGRc=gMqgBa+V8$9>UI00P;&rR@9yWv`fg!8jd`Bg&U O+cYt_wozA+=;nn=P7?L@TdbUITzN@r=}46TT1rEGt{`}T!R z6sP{zKfIZ<=l;$)_uO+o-h1cXJmhOwXS3OaRq}*g5Vh`eIIF?+t4&R{P{b9YTwEg- ziUNqJtKJJ6kb(xJe8?OiJE$4B`C*5XbR{r}I*28K34ykxeY1rivArlV_0G9MIDhJJ zlAcAQ=RoY2`aG}_=u4m@rGZJ*vk#IEdmVj~>VQd>uIaQ9Y%RnXA9pxOax6Le&CmmW zrqDmS7h9mujc*b%68&?0H-$rOH-&@C!l7ug%ae$CmYX`ZrGEX!7W|sypic$Fwk06W z9rfS&*1g|->4}(U~+7LWS5L*r5jW9csHj6s9zw`{SZB9*P?6BCh#^A`#ZKPMFdk?c;_T@rR<2 
zbVOp&tV9*-8|v5As>?mqQ?*J&k|+P{a#xvku8|FxoNLBX5(>@X>U5$`S&_tf;z24= zE?^%PU@=`!JDju7u!tNBRzi+NollN|FC@p}Dde++C?j_YQBHn^5L;6d&h#g6`0!B2 zLLvH67c!a5{y`%@{WAzt!_Iq#*xgjgtZ<-ZX0a2$6lNAP0-e<}@1bdCF(c8bJ2NIR zBhop9GlLQ{GJQVr5s4Y0t`I*hF(cK>i3cQR#Cj$1UWpmGek*aO#GHxt0bp6GKD@v8 zuc;5tHZ^b2i?H_kkpm9r=56{P_T#yI9u3a559I#!Ei4<##YbO&`(;P)lbS5pS$dRC zF&*<-dQYb&N^~kRW2xby6CVu?^c&ZUqm)SdW0QHkzdQE#Yiwh0Od1P*EgPOPieGDhx&7@@hi0>@@j&B_Cos$($U(4Cvv19M$*~V;xwz0mJbGEVkR&+4;B{BEa z!PJDSDgBF!nG8Zp`Oeu=yU!KwISJ|Z!t_&?_$A1KnHR0Gk zYx``y604U!{2w#02}dS5nn@1p8z}vuC3R+!C-qi}_E{pAZV>`od*O z{|hcWaOv0noyqj4oTx56e(Yl2{`1D-s!ab&a0)|5#we>GwPX&yLYvfx4a;}iC&~9y z!>)VoSMM9b^ZXHBuksz7kbS&=RraOJ?=rs$js6EUB^Zim(L^W~P28?54t6aL7K)~@ zzgvqdov}p7fMQBsZ)kBpB}z2rfib_|P4QSq+#gX4T~j*Ec9GalEuriR8M@+E650bv zEgI02Si7=h*OCflNrFHAC5k^9RF(vnh~{{=V#E}D`~te^CLRiC<%^bH%dy@Wk3~C_ zKr9l0eT5Q>hP#zr6^ROeMNsj_HN_tYXq|=@ymqO$JC;~$&Ur|mDYb{G7aFLVwpO@ zqnKiTD4MV-2EmVjyRaF!3s`IqZV|Q~r)`1D;Wz-f0vW!E!UaEUC*00;Zr2@iXB&<# z@vYL8H!Lr^j`}sA6#7Vj3$NR`KX2`v{Jhxfa3d-pY6ZOw{hLgv>d@o*qWrw1sGVz1 z_7vC$^C0oQUQ25sHh|dI0qp7fz)gQ%x9c&x*Io3e!|N_dT zC9T;$>tTIu?d?i=OItE(B$XAOYER{|nxsTkd#Y+YmDQdVOHH}5yt1mMvZnI3%f-rj z3F^$-4)d3?6d1(7u=orrcGhD#N4l}*aa?1P|Bw;cOdMM)sfrf zjuYbG6y|O-_c^&;?mBayo7nt1^B81eb2nT2$S#mEq3{%F>baXyu+Pctf=5m4_bF^% zFv(Mw)bpvjRP17bd=@Q=d>^uHh2TE8OLKQY7pdSGim7GppDWQP7_ zV5>j2KR!eMb`Dn1=jj~Gcs>TM%Z`6u-qj)x1JAu@nrA-Juw=YKcz|1B--!A=^mj8b zuO*n@aXhs%>`y_?YXva8ze~ky=+AoIXul2ie7-n;yw|!9_|kloiV!gGJD6|MJ_>FX z1(ipDE&q)F$r<|9!2AxeO)BZq24 z)Ic~E)p&yzjHw;rSerkr28~!eq56|u0;hw{u!ci`=a%J_m7i~8jBI+nD!}@_O@D}wf5b#`4pZ+cf|0w zfg5p?>sAeyw_3bYc%m^w^LW?RFEjidS=13tdfJj9T%d=7!b7C{6T0vOyQA=DawBeP zc4~2+!>0iiy0{jmy;mtDrS}5sY2p{zeMyD{x(yAi~Ugm%nr zPr~j9>dbmjmHwDvYy&ZKq8)SL9#rN%)`FOOHNkET>a=5qV%a@`I%8!&d~=`jDFhji5PL36AAChE*oX~$g1bs4+AK+k?@$DFF@GIk$8&o!eR zb1DA6FrAfw$$mNSKLA<&n6qB^9Dj`Q!yG%AYheK@AhQZ>NasP8Kjt)RU>C8Bz?E$M z0yz6;e9RRN!;W>U&yf-UTF*Mm9|&x*P2@5LrAq#|3aDmmBIDj&e#7y|zW Y1UG>=28iUdk?Ze8r1{U~G_?Hv7s)m0uK)l5 literal 0 HcmV?d00001 diff --git 
a/examples/Maze/cfg/maze.aarch64.cfg b/examples/Maze/cfg/maze.aarch64.cfg new file mode 100644 index 0000000000000000000000000000000000000000..f1fcdf67cf9ceb051fc661ae68daba04275b113c GIT binary patch literal 14862 zcmb7LYiwM{b-ufMZBHn#wOY&8dL5aUR4mD~MC;*)6toFtr+y`A9Z5~x=ErJHt|V3@ zS6)(nqh7m2KvZ-}*0jyW3b0bumT47$Kxu$(g?jA>E|IbT+o%T73I#1QEsO9612Z*^ zQc=(Mow;`}Nj)e6nYrhlnK^UjoMF|Ct=SiH_1SEp@!;+j8yv zoYQFNdQNdSg)0;8J0D$!aQg0Th9bn3xSy_QfMViiggsq&@q+=vQNnU8^N~pI)ZUWG zH03h~TJle2TQW@@`9gapd*r#~hu52)9ueq1Qv#8(0CC@|cHWg<(&i-LD#uwJC}T>B zq)U+GV}>NI#4S}fpjq5Zvsv{<=V|sbL-@}ZF76F_8PHxpQnRSuqk_bhRAxgT+Xl0t zpYXlgt$fD_Zx74Y)l2z6L_U(Z61Q}_m9HC>ZgtAa*F*T)d`Ld%LB0|s)raashGNE* zxD62w;5su$3BHdRuv%tUEEVyFKj@5ZyW#MWj>d!80`)OZrr`BOzRz);RjlnKEb*IC z6!^WUfo78HmAEn+$I@%; zr6I!Ow_7+&I1!8dHKjUD=*+$RGs3b$CvueVjKbMJ)Y=ih_MccgO1RH)lzGh7Mirj5 zuwkb#uQTI>Z(do0=g1UHunFzL*WjeB#jMbox!-Wq;z}yR)lXf2@5(PbMy6^n_HX=} z5@E2iP7>Oc6{&Tb3fX6qBQY}}3qSczROxml?kgC%FLC4|-eAR0t;%HDJF<<3GK~ih zWeyz47MkQ$+s!2U(BWQgqb+8NjcUr#UYbNlrXb%WV}HrlY%$Yp;VipF+|1}Z97hLu zR^j|ZuTw*F7Vgpkp7$YY)22vMB$^lsmVb{F$xwl$I#Ipa;8{!uiL2U8K4MszR^py; zoF~~uM5cE?yO8d?(9nJGa@}`p_at#c;x;(dY8I1At#q;&F;hpS%R@@UxB-bpdQbPK zX@$B!Z8S544?9j@`Zl`-bjkN`yRk9b(vo>Lmpzo(-$pIX+J4?w3q3~|%%iIM=Cer%zcE% z|L80K!nXZ{p1)R)5jy)`#^RLyKw)R(TZAVGtDm%nbAWIv>QoGSXCFrvBy|eaKa85< z8$#DB(aC*9Thmv+oS-(QxmK{^*+p>~ht5f|X zVPMM!Y^}9rr!1r`6AMdNU9|ccA}q#2`Z@U~^)oCrDLf~Kq6W%JZdKwsj?mA#S%qj8}v(+26|_`9{7So6ZK)r@Z?f=uM~W+sP|$4>NkE zpN#3tzBX=-k;A%0wWg#!hZ~eAXYRS?4lg2am#rPzlerI4SsC4gf$Pyj_+w}32tfI% zK2$$!NMx@db7^~nE|h+S8ytnl6h2^UOA7Bst&ZelroW#ek+`LMEax$ly45XqQj`ee zzX*97Sme?RNa`f2_n0$SJL2SlxFvsOj}VIIpv!U8)>6W>86&(U>@@7`O{eWJlUs&)A**Ljk@byVUv7TM3L%Fb z=@w(Y)`5Ru`oMwv?_gG)KtQ|NOo*~@jM1CsYE1oprR~_e<0@G%cupvB! 
z&PtJJbxcSK&oFXS@p>i8FmeN&;a4$or^pl-xk19f$PEz&MsAoeFmk6A`bMs-&^K~t z{8}|~BZPsG8@098$cTWxq#|d{uLQ4z=AM+VT?i{M$UZERWT#0O^f2W)2 zDfO-V!`>iaNsk|KGeme?&wks@Fk$gpT&gYRG#g!uuTy4=NH9B8h5^85t2)SjGIkaTa-jbghl8&poPe|lwCF=l}Y!sS~EiU?JLN%)r?kT^8J}|MW&V6Ckpuk zg>38IgE^>#iW{>{?yD${O;mA<`8{)%{9h0I2@Cw0t!8|w5M332W@UD7;rYG!-MMVz zLC8TN&S?`bJxnF;bgdRZYnGf9rWQ4S3gA?&h^XGeLboH58z`ospkrU_q- zStBz;_;Jk9qxmeMqjTCbbA%6v-``JXsgTR!SeZ8S1t~md5QY4*%q^42*YQ*pmAf5qzlL#g#JWs^!&IK-q z^taXwlB4FZxEVLu2}zEN;>?L`L#+0m+&4@(Oqg|?1mS6|9r2D5mI)mm^Tbtzw=G?e zRGg{PD0Qoi-N(v=$HRviIE3;&AgME`mVKUdy%P88#(IRkUd33Ck|{9OV=9S%m^n)r z80&Guz*wJC=o{+^g}$+#^lQ~vPZ0*jdfL`nV?AS`-REZst7jrli%!#Kj_~8~c!%{f zkLn9iGy0a$^-7kZpE0ms?XY*2Oo4vJ34_h-9ATiJ3Bo`>lL~$POeysBGws)^er5;* z{mj~0tDiZ-U^AOnsGFH*if{y7_X>-=lN`ahpo>tP`Bj{=-L}?x5L^$rWKik~G!_?nK%rHJ?laghh z3{Id(2rZbMdmc+Ji&VxGsKQujg#7JtR#K zE(mFd4@o29$-{DORkH9nezkpn04-f-+57$}@(26=AYriY4-x(cG;kT`L`)iaN`7qT zwD#b~cFKgFAKN)Y7{qo)2+zlyI?lgQZR?+Y#|VRn(^U7t!cKqK_;Ww`Nz~B! zze|a#5+_I)|EbC*d;`v#-)V;HBu=oNHA)gwfD;V2gsigphWKPlCEpFe=@FIy@( zj_a1X)l6&0VK;8BBEw3^P}q}iIRYo?-fbdFy_wM_6C#TrU11YC0!#R7%`Ev>K&HzM z*qjxE4sorSC;Xo)%<^uDpJ6F%RpN3B_5G=VU<;La&jKGW!!PLMBKlHkN97CHgkP|P zUxF#*|HOARx(Sc_fs7u)iI@vFq}KEi{^W}DvtN7gC)F{+U{aL`gGqIgaKT9uyNMXL z0U;^wEPKo(7I0iiCGIojFM5rPOzWc8gIC(j5XQp}PDhsr$KbZ-^o`Gv5!VO@RQV4)|)?bPoNdvv8h zSE8Y{W?ngpq9Na-BL_S>{q}W+;S(|8+}NsjvsE9;cFKf8Ew4MVTkv&XS#-;~K5 zXwA1FVN~eI$ihxdbW1lKhD6REKyDAhBED-~3UY*$eUcojk_{Ev$wArInkiN%UUfp_ zgbu9dJ!fhQ|#f>JrQ*HsF@|~ zLx(&4t)|l9`b(z4GyA3I%)*X;)6B7_cU~3iUvGCJiYDv(>T>;-nO8whNb~#sjSPZF z^W{q+i64oFB;CH~E$X0l5`NYgQ{{FM{%x2lY41%jBsxxl)gJ7MKTRbLQ;56-e&MDqe0@{?BL*C|Rw9-733CQq64)T~r+ap(Mn?#L_1y;*juL*>_>jUe!V52+Q8RUx z(5CR5+Lf71OY>8Wna1`boU6%in{(jUr9{rxwz{)%zj(2dRK}~Mj~OA9L}ukAxWrc=vnKRazGn>B zq>7}H+?^$7aNT5%(DUcnyw<+R5vn&uUOO1ShHsObI4eRHP4qL4QC_V?G#wuEg%b)k zD)|>tQ+`bv{C7lYT#37u3l6gxweB5093lL<@6U}AejKxTk1@jY>+GUBOZZn2zyerjdZiZZOyZnK$WoosP$G3babu8b{C?kRBP=oW`9 zS_X0o%youjof7vvn)(Oe0Um&}Pvr~D35s&B*G)3{kz6vvRQk)P((OvF^*Y8$^x?{? 
zTK*l5EoOj?mYeszaUV4WuXQP?pWlOoY@Sx5@c zvgYs%+X>=|Us)NwaVg@cpJAOW{K}fM9gRqhcC^~yS!N=MTy%riAQPN#k^Bg|P@^X6 zb5warT)RHOB13t}D|G3P$wa5PeBzg}>yVww6*om+pnrDKQF;G}!)Qa*Mg z)-o0_RxOq))+ZKaS;hn_4l4^Q2ulGv7%&YW&b}L!j$2w39|-QMln%kEgwlo0c|{Z> zbz_*Jm~%BUi)v)*V7_3Iyf5#_N*fFvh6L?E(H1{T+DqFSxd~J=zw4;{zAp7cxT%91 zIk;%ExIHd0Xlbe}Exo(MDl!*S|G9+#_;=&c$O5GqfD(pWZ*E`rtAP@Eh z*neEiL4TfD=?h|-VUNS|`tQS~LEIF?#Sr%jQIAV!kjwur zD&2?(SZ zaJt9MB*f!Nat8^)Dcm;lpi8+|I5r2gBWo#3k1n?2HM_bY3w@UK8 zxX)nmDqxHq-obj&3r_VGuY#U%a@Fs+zZW-M_`FXX9eIWh9ic^k?3bZKj6IUwyCm49 z{XWo(xDz5v#1bDu%m+F^Am^I5G6EThYwkmW^dHOGg&uGq#wnQ3GCzNe zX9>VSHzi5!K!=n&H1-HGE*>WY29L8{!@CGO$a>q0)DYwZentA#es=UCxWQ*R>tjq2 zaY7Uc^9b=G1c}0ZL6_ma-VgfP*^3x4#tEU{Yte&0*Aan7j9uo(s*+tlh?}DfmvC_N zM&;hwi<>HVmwXHDA#RA&$oj)w5c$SmHi%pZq9ZT?TO{oqoNTdfafZdg6-UoLp*Sh_ z$#D;7H5|}zD#KCiGqEc;bm2gSQxpzII0WJ3gX0d)I5^ns6Z?fD49+b$j^GS}g9lC< zcsG8zXD{FA)G?{>g!as5l!8Z3)dpp=3Uq?21E4b^H zUQgX3OuKI}xYD0obeTW>^XtqE{Ns^W#EJ49$oG3q!3BXYdv`WHcV|~0gE{Xo)A?R&{+V36_iS^=K`-mI=bk>2YirDT`TgF8XE$u} zHnihUc7vB~Yw|WUZLs}Fa@gz0dy;o;JgA)&nj3Q)*WFd447|bT$^R(e3g0F#-`4WH z_w1(jP1#LNUbc|)vW<_>+eh6OCEk+TK5tjrBwd%QOfjDc>PCmf*b8^4-`;eqW^8ML#)T{75Xav`$75< zGcMuOWMXdZE`HOKHA?Nb9muOalVGN>!6?bmu5MA_6Sg7!(}+P7E}0m+p@BV>EM-qC z^b$2gd9*ic$t@isKvEYzd(#MRFjLZ*RF_bF>3@;|g~)rN+`nE%k?E zqs0kJQ*nFbZI#b`mp$m4*FzTV|tVc zIqf>lX|tl%^lJsO*L6#>*W~dGg4tJ?G>@+_1ImGjLEUwX7_jNBW>71TJFMd^Xj?w4 z<+w;w=i(v_%TuPT<+vqQXK#tI<~B2;HN&X!|N943wW%r}CW=xi6ICuSZ;9-P)ZS7> zJtfMr-Gz#8tx^pnpp8(uutSnWkL>NXIHtX@7_x?nYpa$jJApy4JSHSSNzT-v7hSUH zV1)-wMM&x~0i8-)XJcfGBjszVtXdM#k?k}g6(KbYRz3h~sHs{qunCNJsMke literal 0 HcmV?d00001 diff --git a/examples/Maze/cfg/maze.amd64.cfg b/examples/Maze/cfg/maze.amd64.cfg new file mode 100644 index 0000000000000000000000000000000000000000..3018eb00f5a8166dae7b73ac1790c4bd816c7029 GIT binary patch literal 12401 zcma)C3v`r4mhS#Lom~5srhWYrP@0!u#7qbt%#6%x5S*6P9X7(~?hLkV(nvH(vz=~H zk75*jL`0)F3@$pdzD7MbI_t=wAc=yYj*s9O-yS!*I5tFFd=8^X_I~$P|J?}@=yQ_$ 
ze^vM1s=8HmtLj$yd@YG9l2wV8#wjO7wz;YMrgkfmR1#ED5?lN!O>jkyq0PuFi!Jp& z>WjK6{NxV1ZC7;9J?$RO4$xna?TKC2&B&%#fvk*|`CMTS^S!|d6I^x=6C2-=$%OC7 zcqQK4DwoSuaWrcMH?-FHQuF4;r&L#;aYijmicpk<1eI{zHluDndIzf9o0!c#+E>-o z+LVbeNT(ViIWK*)jI-j@dkd405^tA}3uD2t^f8jXxzc_8{S`+@;qGYXz3rZwvIW^p z!~8^A)b9kByHmdVOW_`Z(35myl{L+U(e&lM}d_Aok11 zoI*E%gq4(Sow1kehRWG=3o@*}DULA8U|}ih7bM~#B}Glz1}Sf@;y_Ib(wpEaSv+Hp zn-#tbaHmD66?df&&(axtxfTIhjzmwa;RQh}1TCGhN6?)nuSd{biaLOTbPrHT?Aczf zGOM{yGu_Jipg|Zd0eW~q-Wr2Y;Anabs5Itn4`1M_7{*=X;_=b`U+tcn5)59Ee^MmM z*TQT8TBm3ubo*&Nkkzr3!B~0*z?-W&ux*|L7u#kpY$L{w>oD>uanF%*{*IC5iFJI; zC`jJ|VYY-BhG`!l_vRj_H?Nb5^qu4MCaj(}el1BK1eNgk*;C4DZ2CZi7X984!eC(` z3}+kPJm5Gap%aFIV&5>%3(y+L{#nS}}SKAb--rZuBVi zE3xX%ugjmPJu6D`SjW45K`^0-XgL^_?gJf3XLq%UW2AAJFFG5HMoQdGs3ohYg%8&xZz z>bQ8kF%`e4Idx&8Io_B_rQ70(>}BzJtTR4X<+q~p99AEo@9^z8v*Q~<`wV(R&^54< z=d9zkhnE-=ia!j}jS8Rr9{z7U-3;i>J@zeolB12&gFme)@<*cdHc=Y8ZWkk)?f|kf zQKy5~asqcSh`j=tSJP^cfD+GH9XmzMeMn@a^?p#$bm@yAJq%P**MG$O%o?Oe1;!R{ z;_=LS{wTb$pY%xu{sd%YcRCe7Xn;f&tDGPXr>( zgo<;SLJH&UX{5S;#gaZ+FW+o9rh6>nB#9O$h;1qZrZGmQgXVUVi1b3oo*2B`zR2k0OU^Z}7j2kNJXfUK!eY4Rfi zGBceff5QpfPkEED!f|?PhQrPiDC%Npx-R#8U~c`5P6vs1GEx}o>OE{*t81B%9c5nxt|l*^}x3*%ll3FQSF0$lsetldv*fUMX`1|j+bfVX_@bgz!-Cxo?pqoTC~1fLSYjx8)W zk=Bb~*Y4tu-mM*dCNEHlOjD}$UsCbuZ~`If7VdTGd)W5R3(mdy9oznJ)A0|%_N5Eb zO|6-EEEJ%Zz{J(ot;{QePLWw^QxA}p9pM=LGfv=^zs|(QH|Dp@^MDd(em{zCcjouw zg7WkG36p19ugJ{rr+^Cc`x(tN^Sj$1o!^@V8}E^l}@|i!?@ib_M6D8CCqhphC+C6(#`oS`8ZOoY^@`?Qzic!lK6(j0NC!uj zcSy$-qGyCXB@E4Qa*4FnDV2+-*#3@!BWT z2c66lP~us=+QuL0$q$Tol?C-SDSmdlntK<^03SM_}n%uH~)1M3V|6~vatO(C?Gx5h%exu3$w z#~3HE8cre{#}=UBl6b@XOXBkqP0hwxWAbwgg>VbC-ftk$an^4GS=l3;;*H;Um($g6 zEVfAI(O4zkuIJ(6aN5LZcpfgU45Q)PCSAkc!=&@(R{!A26Qo|Sl*E?J&{?t_kWV7& zNIwzzu2+j4>8t8Uxs>!dspc$v=}9#_Y(n&@a2Fiu=Yn&Op!6Sbq#^o7i*UT}cPi9V zT;)M4-}P>SNq9~OIg~l^Kj8P~?mEDn*e)zx&Jw&sMLsGP8B~#4k~tbw!tQK?OQ!Wl zGOZVtIvr`;nM~{L$UK>3J;f(NP)9;mv&?Ls0?Mcsz(7L*^vUz$Aj zmb!~uL%Q|#DKTf<&#)VI&bXfg;Y@h}H#|UJOJ>JaYZwJ-k7jmcS2K*zUO;w7OH1ApEnzCm;P(aRy08uX 
zff*d255eSFI!9ZzU6G6wKYapZ8E>*fv~{_Py-E8Du0nZYR&(O=WO`PrtqJ}_>hhUz z7wYn*;M_w8tjiuSaVP3R@Ewqregb_j@?ADcA8rh=5~YU{12%?5<)D&ICa?h*PV0qb#}1xpj>~yiHxD3@$1IXfaX^VP1wV%a zcD8n33Cb`3e>ZtL1;15frr`HLg(-Lq%J9U@6kKAE4%Qn6bLsGP!^q ztXS-1`ajq-poCYCg{;k{FOXYQ| z1tg%vnd$#9s{qynKkWs|&-8r&-b0<~i_u|4lkG;qxx){zZwSyb>GS;h)F~)`09y`Z zWy_uB6rXzK)I62MkR0FA4!LUcaBjs2dK9p<{FWf*Oa8(En^QJDh%KA($@DD@#*tbO z?oMOIeSlV9;Q&-@#*+yhuh5!=U}1o+MIOg0w=we3bwF+e$O3e|+xqMU z#r%jD58y`>m-)xg-G+Z_OBw4QX7nS#hScUkP3 z&kiUPr0>DUOT#{o82p-|YnK2mF=#jE1?dJLX3d={tV=c1jdM|eZc(&tA0wM?1F|wl zIu?A06S&17_Htw%L-^+25n_7kVI-NJ60Q&lDe9?}U~zhC6_8crDvHaw8=2Edh);>> zsox>zpgnb;C^tP78KR$7@w@=f93_=X%G!T+6vx9|6*f;e+~U51QSOh&FKlbm2_+`_ zvuMv22H8uTz$SW%iH+~bt!@Hf8woLohNqEaro<$A2FROh#RMN^HZz#3HA9XpF4#xU zK|~BuNt^Ipw#Dn=3cC+fVXaYwq+)(0c}IKXke5S|Pc7WlZh&7C494)gczImA<5X>PoDu<{XJsM-2h_EAJgaf_U^a> zoiWmM1}1XN5QB#cl{sEZQvZoctiI)kD#`xYUv#}!QLRr3D)<0N<{bgQ7s%+{Kko83 z@y^Lr>6HuoS{JVw54x!rSNokf@D2F&kGmHm*!lI3`_Do6_CffjLHOE1_=-U|JqTYs z0833vsunb7q@q2%V6SfXM9y<6H0UEd>;g%#4UHN)!O&_$YYjck(3qh-NnMpxbx|sl zFLUOPXeAdJCV8+>nP1kb-KV)`9!0A)3n!IIx%-gx(e#j}BkEPTU(`!@IIycc2%lG) z@VU{tPIGWps4T%*LDI)ix29v=uH1j!Ho`9`6T&f-CAblh^rNXq)3If))GaeIDyC2` zXGEfd47nc^%Xq6A6Pbi`#ifMT0gBx_~canFQ1&IpUj*8 z6i#qM-e@Q)MBg3Mauw@&dxoOqAwtDZ2wpVe;R*_ONfmq30Zvt9BFa|mY}qZ!r6uV# z<-ATAT!fBBhfAwYCA^7=(0Rq9AhL0O6 zmG$eR8M!D&c{7g@ZdM2fSy?=drPsz2F5NjBt4rn3?iGyn;{8Cxk=n`|Avf5=SW ze^-}rAUI?9jpoB)#?Qz4tc@djUo}pceU*4S&^H#h-hIc)Ew)@v%k^^(m&+1I$l=cT zkSxJHGj&Y1QtlAsiU3tdtzUIoiqO3_0`d4m)*8*s#MNH5_K+p6%mxrWUsQ^K_M!kE z>p9lU0p^Yw-9vN=A_L6A$Ng+MGx242woERr`}E4W*tk8~d%K-B;a=AWk1)bV$gP^( z4yX+g^gpD!^JtQI4hakh~7!mp_^4r1$dj{GNv&@$kzXJlDY+ z9e$XT-HF}MujbbMx^8aW`E~J4ody5$Wqy;CNnWN)Lk(ez8X<8<{LidD7dNEg3hNKo z>;XI23J%}B&DW5TJFFv&nEQ%)m;17?dw8IXI^AVqq$8Yo<3IZQD-93G(EKjfSk43g zx;^X(z!(aj5wd5N`xjEX>(q)52Y3&p)M4@-a0*4YNfek&2Qx7 zt2n>UD$4i#%>n~tm+;>#o-s+Lu4$q0t=6=VeT!#CD7vgNdXo-LtX zM2Xg0ZkN{OY<}7pZER{uwzhF-z3r#bagCRaYxKD^#YOl*uM z(#dF|p&_{-lWZJyyo>K1qs{!fW|Vv_Y-*K{bhORfM@FZPJE^AapL{O!a7FYFZdX;b 
zt^t<~-0%i&XPeJatHv#>N~iKW!TKQKt|VgaO6rdy+*?Ej-&@olNw`sn47yRMC&D#A z#NEb%h~~54z-T_F;~^5K4}6GJUru;W6mi}Y)pIll3He)I7Q}rl3l6-GWkI~%VL@X8 zX2c5^W_0WzFJS725grjliXIV+KGG-K{BS!ze`628;jwCWtsBYMYKPl-Us#lUluvis z;Ts30kMiwzCZv$~Lh>Nw`cA*ZPRMJ+?1u_z~RM4XleCW{f@9>_hE5qp(o2qWTwdy&pUt#bvf b#YWs+NQSkX#`G2%iJ>;6755Zm*+Tyh8kT{! literal 0 HcmV?d00001 diff --git a/examples/Maze/cfg/maze.x86.cfg b/examples/Maze/cfg/maze.x86.cfg new file mode 100644 index 0000000000000000000000000000000000000000..2e7b9919369006746f8bf257e2775c2fda8ad66f GIT binary patch literal 12818 zcmb7K4S1B*m7Z@VlN@KhB*Q1_G|)N;P%y?UL?4SQ-AXmKb=AfQy8HC8$zM-gDCr(L)`^bHl92oPuLUbyYtQq zYD%Ng)|TrUq79v?Xj4mji*O%O?lp_^xYsNj!OhG`Y3Co(&J|KVBIT>NTuvXTmftO6 zEn`fZzw2Qw1#PjGc4n!jW5SaDc_x!7`vO@ZU)YMIH!;f?)2+^AtRopU{5IXv4T}s# ztSLNzio&Rnoqu>~H3;8RP_DqgVeM*Z+2YY!k+o1;&=HR{O6MMgfd8yD`7CSlS#0_l zSbSYa3t1pcj|21GP*xV?q4?N*ySRe80TBKPfS-VURoQp~NP@=%U!NY&d7o>HfVo^UM zVOlRpa?Hp@c+s+*t;USXpiXmyXtX9>Uq!AyT*0g4 z9<+zDv(h3UJbVFW_&-Kfb|<}dRzN-haI1(7Bp6Mj&suXw!mzSMx42-Q?bDY(| z;g2Wddq%nIG3Y-WYNC~SOiIsZg2q3nM< zg^B-+{o*AZZ0d-$#iNa>wziv`RlJsOV=WI^3IhtPE4cximVso}Sq-fK=1+gXd%di} zn4u45;K}FjarK;dGCHpznru$BUtfLQeAZA(_durbmMwF}%Yj--tE8f*>lk-hX7)i; zc+Cm>upr6Y%s#;tY|@`H2&3$rLnu2_6!_Ekqc_-c_^2Fvj(ZbKG&M!1Pd(?ni!Z*A zMMdgN5@0Lvb#2AY3DA>JWqnJ^Pe~aEwp9_R#^*UYuOrdme1$MZ=X3?O0$~4HFE=}Gm(xvM?BE4FqXDnbOO!o_tvfZ7uhAY^V0O1Ef#5z#l{VdjO5gdlnC>vG1m;cUT&4|N!Oa7Ne*)rpbPt+mK>%ORWye@BKr4aa zz-G$5Lg|euWwlUvwL0r8TG=bI0=NrU6E>|vrLVhI7uo%S^+<xtz!}N&kmxYDyNb~p0&p@0`0*VztG9Z#qsAO)ZJ;N2*orbdWRf00^x&WS8oy)bT z? 
zzrbhwiH!UlSX?7-R!saEo3;toTZ@&&o;oo3E>_Vll>N;PnjdePzZ1=uqWO+16RoLa zOQQXp8kwKp33s{|-8k9I^F4%>v^7VuX}btz&$wMVQaSww z$mnO>fJkrm{2MDSr{9Y}+V`={jz(jzvL{hf(AW{Xky(oA6|fX^)$YbZw`rfq>s|}e zUx4WzsG!$W*%Isl$JqE4*x29M8jsHt@xQ7#HdOd+l)psS(X3bi$B4T9pGBqj`$g`i z*ufPn)6XD`vNK?jEKgejFUtaH;W=5qnG_CAn>HH^E4_-g0rNM1U1N9kICo0y9;<7O z-4ySrOLVqi)iC!2`n7PU$BepWo|gu)Yi5sN*=_csVCTy=3)3sWys1)mNN@pQ1_t3j zg7_uE@VM6k*cLJFN0_#t(whvsf`8%)7Tm@ljIuM4P-a#E-oXl&^=)MZVD(}+TLG)b z85gEsgXT>JU743vW;cT{$}EU7_e%f^Vr7Ksc~p8+xqg|(UVxyqc-Kkl?i4F1=)H8J zPSPGYK_=~D(~E*-zM~(eSAZEG`41>-q!XZ5g@Eh`FGCUOIyj3lUWUr1Q~sKedM?nG z-Vm(2b{#X8(p$g^y%dh`Q!$G5m36`1>*ZlGrymZ4<)*ad{$ok(l9SVNU;G)VB9&YsQWEi(>L&`2xMJ zttHV~iD_*Jty$Qf<@A;|@PHoCn%EoRr5qfl{_lC}tLS}IzdrxSTfdyum&Rsw+%zju zACEOO&uWg>Nn;;qV=G2!>{wo7<&;r@mp?ht2gIfy;FvJSbC7P6Lu2lEZqBA|A!Wbr zOM#iFw^!Lb(yKdw_|r4;do>`=l}SR{v_drL?2e-!iNM>xdy9=B}{vk;!Jodvju(gAU0j0^H65Z47s z3&|Wx?AXGCNIJ{BbTWp_NrqXh_O zK>Sw{gG^H3-#7*S-zjF_JOclZVqa}dG+ZBV6pe3*Z3lB%1ccQ*r4n7AO%h2IE8rBL;ZDB@KkkGldk>i0ll^wVvVPAp!LokO3Siz;&L@FG%r*fC zOYFPJA{dCfEx?WBj-c|hL~fdr4d#TY(uv6&=J<@;C3zKY#b?`FnLoqJ7SV ziMF;_d*e6Za+cG3!ksS5Kj}scNe^#PA|R+Bn{FO?@8+zwXa(4p7#L90fc*Fn_ueE2rW! 
z<}vSu4CgTqoGN)toBmr>nndD5z)T|XVJT}$Fi_3TLz^ZN4-^n?0i7qj3Ihtn>*Z7= zNY>Td!2mraWn2RWXswiyj;*0}DEred|9?8Z5i;EIubpN({v0aIQZ1&qVKJWC7l3$Qzf^>!>U2U9X;cI1QAq zP6XNtc$5eTifItUf=qH^m7ssmM+0;is7ynMhEOibta#dt!={f=o;;he2#6 z0k#6OQiAj|5u6Jx6X`J^{`4LA>>&??o9Bb@&WbpV*3ca5U@n`U1eZBbifJ7%{@xy9 zONKW9IhWo+S3?Ro>>4s*SD4-d!8@4Cy<#*hCxhbeDY}bq2LL-chq%2U9T6ep5{6 z6EJamC(>Og`_m7NZp$C5yezz8x)&JRatO8*n@lG$X|V#H??Za={tP(13m*UZV?!V{KT%1mXL{D(0f<&p>z~gxO#rdJPy))**MY(!xA=x!VT1lAYxFB(2yA z+~u@M`el~W79jrg{LxLY6I^CF1?V@xtUm5ifObn68Seo7R?2$(N=D0>K?8UwF+st2 z1%;VO$8ZUW3mDwi;_8UI?5QY;M(4C8+V#pd8ZG0Sv3^_)^}%r%_LrGt+u+-^3#uDgz0nih%({as?45SAeHScphs}V{?hAX`&b^Y=Q6$n zi!`>h#oIeM5zzV1kx7m7Cp88H_kd5t8SN}*rg_ok8>h2r>xiR1sy~&GnRQA>WM=D6A>?A55xJOp zA|y>5tCIzh-(bPv`3Vd;TprqUSqHz?iE;Y_`L!PBHm;{ zV;N>dP{oX{d5|WG){i0FKRJ2#Pi4DKGVj$+n)dFuyI83o_7b zY|b@v<5zJ0xO*pOia9;3IpOZwp=tdl<-3Ek0X*IvoabS{7w^ORwAJ*9bF1uG&Mlf7 zlzge=L(zkS1)SN;q_)asgQO;>NpA5?Enm;_oosjQUQQ%%_P`o2MmpM{q)j9nl9R14vSk=6Zkpv5*^J9d;~Mxx zUFHQN_ieaQ!|JrsxG;k#Cz3RK-MDhe1CYIjq_L+B03G@BzEhSxa^GW9Dyx5lAJUxqj3{##X%8%vrWvyfeyER-Uq1z9G-mZ z+tF*b@P0nLMNV`-N3-+QlQ@s?yv@P)QT&5~YJTQ6?6sR5_l~mVzk(|}PoEe~xRT>> zc}%3QgY-!dpJMu^ho9(tE88jp8*y%-$6nUsr4@JaAP!kbHQ~?tZoS7^K-M_BpkOf{JQ1$r_fj7F!%U`+p{Il{ zF0n5u4JHfbd%jY3er3(X3eYd$Ul&K_oeVuQOK6eztHpkAVjHXRz{R_ke=jlp_g*es zWLuuEh5X)>=fYB7YC)l|-^(>Kw_ONskLTSZO7ua%6QxuqmDb9;$Bb4Lly+*ZQMLM*=TBv&e*Rs{HoBDIM?B7X z4BJ@+#!<&dXg|X>40)XDk`%jbN1GcvMCbVg&)+E+%6hs5S#SB0h>&=l48cPj5f_M~ zSWSIcj<8>L8_aDGz4a$6Y1p&IvU91QeNHC={?YzqS^KNuJxM2TAUwr#E<5wAGtW9p ze+wcDBKTV{VF8L<%l}J}>$J$c*`jcQ8}dd&UL*R*OLi=0S;Xo5ED)fXp|A!kgG?}jyI-flqEZ+a3C8Fg!*%>z>)th~-0Z@>t)m?b;loX-8Z#-NjD zSNo7tiHYHA8870lyq1nTUc_LAXT*3Bc;!jE6Lu)q2iW2Cz=*9E!hSS@yafLb7RgcC literal 0 HcmV?d00001 diff --git a/examples/Maze/scripts/disass.sh b/examples/Maze/scripts/disass.sh new file mode 100755 index 000000000..77cffa076 --- /dev/null +++ b/examples/Maze/scripts/disass.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash +# Copyright (c) 2017 Trail of Bits, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +SCRIPTS_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) +MAZE_DIR=$( cd "$( dirname "${SCRIPTS_DIR}" )" && pwd ) +DISASSEMBLER=/opt/ida-6.9/idal64 + +function Disassemble { + + printf "[+] Disassembling ${MAZE_DIR}/bin/maze.amd64\n" + mcsema-disass \ + --os linux \ + --arch amd64 \ + --disassembler "${DISASSEMBLER}" \ + --log_file /tmp/log \ + --entrypoint main \ + --output "${MAZE_DIR}/cfg/maze.amd64.cfg" \ + --binary "${MAZE_DIR}/bin/maze.amd64" + + if [[ $? -ne 0 ]] ; then + printf "[x] Error disassembling ${MAZE_DIR}/bin/maze.amd64\n" + return 1 + else + printf " i Saved CFG to ${MAZE_DIR}/cfg/maze.amd64.cfg\n" + fi + + printf "[+] Disassembling ${MAZE_DIR}/bin/maze.aarch64\n" + mcsema-disass \ + --os linux \ + --arch aarch64 \ + --disassembler "${DISASSEMBLER}" \ + --log_file /tmp/log \ + --entrypoint main \ + --output "${MAZE_DIR}/cfg/maze.aarch64.cfg" \ + --binary "${MAZE_DIR}/bin/maze.aarch64" + + if [[ $? -ne 0 ]] ; then + printf "[x] Error disassembling ${MAZE_DIR}/bin/maze.aarch64\n" + return 1 + else + printf " i Saved CFG to ${MAZE_DIR}/cfg/maze.aarch64.cfg\n" + fi + + return 0 +} + +function main { + while [[ $# -gt 0 ]] ; do + key="$1" + + case $key in + + # Change the default installation prefix. 
+ --disassembler) + DISASSEMBLER=$(python -c "import os; import sys; sys.stdout.write(os.path.abspath('${2}'))") + printf "[+] New disassembler path is ${DISASSEMBLER}\n" + shift # past argument + ;; + + *) + # unknown option + printf "[x] Unknown option: ${key}\n" + return 1 + ;; + esac + + shift # past argument or value + done + + if [[ ! -f "${DISASSEMBLER}" ]] ; then + printf "[x] Disassembler ${DISASSEMBLER} does not exist. Please specify it manually using --disassembler.\n" + return 1 + fi + + Disassemble + return $? +} + +main $@ +exit $? diff --git a/examples/Maze/scripts/lift.sh b/examples/Maze/scripts/lift.sh new file mode 100755 index 000000000..078e335d0 --- /dev/null +++ b/examples/Maze/scripts/lift.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +# Copyright (c) 2017 Trail of Bits, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +SCRIPTS_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) +MAZE_DIR=$( cd "$( dirname "${SCRIPTS_DIR}" )" && pwd ) +KLEE_WS_DIR=$(pwd) + +mkdir -p "${MAZE_DIR}/bc" + +# Look for `libc.bc`. This file is automatically created by the `build_klee.sh` +# script in Remill, so that we can make the lifted bitcode more dependable via +# the `--library` option. 
+LIB_ARGS= +if [[ -f "${KLEE_WS_DIR}/libc.bc" ]] ; then + LIB_ARGS="--library ${KLEE_WS_DIR}/libc.bc" +else + printf "[x] WARNING: Could not find klee-uclibc library bitcode.\n" +fi + +printf "[+] Lifting ${MAZE_DIR}/cfg/maze.amd64.cfg\n" + +mcsema-lift-3.9 \ + --os linux \ + --arch amd64 \ + --cfg "${MAZE_DIR}/cfg/maze.amd64.cfg" \ + --output "${MAZE_DIR}/bc/maze.amd64.bc" \ + --explicit_args \ + ${LIB_ARGS} + +if [[ $? -ne 0 ]] ; then + printf "[x] Could not lift ${MAZE_DIR}/cfg/maze.amd64.cfg\n" +else + printf " i Saved bitcode to ${MAZE_DIR}/bc/maze.amd64.bc\n" +fi + +printf "[+] Lifting ${MAZE_DIR}/cfg/maze.aarch64.cfg\n" + +mcsema-lift-3.9 \ + --os linux \ + --arch aarch64 \ + --cfg "${MAZE_DIR}/cfg/maze.aarch64.cfg" \ + --output "${MAZE_DIR}/bc/maze.aarch64.bc" \ + --explicit_args \ + ${LIB_ARGS} + +if [[ $? -ne 0 ]] ; then + printf "[x] Could not lift ${MAZE_DIR}/cfg/maze.aarch64.cfg\n" +else + printf " i Saved bitcode to ${MAZE_DIR}/bc/maze.aarch64.bc\n" +fi diff --git a/mcsema/Arch/ABI.cpp b/mcsema/Arch/ABI.cpp index 47d7b2f4f..08f2b6721 100644 --- a/mcsema/Arch/ABI.cpp +++ b/mcsema/Arch/ABI.cpp @@ -421,6 +421,40 @@ const char *CallingConvention::GetVarForNextArgument(llvm::Type *val_type) { return nullptr; } +static llvm::Function *ReadIntFromMemFunc(uint64_t size_bytes) { + if (8 == size_bytes) { + return gModule->getFunction("__remill_read_memory_64"); + } else if (4 == size_bytes) { + return gModule->getFunction("__remill_read_memory_32"); + } else if (2 == size_bytes) { + return gModule->getFunction("__remill_read_memory_16"); + } else if (1 == size_bytes) { + return gModule->getFunction("__remill_read_memory_8"); + } else { + LOG(FATAL) + << "Cannot find function to read " << size_bytes + << "-byte integer from memory."; + return nullptr; + } +} + +static llvm::Function *WriteIntToMemFunc(uint64_t size_bytes) { + if (8 == size_bytes) { + return gModule->getFunction("__remill_write_memory_64"); + } else if (4 == size_bytes) { + return 
gModule->getFunction("__remill_write_memory_32"); + } else if (2 == size_bytes) { + return gModule->getFunction("__remill_write_memory_16"); + } else if (1 == size_bytes) { + return gModule->getFunction("__remill_write_memory_8"); + } else { + LOG(FATAL) + << "Cannot find function to read " << size_bytes + << "-byte integer from memory."; + return nullptr; + } +} + llvm::Value *CallingConvention::LoadNextArgument(llvm::BasicBlock *block, llvm::Type *goal_type) { if (!goal_type) { @@ -466,28 +500,8 @@ llvm::Value *CallingConvention::LoadNextArgument(llvm::BasicBlock *block, val = ir.CreateCall(gModule->getFunction("__remill_read_memory_f32"), args); } else if (goal_type->isIntegerTy()) { - llvm::Function *func = nullptr; - if (8 == alloc_size) { - func = gModule->getFunction("__remill_read_memory_64"); - - } else if (4 == alloc_size) { - func = gModule->getFunction("__remill_read_memory_32"); - - } else if (2 == alloc_size) { - func = gModule->getFunction("__remill_read_memory_16"); - - } else if (1 == alloc_size) { - func = gModule->getFunction("__remill_read_memory_8"); - - } else { - LOG(FATAL) - << "Can't handle reading an " << alloc_size << "-byte integer " - << "argument from the stack (base type: " - << remill::LLVMThingToString(goal_type) << ")"; - return nullptr; - } - - val = ir.CreateCall(func, args); + auto read_mem = ReadIntFromMemFunc(alloc_size); + val = ir.CreateCall(read_mem, args); if (dl.getTypeSizeInBits(goal_type) < dl.getTypeAllocSizeInBits(goal_type)) { val = ir.CreateTrunc(val, goal_type); @@ -552,11 +566,15 @@ void CallingConvention::StoreReturnValue(llvm::BasicBlock *block, if (size < gArch->address_size) { val_type = gWordType; ret_val = ir.CreateZExt(ret_val, val_type); - } else { - CHECK(size <= gArch->address_size) - << "Cannot store value of type " + + } else if (size > gArch->address_size) { + LOG(ERROR) + << "Truncating value of type " << remill::LLVMThingToString(val_type) - << " into variable " << val_var; + << " to store it into 
variable " << val_var + << " of type " << remill::LLVMThingToString(gWordType); + ret_val = ir.CreateTrunc(ret_val, gWordType); + val_type = gWordType; } // Storing a `float` into an x87 register, convert it to a `double`. @@ -631,18 +649,7 @@ void CallingConvention::StoreArguments( func = gModule->getFunction("__remill_write_memory_f32"); } else if (arg_type->isIntegerTy()) { - if (8 == alloc_size) { - func = gModule->getFunction("__remill_write_memory_64"); - - } else if (4 == alloc_size) { - func = gModule->getFunction("__remill_write_memory_32"); - - } else if (2 == alloc_size) { - func = gModule->getFunction("__remill_write_memory_16"); - - } else if (1 == alloc_size) { - func = gModule->getFunction("__remill_write_memory_8"); - } + func = WriteIntToMemFunc(alloc_size); if (dl.getTypeSizeInBits(arg_type) < dl.getTypeAllocSizeInBits(arg_type)) { @@ -730,15 +737,24 @@ void CallingConvention::AllocateReturnAddress(llvm::BasicBlock *block) { void CallingConvention::FreeReturnAddress(llvm::BasicBlock *block) { if (gArch->IsAArch64()) { - return; // Return address is passed through the link pointer. + auto x30 = remill::FindVarInFunction(block, "X30"); + llvm::IRBuilder<> ir(block); + auto ret_addr = ir.CreateLoad(ir.CreateLoad(x30)); + remill::StoreProgramCounter(block, ret_addr); // The stack grows down on x86/amd64. 
} else if (gArch->IsX86() || gArch->IsAMD64()) { llvm::IRBuilder<> ir(block); auto addr_size = gArch->address_size / 8; auto addr_size_bytes = llvm::ConstantInt::get(gWordType, addr_size); + auto sp = LoadStackPointer(block); + auto read_ret_addr = ReadIntFromMemFunc(addr_size); + llvm::Value *read_ret_addr_args[] = {remill::LoadMemoryPointer(block), sp}; + auto ret_addr = ir.CreateCall(read_ret_addr, read_ret_addr_args); + remill::StoreProgramCounter(block, ret_addr); + StoreStackPointer( - block, ir.CreateAdd(LoadStackPointer(block), addr_size_bytes)); + block, ir.CreateAdd(sp, addr_size_bytes)); } else { LOG(FATAL) @@ -771,9 +787,25 @@ llvm::Value *CallingConvention::LoadStackPointer(llvm::BasicBlock *block) { void CallingConvention::StoreStackPointer(llvm::BasicBlock *block, llvm::Value *new_val) { llvm::IRBuilder<> ir(block); + auto val_type = new_val->getType(); + if (val_type->isPointerTy()) { + new_val = ir.CreatePtrToInt(new_val, gWordType); + } + ir.CreateStore( + new_val, + ir.CreateLoad(remill::FindVarInFunction(block, StackPointerVarName()))); +} + +void CallingConvention::StoreThreadPointer(llvm::BasicBlock *block, + llvm::Value *new_val) { + llvm::IRBuilder<> ir(block); + auto val_type = new_val->getType(); + if (val_type->isPointerTy()) { + new_val = ir.CreatePtrToInt(new_val, gWordType); + } ir.CreateStore( new_val, - ir.CreateLoad(remill::FindVarInFunction(block, sp_name))); + ir.CreateLoad(remill::FindVarInFunction(block, ThreadPointerVarName()))); } // Return the address of the base of the TLS data. 
diff --git a/mcsema/Arch/ABI.h b/mcsema/Arch/ABI.h index 03b5318de..1cbe8e91f 100644 --- a/mcsema/Arch/ABI.h +++ b/mcsema/Arch/ABI.h @@ -59,6 +59,8 @@ class CallingConvention { return sp_name; } + void StoreThreadPointer(llvm::BasicBlock *block, llvm::Value *new_val); + const char *ThreadPointerVarName(void) const { return tp_name; } diff --git a/mcsema/BC/Callback.cpp b/mcsema/BC/Callback.cpp index 76f68c7b3..1b409171b 100644 --- a/mcsema/BC/Callback.cpp +++ b/mcsema/BC/Callback.cpp @@ -223,6 +223,35 @@ static llvm::Constant *InitialStackPointerValue(void) { return llvm::ConstantExpr::getPtrToInt(gep, gWordType); } +// Create an array of data for holding thread-local storage. +static llvm::Constant *InitialThreadLocalStorage(void) { + static llvm::Constant *tls = nullptr; + if (tls) { + return tls; + } + + auto tls_type = llvm::ArrayType::get( + gWordType, 4096 / (gArch->address_size / 8)); + + auto tls_var = new llvm::GlobalVariable( + *gModule, tls_type, false, llvm::GlobalValue::InternalLinkage, + llvm::Constant::getNullValue(tls_type), "__mcsema_tls"); + + std::vector indexes(2); + indexes[0] = llvm::ConstantInt::get(gWordType, 0); + indexes[1] = indexes[0]; + +#if LLVM_VERSION_NUMBER <= LLVM_VERSION(3, 6) + auto gep = llvm::ConstantExpr::getInBoundsGetElementPtr(tls_var, indexes); +#else + auto gep = llvm::ConstantExpr::getInBoundsGetElementPtr( + nullptr, tls_var, indexes); +#endif + + tls = llvm::ConstantExpr::getPtrToInt(gep, gWordType); + return tls; +} + // Create a state structure with everything zero-initialized, except for the // stack pointer. static llvm::Constant *CreateInitializedState( @@ -419,6 +448,8 @@ static llvm::Function *ImplementExplicitArgsEntryPoint( CallingConvention loader(gArch->DefaultCallingConv()); + loader.StoreThreadPointer(block, InitialThreadLocalStorage()); + // Save off the old stack pointer for later. 
auto old_sp = loader.LoadStackPointer(block); @@ -432,18 +463,6 @@ static llvm::Function *ImplementExplicitArgsEntryPoint( // Allocate any space needed on the stack for a return address. loader.AllocateReturnAddress(block); -// // Set up the thread pointer, if any. -// if (auto tp_name = loader.ThreadPointerVarName()) { -// auto get_tp = llvm::Intrinsic::getDeclaration( -// gModule, llvm::Intrinsic::thread_pointer); -// if (get_tp) { -// llvm::IRBuilder<> ir(block); -// ir.CreateStore( -// ir.CreatePtrToInt(ir.CreateCall(get_tp), gWordType), -// ir.CreateLoad(remill::FindVarInFunction(func, tp_name))); -// } -// } - // Call the lifted function. std::vector args(3); args[remill::kMemoryPointerArgNum] = remill::LoadMemoryPointer(block); @@ -543,7 +562,8 @@ static void ImplementExplicitArgsExitPoint( auto block = &(callback_func->back()); - // create call to function and args + // The emulated code already set up the machine state with the arguments for + // the external, so we need to go and read out the arguments. std::vector call_args; for (auto i = 0U; i < cfg_func->num_args; i++) { llvm::Type *param_type = nullptr; @@ -553,6 +573,9 @@ static void ImplementExplicitArgsExitPoint( call_args.push_back(loader.LoadNextArgument(block, param_type)); } + // Now that we've read the argument values, we want to free up the space that + // the emulated caller set up, so that when we eventually return, things are + // in the expected state. 
loader.FreeReturnAddress(block); loader.FreeArguments(block); diff --git a/mcsema/BC/External.cpp b/mcsema/BC/External.cpp index 52530df41..f75e63743 100644 --- a/mcsema/BC/External.cpp +++ b/mcsema/BC/External.cpp @@ -101,10 +101,10 @@ void DeclareExternals(const NativeModule *cfg_module) { auto var_type = llvm::Type::getIntNTy( *gContext, static_cast(cfg_var->size * 8)); + auto linkage = llvm::GlobalValue::ExternalLinkage; ll_var = new llvm::GlobalVariable(*gModule, var_type, false, - llvm::GlobalValue::ExternalLinkage, - nullptr, cfg_var->name, nullptr, - ThreadLocalMode(cfg_var)); + linkage, nullptr, cfg_var->name, + nullptr, ThreadLocalMode(cfg_var)); } if (!cfg_var->address) { diff --git a/mcsema/BC/Function.cpp b/mcsema/BC/Function.cpp index dffcc6a25..ff265d5dd 100644 --- a/mcsema/BC/Function.cpp +++ b/mcsema/BC/Function.cpp @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include #include @@ -65,7 +67,11 @@ DEFINE_bool(add_reg_tracer, false, DEFINE_bool(add_breakpoints, false, "Add 'breakpoint' functions between every lifted instruction. This " "allows one to set a breakpoint, in the lifted code, just before a " - "specific lifted instruction is executed."); + "specific lifted instruction is executed. This is a debugging aid."); + +DEFINE_bool(check_pc_at_breakpoints, false, + "Check whether or not the emulated program counter is correct at " + "each injected 'breakpoint' function. 
This is a debugging aid."); namespace mcsema { namespace { @@ -85,21 +91,59 @@ static llvm::Function *GetBreakPoint(uint64_t pc) { ss << "breakpoint_" << std::hex << pc; auto func_name = ss.str(); - auto bp = gModule->getFunction(func_name); - if (!bp) { - bp = llvm::Function::Create( - LiftedFunctionType(), llvm::GlobalValue::ExternalLinkage, - func_name, gModule); + auto func = gModule->getFunction(func_name); + if (func) { + return func; + } + + func = llvm::Function::Create( + LiftedFunctionType(), llvm::GlobalValue::ExternalLinkage, + func_name, gModule); + + // Make sure to keep this function around (along with `ExternalLinkage`). + func->addFnAttr(llvm::Attribute::OptimizeNone); + func->addFnAttr(llvm::Attribute::NoInline); + func->removeFnAttr(llvm::Attribute::ReadNone); - // Make sure to keep this function around (along with `ExternalLinkage`). - bp->addFnAttr(llvm::Attribute::OptimizeNone); - bp->addFnAttr(llvm::Attribute::NoInline); - bp->removeFnAttr(llvm::Attribute::ReadNone); +#if LLVM_VERSION_NUMBER < LLVM_VERSION(3, 7) + func->addFnAttr(llvm::Attribute::ReadOnly); +#else + func->addFnAttr(llvm::Attribute::ArgMemOnly); +#endif - llvm::IRBuilder<> ir(llvm::BasicBlock::Create(*gContext, "", bp)); - ir.CreateRet(remill::NthArgument(bp, remill::kMemoryPointerArgNum)); + auto state_ptr = remill::NthArgument(func, remill::kStatePointerArgNum); + auto state_ptr_type = state_ptr->getType(); + + llvm::IRBuilder<> ir(llvm::BasicBlock::Create(*gContext, "", func)); + + if (FLAGS_check_pc_at_breakpoints) { + auto trap = llvm::Intrinsic::getDeclaration(gModule, llvm::Intrinsic::trap); + auto are_eq = ir.CreateICmpEQ( + remill::NthArgument(func, remill::kPCArgNum), + llvm::ConstantInt::get(gWordType, pc)); + + auto not_eq_bb = llvm::BasicBlock::Create(*gContext, "", func); + auto eq_bb = llvm::BasicBlock::Create(*gContext, "", func); + ir.CreateCondBr(are_eq, eq_bb, not_eq_bb); + + ir.SetInsertPoint(not_eq_bb); + ir.CreateCall(trap); + ir.CreateUnreachable(); + + 
ir.SetInsertPoint(eq_bb); } - return bp; + + // Basically some empty inline assembly that tells the compiler not to + // optimize away the `state` pointer before each `breakpoint_XXX` function. + auto asm_func_type = llvm::FunctionType::get( + llvm::Type::getVoidTy(*gContext), state_ptr_type); + + auto asm_func = llvm::InlineAsm::get( + asm_func_type, "", "*m,~{dirflag},~{fpsr},~{flags}", true); + + ir.CreateCall(asm_func, state_ptr); + ir.CreateRet(remill::NthArgument(func, remill::kMemoryPointerArgNum)); + return func; } // Tries to get the lifted function beginning at `pc`. @@ -122,7 +166,7 @@ static void InlineSubFuncCall(llvm::BasicBlock *block, (void) new llvm::StoreInst(call, mem_ptr, block); } -// Find an external function associated with +// Find an external function associated with this indirect jump. static llvm::Function *DevirtualizeIndirectFlow( TranslationContext &ctx, llvm::Function *fallback) { if (ctx.cfg_inst->flow) { @@ -236,8 +280,8 @@ static void LiftIndirectJump(TranslationContext &ctx, llvm::BasicBlock *block, remill::Instruction &inst) { - auto fallback = DevirtualizeIndirectFlow( - ctx, GetLiftedToNativeExitPoint(kExitPointJump)); + auto exit_point = GetLiftedToNativeExitPoint(kExitPointJump); + auto fallback = DevirtualizeIndirectFlow(ctx, exit_point); std::unordered_map block_map; for (auto target_ea : ctx.cfg_block->successor_eas) { @@ -245,6 +289,38 @@ static void LiftIndirectJump(TranslationContext &ctx, fallback = ctx.lifter->intrinsics->missing_block; } + if (exit_point == fallback) { + + // If we have no targets, then a reasonable target turns out to be the next + // program counter. + if (block_map.empty()) { + block_map[inst.next_pc] = GetOrCreateBlock(ctx, inst.next_pc); + } + + // Build up a set of all reachable blocks that were known at disassembly + // time so that we can find blocks that have no predecessors. 
+ std::unordered_set succ_eas; + succ_eas.insert(ctx.cfg_func->ea); + for (auto block_entry : ctx.cfg_func->blocks) { + auto cfg_block = block_entry.second; + succ_eas.insert(cfg_block->successor_eas.begin(), + cfg_block->successor_eas.end()); + } + + // We'll augment our block map to also target unreachable blocks, just in + // case our disassembly failed to find some of the targets. + for (auto block_entry : ctx.cfg_func->blocks) { + auto target_ea = block_entry.first; + if (!succ_eas.count(target_ea)) { + LOG(WARNING) + << "Adding block " << std::hex << target_ea + << " with no predecessors as additional target of the " + << " indirect jump at " << inst.pc << std::dec; + block_map[target_ea] = GetOrCreateBlock(ctx, target_ea); + } + } + } + // We have no jump table information, so assume that it's an indirect tail // call, so just go native. if (block_map.empty()) { @@ -544,7 +620,6 @@ static llvm::Function *LiftFunction( lifted_func->removeFnAttr(llvm::Attribute::NoReturn); lifted_func->addFnAttr(llvm::Attribute::NoInline); lifted_func->setVisibility(llvm::GlobalValue::DefaultVisibility); - lifted_func->setLinkage(llvm::GlobalValue::InternalLinkage); TranslationContext ctx; std::unique_ptr lifter( @@ -557,6 +632,10 @@ static llvm::Function *LiftFunction( ctx.cfg_inst = nullptr; ctx.lifted_func = lifted_func; + std::unordered_set referenced_blocks; + referenced_blocks.insert(cfg_func->ea); + + // Create basic blocks for each basic block in the original function. 
for (auto block_info : cfg_func->blocks) { auto cfg_block = block_info.second; diff --git a/mcsema/BC/Lift.cpp b/mcsema/BC/Lift.cpp index bbb3627d6..98903b8bb 100644 --- a/mcsema/BC/Lift.cpp +++ b/mcsema/BC/Lift.cpp @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -45,6 +47,7 @@ #include "remill/BC/ABI.h" #include "remill/BC/IntrinsicTable.h" #include "remill/BC/Lifter.h" +#include "remill/BC/Util.h" #include "mcsema/Arch/Arch.h" #include "mcsema/BC/Callback.h" @@ -58,6 +61,7 @@ #include "mcsema/CFG/CFG.h" DECLARE_bool(legacy_mode); +DECLARE_bool(explicit_args); DECLARE_string(pc_annotation); namespace mcsema { @@ -96,6 +100,64 @@ static void ExportVariables(const NativeModule *cfg_module) { } } +// Handle the GCC stack protector. Normally we'd do this in `External.cpp`, +// but it seems like optimizing a global that is `AvailableExternallyLinkage` +// with an initializer somewhere in the code (in our case, a `LazyInitXref`), +// results in the linkage being changed to `ExternalLinkage`. We want +// `AvailableExternallyLinkage` for the sake of something like KLEE. +static void DefineGCCStackGuard(void) { + if (auto stack_guard = gModule->getGlobalVariable("__stack_chk_guard")) { + stack_guard->setInitializer( + llvm::Constant::getNullValue(stack_guard->getType()->getElementType())); + stack_guard->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); + } +} + +// Define the function `__mcsema_debug_get_reg_state`. Normally this is part of +// the McSema runtime, but if we are lifting with `--explicit_args`, and are +// also compiling the lifted bitcode back to native, then we may not use the +// runtime and so might not have this useful debugging function available to us. 
+static void DefineDebugGetRegState(void) { + auto get_reg_state = gModule->getFunction("__mcsema_debug_get_reg_state"); + if (get_reg_state) { + return; + } + + auto reg_state = gModule->getGlobalVariable("__mcsema_reg_state", true); + if (!reg_state) { + return; + } + + auto state_ptr_type = reg_state->getType(); + auto reg_func_type = llvm::FunctionType::get(state_ptr_type, false); + get_reg_state = llvm::Function::Create( + reg_func_type, llvm::GlobalValue::ExternalWeakLinkage, + "__mcsema_debug_get_reg_state", gModule); + + llvm::IRBuilder<> ir(llvm::BasicBlock::Create(*gContext, "", get_reg_state)); + ir.CreateRet(reg_state); + + get_reg_state->addFnAttr(llvm::Attribute::NoInline); + get_reg_state->addFnAttr(llvm::Attribute::OptimizeNone); +} + +// Define some of the remill error intrinsics. +static void DefineErrorIntrinsics(llvm::FunctionType *lifted_func_type) { + const char *func_names[] = {"__remill_error", "__remill_missing_block"}; + auto trap = llvm::Intrinsic::getDeclaration(gModule, llvm::Intrinsic::trap); + for (auto func_name : func_names) { + auto func = gModule->getFunction(func_name); + if (!func) { + continue; + } + if (func->isDeclaration()) { + llvm::IRBuilder<> ir(llvm::BasicBlock::Create(*gContext, "", func)); + ir.CreateCall(trap); + ir.CreateUnreachable(); + } + } +} + } // namespace bool LiftCodeIntoModule(const NativeModule *cfg_module) { @@ -106,6 +168,8 @@ bool LiftCodeIntoModule(const NativeModule *cfg_module) { // so that cross-references to lifted code are handled. AddDataSegments(cfg_module); + auto lifted_func_type = remill::LiftedFunctionType(gModule); + // Lift the blocks of instructions into the declared functions. 
if (!DefineLiftedFunctions(cfg_module)) { return false; @@ -127,6 +191,12 @@ bool LiftCodeIntoModule(const NativeModule *cfg_module) { OptimizeModule(); + if (FLAGS_explicit_args) { + DefineGCCStackGuard(); + DefineDebugGetRegState(); + DefineErrorIntrinsics(lifted_func_type); + } + if (!FLAGS_pc_annotation.empty()) { legacy::PropagateInstAnnotations(); } diff --git a/mcsema/BC/Optimize.cpp b/mcsema/BC/Optimize.cpp index 5817cd8ab..95ce0000b 100644 --- a/mcsema/BC/Optimize.cpp +++ b/mcsema/BC/Optimize.cpp @@ -79,8 +79,7 @@ static void ReplaceUndefIntrinsic(llvm::Function *function) { static void RemoveFunction(llvm::Function *func) { if (!func->hasNUsesOrMore(1)) { - func->removeFromParent(); - delete func; + func->eraseFromParent(); } } @@ -177,21 +176,38 @@ static void RemoveIntrinsics(void) { // we'll try to get the optimizer to inline it on our behalf, which should // drop some references :-D if (auto remill_used = gModule->getFunction("__remill_mark_as_used")) { - if (remill_used->isDeclaration()) { - remill_used->setLinkage(llvm::GlobalValue::InternalLinkage); - remill_used->removeFnAttr(llvm::Attribute::NoInline); - remill_used->addFnAttr(llvm::Attribute::InlineHint); - remill_used->addFnAttr(llvm::Attribute::AlwaysInline); - auto block = llvm::BasicBlock::Create(*gContext, "", remill_used); - (void) llvm::ReturnInst::Create(*gContext, block); + std::vector uses; + for (auto use : remill_used->users()) { + if (auto call = llvm::dyn_cast(use)) { + uses.push_back(call); + } } + + for (auto call : uses) { + call->eraseFromParent(); + } + + if (remill_used->hasNUsesOrMore(1)) { + if (remill_used->isDeclaration()) { + remill_used->setLinkage(llvm::GlobalValue::InternalLinkage); + remill_used->removeFnAttr(llvm::Attribute::NoInline); + remill_used->addFnAttr(llvm::Attribute::InlineHint); + remill_used->addFnAttr(llvm::Attribute::AlwaysInline); + auto block = llvm::BasicBlock::Create(*gContext, "", remill_used); + (void) llvm::ReturnInst::Create(*gContext, block); 
+ } + } + + RemoveFunction(remill_used); } - RemoveFunction("__remill_intrinsics"); +// if (auto intrinsics = gModule->getFunction("__remill_intrinsics")) { +// intrinsics->eraseFromParent(); +// } + RemoveFunction("__remill_basic_block"); - RemoveFunction("__remill_mark_as_used"); RemoveFunction("__remill_defer_inlining"); - RemoveFunction("__remill_mark_as_used"); + RemoveFunction("__remill_intrinsics"); } static void ReplaceBarrier(const char *name) { diff --git a/tools/mcsema_disass/ida/refs.py b/tools/mcsema_disass/ida/refs.py index 57d45019f..bd5c15855 100644 --- a/tools/mcsema_disass/ida/refs.py +++ b/tools/mcsema_disass/ida/refs.py @@ -165,6 +165,67 @@ def _nearest_head(ea, bounds): _NO_REFS = tuple() _ENABLE_CACHING = False _BAD_ARM_REF_OFF = (idc.BADADDR, 0) +_NOT_A_REF = set() + +# Remove a reference from `from_ea` to `to_ea`. +def remove_instruction_reference(from_ea, to_ea): + global _REFS, _NOT_A_REF + + _NOT_A_REF.add((from_ea, to_ea)) + + try: + idaapi.del_dref(from_ea, to_ea) + idaapi.del_cref(from_ea, to_ea, False) + idaapi.del_cref(from_ea, to_ea, True) + except: + pass + + if not _ENABLE_CACHING or from_ea not in _REFS: + return + + new_refs = [] + found = False + for old_ref in _REFS[from_ea]: + if old_ref.ea != to_ea: + new_refs.append(old_ref) + else: + found = True + + if found: + _REFS[from_ea] = tuple(new_refs) + +def _get_arm_ref_candidate(mask, op_val, op_str, all_refs): + global _BAD_ARM_REF_OFF + + try: + op_name = op_str.split("@")[0][1:] # `#asc_400E5C@PAGE` -> `asc_400E5C`. + ref_ea = idc.LocByName(op_name) + if (ref_ea & mask) == op_val: + return ref_ea, mask + except: + pass + + # NOTE(pag): We deal with candidates because it's possible that this + # instruction will have multiple references. In the case of + # `@PAGE`-based offsets, it's problematic when the wrong base + # is matched, because it really throws off the C++ side of things + # because the arrangement of the lifted data being on the same + # page is not guaranteed. 
+ + candidates = set() + for ref_ea in all_refs: + if (ref_ea & mask) == op_val: + candidates.add(ref_ea) + return ref_ea, mask + + if len(candidates): + for candidate_ea in candidates: + if candidate_ea == op_val: + return candidate_ea, mask + + return candidates.pop(), mask + + return _BAD_ARM_REF_OFF # Try to handle `@PAGE` and `@PAGEOFF` references, resolving them to their # 'intended' address. @@ -181,18 +242,13 @@ def _try_get_arm_ref_addr(inst, op, op_val, all_refs): op_str = idc.GetOpnd(inst.ea, op.n) if '@PAGEOFF' in op_str: - mask = 0x0fff - for ref_ea in all_refs: - if (ref_ea & mask) == op_val: - return ref_ea, mask - DEBUG("Found @PAGEOFF-based reference at {:x} but could not match it in all_refs".format(inst.ea)); + return _get_arm_ref_candidate(0x0fff, op_val, op_str, all_refs) elif '@PAGE' in op_str: - mask = 0x0fffff000 - for ref_ea in all_refs: - if (ref_ea & mask) == op_val: - return ref_ea, mask - DEBUG("Found @PAGE-based reference at {:x} but could not match it in all_refs".format(inst.ea)); + return _get_arm_ref_candidate(0x0fffff000, op_val, op_str, all_refs) + + elif not is_invalid_ea(op_val) and inst.get_canon_mnem().lower() == "adr": + return op_val, 0 return _BAD_ARM_REF_OFF @@ -238,8 +294,9 @@ def _get_ref_candidate(inst, op, all_refs, binary_is_pie): # will avoid these awkward `if IS_ARM` and the comments about # x86 / amd64 stuff. if IS_ARM: + old_addr_val = addr_val addr_val, mask = _try_get_arm_ref_addr(inst, op, addr_val, all_refs) - + info = idaapi.refinfo_t() has_ref_info = idaapi.get_refinfo(inst.ea, op.n, info) == 1 @@ -343,7 +400,7 @@ def enable_reference_caching(): # Get a list of references from an instruction. 
def get_instruction_references(arg, binary_is_pie=False): - global _ENABLE_CACHING + global _ENABLE_CACHING, _NOT_A_REF inst = arg if isinstance(arg, (int, long)): @@ -447,7 +504,8 @@ def get_instruction_references(arg, binary_is_pie=False): # Note: idc.o_phrase is ignored because it doesn't have a displacement, # and so can't reference a specific symbol. - refs.append(ref) + if (inst.ea, ref.ea) not in _NOT_A_REF: + refs.append(ref) for ref in refs: assert not is_invalid_ea(ref.ea) diff --git a/tools/mcsema_disass/ida/table.py b/tools/mcsema_disass/ida/table.py index b451a2e1d..4bd845dc8 100644 --- a/tools/mcsema_disass/ida/table.py +++ b/tools/mcsema_disass/ida/table.py @@ -147,7 +147,17 @@ def get_num_jump_table_entries(builder): # now lets go and check all the targets max_i = max(curr_num_targets, 1024) entry_addr = builder.table_ea + table_seg_ea = idc.SegStart(builder.table_ea) for i in xrange(max_i): + + # Make sure we don't read across a segment (e.g. if the jump table is the + # last thing in our current segment). + try: + if table_seg_ea != idc.SegStart(entry_addr): + break + except: + break + entry_data = builder.read_entry(entry_addr) next_entry_addr = entry_addr + (builder.entry_size * builder.entry_mult) @@ -209,7 +219,7 @@ def try_get_simple_jump_table_reader(builder): # Try the offset table based approach first, as it's likely to be more # constrained. 
- for offset in (builder.table_ea, builder.table_ea, 0): + for offset in (builder.table_ea, builder.table_ea, builder.offset): for offset_mult in (1, -1): for entry_mult in (1, -1): for size in sizes: @@ -319,16 +329,22 @@ def get_manual_jump_table_reader(builder): of the table base address that happens before the actual `jmp`.""" ret = False next_inst_ea = builder.jump_ea + found_ref_eas = set() for i in xrange(5): inst_ea = next_inst_ea next_inst_ea = idc.PrevHead(inst_ea) if inst_ea == idc.BADADDR: break + elif builder.jump_ea != inst_ea and is_control_flow(inst_ea): + break + refs = get_instruction_references(inst_ea, builder.binary_is_pie) if not len(refs): continue + found_ref_eas.add((inst_ea, refs[0].ea)) + builder.table_ea = refs[0].ea builder.offset = 0 @@ -345,8 +361,80 @@ def get_manual_jump_table_reader(builder): DEBUG("Reader inferred jump table base: {:x}".format(builder.table_ea)) if builder.offset: DEBUG("Reader inferred jump table offset: {:x}".format(builder.offset)) + return ret + + if len(found_ref_eas) < 2: + return ret + + # We're going to try to recognize a jump table of the form: + # + # .text:00000000004009AC ADRP X1, #asc_400E5C@PAGE ; "\b" + # .text:00000000004009B0 ADD X1, X1, #asc_400E5C@PAGEOFF ; "\b" + # .text:00000000004009B4 LDR W0, [X1,W0,UXTW#2] + # .text:00000000004009B8 ADR X1, loc_4009C4 + # .text:00000000004009BC ADD X0, X1, W0,SXTW#2 + # .text:00000000004009C0 BR X0 + # + # Where it's a table of offsets (`asc_400E5C`), and the base offset is + # the basic block `loc_4009C4`. 
+ min_ea, max_ea = get_function_bounds(builder.jump_ea) - return ret + inst_block_ea = idc.BADADDR + block_ea = idc.BADADDR + + for inst_ea, ref_ea in found_ref_eas: + if is_code_by_flags(ref_ea) and min_ea <= ref_ea < max_ea: + inst_block_ea = inst_ea + block_ea = ref_ea + break + + if idc.BADADDR == block_ea: + return ret + + found_ref_eas.remove((inst_block_ea, block_ea)) + + # The idea here is that we want to trick the jump table reader into thinking + # that the offset of the jump table is a basic block. + for inst_ea, ref_ea in found_ref_eas: + builder.table_ea = ref_ea + builder.offset = block_ea + ret = try_get_simple_jump_table_reader(builder) + if ret: + break + + if ret: + DEBUG("Reader inferred jump table base: {:x}".format(builder.table_ea)) + if builder.offset == block_ea: + DEBUG("Reader inferred jump table offset is the block {:x}".format( + builder.offset)) + + # McSema-lifted bitcode doesn't really have a good way of getting the + # address of a basic block, and even so, we don't really want that either. + # The way our jump table lifting works is to preserve the original + # addresses and computation, and `switch` based on that, so we need to + # make sure that the original basic block address shows up in the lifted + # bitcode. + + DEBUG("WARNING: Removing reference from {:x} to block {:x}".format( + inst_block_ea, block_ea)) + remove_instruction_reference(inst_block_ea, block_ea) + + # NOTE(pag): For now we disable this jump table detection, even if it seems + # like we find the table. This is because we don't yet have a good + # way of dealing with the `ADD X0, X1, W0,SXTW#2`, which scales + # the read table entry out by shifting it left by two. Besides + # that, the following code actually works reasonably well. To + # account for this, on the C++ side, we augment jump table + # `switch`es to target blocks that are not referenced by the + # successor lists of any other blocks. 
+ # + # It is also pretty important to make sure that we remove the + # instruction reference. If/when we have good support for this + # kind of jump table, then the above call to + # `remove_instruction_reference` has to be removed so that the + # various things that the C++ side of things does to handle offset- + # based jump tables works. + return False def get_jump_table_reader(builder): """Returns the size of a jump table entry, as well as a reader function diff --git a/tools/mcsema_lift/Lift.cpp b/tools/mcsema_lift/Lift.cpp index 79a586a1c..3acf1bb59 100644 --- a/tools/mcsema_lift/Lift.cpp +++ b/tools/mcsema_lift/Lift.cpp @@ -87,15 +87,16 @@ static void PrintSupportedInstructions(void) { }); } +static std::unique_ptr gLibrary; + // Load in a separate bitcode or IR library, and copy function and variable // declarations from that library into our module. We can use this feature // to provide better type information to McSema. static void LoadLibraryIntoModule(void) { - std::unique_ptr lib( - remill::LoadModuleFromFile(mcsema::gContext, FLAGS_library)); + gLibrary.reset(remill::LoadModuleFromFile(mcsema::gContext, FLAGS_library)); // Declare the functions from the library in McSema's target module. - for (auto &func : *lib) { + for (auto &func : *gLibrary) { auto func_name = func.getName(); if (func_name.startswith("__mcsema") || func_name.startswith("__remill")) { continue; @@ -118,7 +119,7 @@ static void LoadLibraryIntoModule(void) { } // Declare the global variables from the library in McSema's target module. - for (auto &var : lib->globals()) { + for (auto &var : gLibrary->globals()) { auto var_name = var.getName(); if (var_name.startswith("__mcsema") || var_name.startswith("__remill")) { continue; @@ -142,6 +143,23 @@ static void LoadLibraryIntoModule(void) { } } +// Remove unused functions and globals brought in from the library. 
+static void UnloadLibraryFromModule(void) { + for (auto &func : *gLibrary) { + auto our_func = mcsema::gModule->getFunction(func.getName()); + if (our_func && !our_func->hasNUsesOrMore(1)) { + our_func->eraseFromParent(); + } + } + + for (auto &var : gLibrary->globals()) { + auto our_var = mcsema::gModule->getGlobalVariable(var.getName()); + if (our_var && !our_var->hasNUsesOrMore(1)) { + our_var->eraseFromParent(); + } + } +} + } // namespace int main(int argc, char *argv[]) { @@ -282,7 +300,13 @@ int main(int argc, char *argv[]) { << "Unable to lift CFG from " << FLAGS_cfg << " into module " << FLAGS_output; + if (!FLAGS_library.empty()) { + UnloadLibraryFromModule(); + gLibrary.reset(nullptr); + } + remill::StoreModuleToFile(mcsema::gModule, FLAGS_output); + google::ShutDownCommandLineFlags(); google::ShutdownGoogleLogging();