Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

strcmp dev #1787

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
177 changes: 144 additions & 33 deletions manticore/native/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,40 +5,41 @@
from .cpu.abstractcpu import Cpu, ConcretizeArgument
from .state import State
from ..core.smtlib import issymbolic, BitVec
from ..core.smtlib.solver import SelectedSolver
from ..core.smtlib.operators import ITEBV, ZEXTEND
from ..core.smtlib.solver import SelectedSolver, ConstraintSet
from ..core.smtlib.operators import ITEBV, ZEXTEND, AND
from ..core.state import Concretize
from typing import Union
from typing import Union, Optional, Callable

VARIADIC_FUNC_ATTR = "_variadic"


def isvariadic(model):
def isvariadic(model: Callable) -> bool:
"""
:param callable model: Function model
:param model: Function model
:return: Whether `model` models a variadic function
:rtype: bool
"""
return getattr(model, VARIADIC_FUNC_ATTR, False)


def variadic(func):
def variadic(func: Callable) -> Callable:
"""
A decorator used to mark a function model as variadic. This function should
take two parameters: a :class:`~manticore.native.state.State` object, and
a generator object for the arguments.

:param callable func: Function model
:param func: Function model
"""
setattr(func, VARIADIC_FUNC_ATTR, True)
return func


def is_definitely_NULL(byte, constrs) -> bool:
def _is_definitely_NULL(byte: Union[int, BitVec], constrs: ConstraintSet) -> bool:
"""
Checks if a given byte read from memory is NULL.
This supports both concrete & symbolic byte values.

Helper for modeling with char* variables

:param byte: byte read from memory to be examined
:param constrs: state constraints
:return: whether a given byte is NULL or constrained to NULL
Expand All @@ -49,10 +50,12 @@ def is_definitely_NULL(byte, constrs) -> bool:
return byte == 0


def cannot_be_NULL(byte, constrs) -> bool:
def _cannot_be_NULL(byte: Union[int, BitVec], constrs: ConstraintSet) -> bool:
"""
Checks if a given byte read from memory is not NULL or cannot be NULL

Helper for modeling with char* variables

:param byte: byte read from memory to be examined
:param constrs: state constraints
:return: whether a given byte is not NULL or cannot be NULL
Expand All @@ -63,10 +66,12 @@ def cannot_be_NULL(byte, constrs) -> bool:
return byte != 0


def can_be_NULL(byte, constrs) -> bool:
def _can_be_NULL(byte: Union[int, BitVec], constrs: ConstraintSet) -> bool:
"""
Checks if a given byte read from memory can be NULL

Helper for modeling with char* variables

:param byte: byte read from memory to be examined
:param constrs: state constraints
:return: whether a given byte is NULL or can be NULL
Expand All @@ -77,27 +82,125 @@ def can_be_NULL(byte, constrs) -> bool:
return byte == 0


def _find_zero(cpu, constrs, ptr: Union[int, BitVec]) -> int:
def _are_definitely_equal(
byte1: Union[int, BitVec], byte2: Union[int, BitVec], constrs: ConstraintSet
) -> bool:
"""
Checks if two given bytes read from memory are equal

Helper for modeling with char* variables

:param byte1: byte read from memory
:param byte2: byte read from memory
:param constrs: state constraints
:return: whether the given bytes are definetly equal
"""
if issymbolic(byte1) or issymbolic(byte2):
return SelectedSolver.instance().must_be_true(constrs, byte1 == byte2)
else:
return byte1 == byte2


def _can_be_equal(
byte1: Union[int, BitVec], byte2: Union[int, BitVec], constrs: ConstraintSet
) -> bool:
"""
Checks if two given bytes can be equal

Helper for modeling with char* variables

:param byte1: byte read from memory
:param byte2: byte read from memory
:re_definitely_equalparam constrs: state constraints
:return: whether the given bytes could be equal
"""
if issymbolic(byte1) or issymbolic(byte2):
return SelectedSolver.instance().can_be_true(constrs, byte1 == byte2)
else:
return byte1 == byte2


def _find_zero(cpu: Cpu, constrs: ConstraintSet, ptr: Union[int, BitVec]) -> int:
"""
Helper for finding the closest NULL or, effectively NULL byte from a starting address.

:param Cpu cpu:
:param ConstraintSet constrs: Constraints for current `State`
:param int ptr: Address to start searching for a zero from
Helper for modeling with char* variables

:param cpu:
:param constrs: Constraints for current `State`
:param ptr: Address to start searching for a zero from
:return: Offset from `ptr` to first byte that is 0 or an `Expression` that must be zero
"""

offset = 0
while True:
byte = cpu.read_int(ptr + offset, 8)
if is_definitely_NULL(byte, constrs):
if _is_definitely_NULL(byte, constrs):
break
offset += 1

return offset


def strcmp(state: State, s1: Union[int, BitVec], s2: Union[int, BitVec]):
def strcmp_fork(state: State, s1: Union[int, BitVec], s2: Union[int, BitVec]) -> Union[int, BitVec]:
"""
strcmp symbolic model.

Algorithm:

"""
if issymbolic(s1):
raise ConcretizeArgument(state.cpu, 1)

if issymbolic(s2):
raise ConcretizeArgument(state.cpu, 2)

cpu = state.cpu
constrs = state.constraints

# Initialize offset based on whether state has been forked in strcmp
if "strcmp" not in state.context:
offset = 0
else:
offset = state.context["strcmp"]

# Get load values and cast to unsigned
s1_val = ZEXTEND(cpu.read_int(s1 + offset, 8), cpu.address_bit_size)
s2_val = ZEXTEND(cpu.read_int(s2 + offset, 8), cpu.address_bit_size)

# Iterate till 2 unequal bytes are found
while _are_definitely_equal(s1_val, s2_val, constrs):

# Check for NULL and possible NULL
if _is_definitely_NULL(s1_val, constrs):
return 0
elif _can_be_NULL(s1_val, constrs):
state.context["strcmp"] = offset
raise Concretize(
"Forking on possible NULL strcmp", expression=(s1_val == 0), policy="ALL"
)

offset += 1
s1_val = ZEXTEND(cpu.read_int(s1 + offset, 8), cpu.address_bit_size)
s2_val = ZEXTEND(cpu.read_int(s2 + offset, 8), cpu.address_bit_size)

# Fork if can be equal
if _can_be_equal(s1_val, s2_val, constrs):
state.context["strcmp"] = offset
raise Concretize(
"Forking on possible equal strcmp", expression=(s1_val == s2_val), policy="ALL"
)

# Clean up state context
if "strcmp" in state.context:
del state.context["strcmp"]

return s1_val - s2_val


def strcmp_ITE(
state: State, s1: Union[int, BitVec], s2: Union[int, BitVec]
) -> Optional[Union[int, BitVec]]:
"""
strcmp symbolic model.

Expand All @@ -119,23 +222,25 @@ def strcmp(state: State, s1: Union[int, BitVec], s2: Union[int, BitVec]):
:param s1: Address of string 1
:param s2: Address of string 2
:return: Symbolic strcmp result
:rtype: Expression or int
"""

cpu = state.cpu

if issymbolic(s1):
raise ConcretizeArgument(state.cpu, 1)

if issymbolic(s2):
raise ConcretizeArgument(state.cpu, 2)

cpu = state.cpu
constrs = state.constraints

s1_zero_idx = _find_zero(cpu, state.constraints, s1)
s2_zero_idx = _find_zero(cpu, state.constraints, s2)
min_zero_idx = min(s1_zero_idx, s2_zero_idx)

ret = None

for offset in range(min_zero_idx, -1, -1):
# Zero extends for an unsigned compairison
s1char = ZEXTEND(cpu.read_int(s1 + offset, 8), cpu.address_bit_size)
s2char = ZEXTEND(cpu.read_int(s2 + offset, 8), cpu.address_bit_size)

Expand All @@ -153,7 +258,7 @@ def strcmp(state: State, s1: Union[int, BitVec], s2: Union[int, BitVec]):
return ret


def strlen_exact(state: State, s: Union[int, BitVec]) -> Union[int, BitVec]:
def strlen_fork(state: State, s: Union[int, BitVec]) -> Union[int, BitVec]:
"""
strlen symbolic model

Expand All @@ -180,19 +285,22 @@ def strlen_exact(state: State, s: Union[int, BitVec]) -> Union[int, BitVec]:
offset = state.context["strlen"]

c = cpu.read_int(s + offset, 8)
while not is_definitely_NULL(c, constrs):
while not _is_definitely_NULL(c, constrs):
# If the byte can be NULL concretize and fork states
if can_be_NULL(c, constrs):
if _can_be_NULL(c, constrs):
state.context["strlen"] = offset
raise Concretize("Forking on possible NULL strlen", expression=(c == 0), policy="ALL")

offset += 1
c = cpu.read_int(s + offset, 8)

if "strlen" in state.context:
del state.context["strlen"]

return offset


def strlen_approx(state: State, s: Union[int, BitVec]) -> Union[int, BitVec]:
def strlen_ITE(state: State, s: Union[int, BitVec]) -> Union[int, BitVec]:
"""
strlen symbolic model

Expand All @@ -201,8 +309,8 @@ def strlen_approx(state: State, s: Union[int, BitVec]) -> Union[int, BitVec]:
Algorithm: Walks from end of string not including NULL building ITE tree when current byte is symbolic.

:param state: current program state
:param s: Address of string
:return: Symbolic strlen result
:param s: address of string
:return: symbolic ITE or integer strlen result
"""

if issymbolic(s):
Expand All @@ -226,20 +334,21 @@ def strcpy(state: State, dst: Union[int, BitVec], src: Union[int, BitVec]) -> Un
strcpy symbolic model

Algorithm: Copy every byte from src to dst until finding a byte that is NULL or is
constrained to only the NULL value. Every time a byte is fouund that can be NULL but
constrained to only the NULL value. Every time a byte is found that can be NULL but
is not definetly NULL concretize and fork states.

:param state: current program state
:param dst: destination string address
:param src: source string address
:return: pointer to the dst
"""
if issymbolic(src):
raise ConcretizeArgument(state.cpu, 2)

if issymbolic(dst):
raise ConcretizeArgument(state.cpu, 1)

if issymbolic(src):
raise ConcretizeArgument(state.cpu, 2)

cpu = state.cpu
constrs = state.constraints
ret = dst
Expand All @@ -252,11 +361,11 @@ def strcpy(state: State, dst: Union[int, BitVec], src: Union[int, BitVec]) -> Un

# Copy until a src_byte is symbolic and constrained to '\000', or is concrete and '\000'
src_val = cpu.read_int(src + offset, 8)
while not is_definitely_NULL(src_val, constrs):
while not _is_definitely_NULL(src_val, constrs):
cpu.write_int(dst + offset, src_val, 8)

# If a src byte can be NULL concretize and fork states
if can_be_NULL(src_val, constrs):
if _can_be_NULL(src_val, constrs):
state.context["strcpy"] = offset
raise Concretize("Forking on NULL strcpy", expression=(src_val == 0), policy="ALL")
offset += 1
Expand Down Expand Up @@ -289,8 +398,10 @@ def strncpy(

if issymbolic(dst):
raise ConcretizeArgument(state.cpu, 1)

if issymbolic(src):
raise ConcretizeArgument(state.cpu, 2)

if issymbolic(n):
raise ConcretizeArgument(state.cpu, 3)

Expand All @@ -306,11 +417,11 @@ def strncpy(

# Copy until a src_byte is symbolic and constrained to '\000', or is concrete and '\000'
src_val = cpu.read_int(src + offset, 8)
while offset < n and not is_definitely_NULL(src_val, constrs):
while offset < n and not _is_definitely_NULL(src_val, constrs):
cpu.write_int(dst + offset, src_val, 8)

# If a src byte can be NULL concretize and fork states
if can_be_NULL(src_val, constrs):
if _can_be_NULL(src_val, constrs):
state.context["strncpy"] = offset
raise Concretize("Forking on NULL strncpy", expression=(src_val == 0), policy="ALL")
offset += 1
Expand Down
Loading