Skip to content

Commit

Permalink
Merge pull request #1641 from mandiant/fix/issue-1624
Browse files Browse the repository at this point in the history
forwarded export features
  • Loading branch information
williballenthin authored Jul 18, 2023
2 parents 244d56e + 40793ee commit 972fbe7
Show file tree
Hide file tree
Showing 11 changed files with 121 additions and 24 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,5 @@ Pipfile
Pipfile.lock
/cache/
.github/binja/binaryninja
.github/binja/download_headless.py
.github/binja/BinaryNinja-headless.zip
8 changes: 5 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
# Change Log

## master (unreleased)
- extract function and API names from ELF symtab entries @yelhamer https://github.com/mandiant/capa-rules/issues/736

### New Features
- Utility script to detect feature overlap between new and existing CAPA rules [#1451](https://github.com/mandiant/capa/issues/1451) [@Aayush-Goel-04](https://github.com/aayush-goel-04)
- use fancy box drawing characters for default output #1586 @williballenthin
- use [pre-commit](https://pre-commit.com/) to invoke linters #1579 @williballenthin
- publish via PyPI trusted publishing #1491 @williballenthin
- migrate to pyproject.toml #1301 @williballenthin
- extract forwarded exports from PE files #1624 @williballenthin
- extract function and API names from ELF symtab entries @yelhamer https://github.com/mandiant/capa-rules/issues/736

### Breaking Changes
- Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat
Expand Down Expand Up @@ -67,6 +66,9 @@
- update ATT&CK/MBC data for linting #1568 @mr-tz
- log time taken to analyze each function #1290 @williballenthin
- tests: make fixture available via conftest.py #1592 @williballenthin
- publish via PyPI trusted publishing #1491 @williballenthin
- migrate to pyproject.toml #1301 @williballenthin


### Raw diffs
- [capa v5.1.0...master](https://github.com/mandiant/capa/compare/v5.1.0...master)
Expand Down
17 changes: 17 additions & 0 deletions capa/features/extractors/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,23 @@ def generate_symbols(dll: str, symbol: str) -> Iterator[str]:
yield symbol[:-1]


def reformat_forwarded_export_name(forwarded_name: str) -> str:
    """
    normalize a forwarded export name of the form `<dll name or path>.<symbol>`.

    the DLL part is lowercased, while the symbol part is kept verbatim.
    when the input contains no period at all, the whole input is treated
    as the symbol and the DLL part is empty, so the result starts with ".".
    """
    # split on the *last* period, not the first:
    # the DLL part can be a full path, as seen in sample
    #  ef64d6d7c34250af8e21a10feb931c9b
    # which presumably means the path itself can contain periods.
    dll, _, symbol = forwarded_name.rpartition(".")
    return dll.lower() + "." + symbol


def all_zeros(bytez: bytes) -> bool:
    """return True when every byte in `bytez` is zero (vacuously True for empty input)."""
    # iterating a bytes object yields ints, so any non-zero byte is truthy.
    return not any(builtins.bytes(bytez))

Expand Down
11 changes: 9 additions & 2 deletions capa/features/extractors/ida/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import idc
import idaapi
import idautils
import ida_entry

import capa.features.extractors.common
import capa.features.extractors.helpers
Expand Down Expand Up @@ -83,8 +84,14 @@ def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:

def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]:
    """
    extract function exports, including forwarded exports.

    for each entry point reported by IDA:
      - a regular export yields `Export(name)`.
      - a forwarded export yields `Export(<dll>.<symbol>)`, with the DLL part
        lowercased, plus a "forwarded export" characteristic.

    all features are located at the entry's address.
    """
    # a single pass over the entries: each entry is reported either as a
    # regular export or as a forwarded export, never both.
    for _, ordinal, ea, name in idautils.Entries():
        forwarded_name = ida_entry.get_entry_forwarder(ordinal)
        if forwarded_name is None:
            # no forwarder: this is a regular export.
            yield Export(name), AbsoluteVirtualAddress(ea)
        else:
            forwarded_name = capa.features.extractors.helpers.reformat_forwarded_export_name(forwarded_name)
            yield Export(forwarded_name), AbsoluteVirtualAddress(ea)
            yield Characteristic("forwarded export"), AbsoluteVirtualAddress(ea)


def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
Expand Down
16 changes: 14 additions & 2 deletions capa/features/extractors/pefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,20 @@ def extract_file_export_names(pe, **kwargs):
name = export.name.partition(b"\x00")[0].decode("ascii")
except UnicodeDecodeError:
continue
va = base_address + export.address
yield Export(name), AbsoluteVirtualAddress(va)

if export.forwarder is None:
va = base_address + export.address
yield Export(name), AbsoluteVirtualAddress(va)

else:
try:
forwarded_name = export.forwarder.partition(b"\x00")[0].decode("ascii")
except UnicodeDecodeError:
continue
forwarded_name = capa.features.extractors.helpers.reformat_forwarded_export_name(forwarded_name)
va = base_address + export.address
yield Export(forwarded_name), AbsoluteVirtualAddress(va)
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(va)


def extract_file_import_names(pe, **kwargs):
Expand Down
28 changes: 27 additions & 1 deletion capa/features/extractors/viv/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import Tuple, Iterator

import PE.carve as pe_carve # vivisect PE
import vivisect
import viv_utils
import viv_utils.flirt

Expand All @@ -25,10 +26,35 @@ def extract_file_embedded_pe(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]
yield Characteristic("embedded pe"), FileOffsetAddress(offset)


def extract_file_export_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]:
def get_first_vw_filename(vw: vivisect.VivWorkspace):
    """
    return the first filename key found in the workspace's file metadata.

    vivisect associates metadata with each file that is loaded into the workspace,
    and capa only loads a single file into each workspace.
    so, as a simplifying assumption, the first entry is taken to be the file under analysis.
    the alternative would be to pass around the module name of the file we're analyzing,
    which is a pain.
    """
    filenames = iter(vw.filemeta.keys())
    return next(filenames)


def extract_file_export_names(vw: vivisect.VivWorkspace, **kwargs) -> Iterator[Tuple[Feature, Address]]:
    """
    extract export features from a vivisect workspace.

    yields `Export(name)` for each export reported by the workspace.
    for PE workspaces, additionally yields, for each "forwarders" file metadata entry,
    `Export(<dll>.<symbol>)` and a "forwarded export" characteristic,
    located at the image base plus the entry's RVA.
    """
    for export_va, _, export_name, _ in vw.getExports():
        yield Export(export_name), AbsoluteVirtualAddress(export_va)

    # forwarders are only handled for PE files.
    if vw.getMeta("Format") != "pe":
        return

    image_base = vw.parsedbin.IMAGE_NT_HEADERS.OptionalHeader.ImageBase
    forwarders = vw.getFileMeta(get_first_vw_filename(vw), "forwarders")

    for rva, _, raw_forwarder in forwarders:
        # the forwarder is handled as a byte string: keep what precedes the first NUL.
        try:
            decoded = raw_forwarder.partition(b"\x00")[0].decode("ascii")
        except UnicodeDecodeError:
            # skip names that are not plain ASCII.
            continue

        feature_name = capa.features.extractors.helpers.reformat_forwarded_export_name(decoded)
        forwarder_va = image_base + rva
        yield Export(feature_name), AbsoluteVirtualAddress(forwarder_va)
        yield Characteristic("forwarded export"), AbsoluteVirtualAddress(forwarder_va)


def extract_file_import_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]:
"""
Expand Down
14 changes: 11 additions & 3 deletions capa/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
BACKEND_VIV = "vivisect"
BACKEND_DOTNET = "dotnet"
BACKEND_BINJA = "binja"
BACKEND_PEFILE = "pefile"

E_MISSING_RULES = 10
E_MISSING_FILE = 11
Expand Down Expand Up @@ -567,8 +568,12 @@ def get_extractor(

return capa.features.extractors.binja.extractor.BinjaFeatureExtractor(bv)

# default to use vivisect backend
else:
elif backend == BACKEND_PEFILE:
import capa.features.extractors.pefile

return capa.features.extractors.pefile.PefileFeatureExtractor(path)

elif backend == BACKEND_VIV:
import capa.features.extractors.viv.extractor

with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
Expand All @@ -586,6 +591,9 @@ def get_extractor(

return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path, os_)

else:
raise ValueError("unexpected backend: " + backend)


def get_file_extractors(sample: Path, format_: str) -> List[FeatureExtractor]:
file_extractors: List[FeatureExtractor] = []
Expand Down Expand Up @@ -911,7 +919,7 @@ def install_common_args(parser, wanted=None):
"--backend",
type=str,
help="select the backend to use",
choices=(BACKEND_VIV, BACKEND_BINJA),
choices=(BACKEND_VIV, BACKEND_BINJA, BACKEND_PEFILE),
default=BACKEND_VIV,
)

Expand Down
18 changes: 6 additions & 12 deletions scripts/profile-time.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,22 +58,16 @@
logger = logging.getLogger("capa.profile")


def subshell(cmd):
    """run `cmd` through the shell and return its captured stdout as text, stripped of surrounding whitespace."""
    # stderr is captured too, but only stdout is returned; the exit code is not checked.
    completed = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    output = completed.stdout
    return output.strip()


def main(argv=None):
if argv is None:
argv = sys.argv[1:]

label = subprocess.run(
"git show --pretty=oneline --abbrev-commit | head -n 1", shell=True, capture_output=True, text=True
).stdout.strip()
is_dirty = (
subprocess.run(
"git status | grep 'modified: ' | grep -v 'rules' | grep -v 'tests/data'",
shell=True,
capture_output=True,
text=True,
).stdout
!= ""
)
label = subshell("git show --pretty=oneline --abbrev-commit | head -n 1").strip()
is_dirty = subshell("git status | grep 'modified: ' | grep -v 'rules' | grep -v 'tests/data'") != ""

if is_dirty:
label += " (dirty)"
Expand Down
14 changes: 13 additions & 1 deletion scripts/show-features.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
import sys
import logging
import argparse
from typing import Tuple
from pathlib import Path

import capa.main
Expand All @@ -80,8 +81,10 @@
import capa.features.common
import capa.features.freeze
import capa.features.address
import capa.features.extractors.pefile
import capa.features.extractors.base_extractor
from capa.helpers import log_unsupported_runtime_error
from capa.features.extractors.base_extractor import FunctionHandle

logger = logging.getLogger("capa.show-features")

Expand All @@ -101,6 +104,10 @@ def main(argv=None):
args = parser.parse_args(args=argv)
capa.main.handle_common_args(args)

if args.function and args.backend == "pefile":
print("pefile backend does not support extracting function features")
return -1

try:
taste = capa.helpers.get_file_taste(Path(args.sample))
except IOError as e:
Expand Down Expand Up @@ -137,7 +144,12 @@ def main(argv=None):
for feature, addr in extractor.extract_file_features():
print(f"file: {format_address(addr)}: {feature}")

function_handles = tuple(extractor.get_functions())
function_handles: Tuple[FunctionHandle, ...]
if isinstance(extractor, capa.features.extractors.pefile.PefileFeatureExtractor):
# pefile extractor doesn't extract function features
function_handles = ()
else:
function_handles = tuple(extractor.get_functions())

if args.function:
if args.format == "freeze":
Expand Down
8 changes: 8 additions & 0 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,8 @@ def get_data_path_by_name(name) -> Path:
return CD / "data" / "294b8db1f2702b60fb2e42fdc50c2cee6a5046112da9a5703a548a4fa50477bc.elf_"
elif name.startswith("2bf18d"):
return CD / "data" / "2bf18d0403677378adad9001b1243211.elf_"
elif name.startswith("ea2876"):
return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_"
else:
raise ValueError(f"unexpected sample fixture: {name}")

Expand Down Expand Up @@ -366,6 +368,8 @@ def get_sample_md5_by_name(name):
return "3db3e55b16a7b1b1afb970d5e77c5d98"
elif name.startswith("2bf18d"):
return "2bf18d0403677378adad9001b1243211"
elif name.startswith("ea2876"):
return "76fa734236daa023444dec26863401dc"
else:
raise ValueError(f"unexpected sample fixture: {name}")

Expand Down Expand Up @@ -529,6 +533,8 @@ def parametrize(params, values, **kwargs):
("kernel32", "file", capa.features.file.Export("BaseThreadInitThunk"), True),
("kernel32", "file", capa.features.file.Export("lstrlenW"), True),
("kernel32", "file", capa.features.file.Export("nope"), False),
# forwarded export
("ea2876", "file", capa.features.file.Export("vresion.GetFileVersionInfoA"), True),
# file/imports
("mimikatz", "file", capa.features.file.Import("advapi32.CryptSetHashParam"), True),
("mimikatz", "file", capa.features.file.Import("CryptSetHashParam"), True),
Expand Down Expand Up @@ -715,6 +721,8 @@ def parametrize(params, values, **kwargs):
("mimikatz", "function=0x4702FD", capa.features.common.Characteristic("calls from"), False),
# function/characteristic(calls to)
("mimikatz", "function=0x40105D", capa.features.common.Characteristic("calls to"), True),
# function/characteristic(forwarded export)
("ea2876", "file", capa.features.common.Characteristic("forwarded export"), True),
# before this we used ambiguous (0x4556E5, False), which has a data reference / indirect recursive call, see #386
("mimikatz", "function=0x456BB9", capa.features.common.Characteristic("calls to"), False),
# file/function-name
Expand Down
9 changes: 9 additions & 0 deletions tests/test_binja_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import fixtures

import capa.main
import capa.features.file
import capa.features.common

logger = logging.getLogger(__file__)

Expand Down Expand Up @@ -40,6 +42,13 @@
def test_binja_features(sample, scope, feature, expected):
    # known gap: stack string detection currently fails under Binja.
    stack_string = capa.features.common.Characteristic("stack string")
    if feature == stack_string:
        pytest.xfail("skip failing Binja stack string detection temporarily, see #1473")

    # known gap: the Binja backend doesn't support forwarded exports.
    # this covers both the forwarded export names (Export values containing a ".")
    # and the "forwarded export" characteristic itself.
    is_forwarded_export_name = isinstance(feature, capa.features.file.Export) and "." in str(feature.value)
    if is_forwarded_export_name or feature == capa.features.common.Characteristic("forwarded export"):
        pytest.xfail("skip Binja unsupported forwarded export feature, see #1646")

    fixtures.do_test_feature_presence(fixtures.get_binja_extractor, sample, scope, feature, expected)


Expand Down

0 comments on commit 972fbe7

Please sign in to comment.