Skip to content

Commit

Permalink
merge upstream
Browse files: browse the repository at this point in the history
  • Loading branch information
mike-hunhoff committed Jul 12, 2023
2 parents 97c2005 + 87a6459 commit eeb0f78
Show file tree
Hide file tree
Showing 62 changed files with 490 additions and 528 deletions.
11 changes: 10 additions & 1 deletion .github/flake8.ini
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,16 @@ extend-ignore =
# B010 Do not call setattr with a constant attribute value
B010,
# G200 Logging statement uses exception in arguments
G200
G200,
# SIM102 Use a single if-statement instead of nested if-statements
# doesn't provide a space for commenting or logical separation of conditions
SIM102,
# SIM114 Use logical or and a single body
# makes logic trees too complex
SIM114,
# SIM117 Use 'with Foo, Bar:' instead of multiple with statements
# makes lines too long
SIM117


per-file-ignores =
Expand Down
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@
### Breaking Changes
- Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat
- Python 3.8 is now the minimum supported Python version #1578 @williballenthin
- Updated file paths to use pathlib.Path for improved path handling and compatibility [#1534](https://github.com/mandiant/capa/issues/1534) [@Aayush-Goel-04](https://github.com/aayush-goel-04)

### New Rules (24)
### New Rules (26)

- load-code/shellcode/execute-shellcode-via-windows-callback-function [email protected] [email protected]
- nursery/execute-shellcode-via-indirect-call [email protected]
Expand All @@ -39,6 +40,8 @@
- host-interaction/gui/switch-active-desktop [email protected]
- host-interaction/service/query-service-configuration @mr-tz
- anti-analysis/anti-av/patch-event-tracing-for-windows-function [email protected]
- data-manipulation/encoding/xor/covertly-decode-and-write-data-to-windows-directory-using-indirect-calls [email protected]
- linking/runtime-linking/resolve-function-by-brute-ratel-badger-hash [email protected]
-


Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa)
[![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases)
[![Number of rules](https://img.shields.io/badge/rules-812-blue.svg)](https://github.com/mandiant/capa-rules)
[![Number of rules](https://img.shields.io/badge/rules-823-blue.svg)](https://github.com/mandiant/capa-rules)
[![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
[![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases)
[![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt)
Expand Down
2 changes: 1 addition & 1 deletion capa/features/extractors/binja/basicblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def is_mov_imm_to_stack(il: MediumLevelILInstruction) -> bool:
if il.src.operation != MediumLevelILOperation.MLIL_CONST:
return False

if not il.dest.source_type == VariableSourceType.StackVariableSourceType:
if il.dest.source_type != VariableSourceType.StackVariableSourceType:
return False

return True
Expand Down
4 changes: 1 addition & 3 deletions capa/features/extractors/binja/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,7 @@ def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
mlil_lookup[mlil_bb.source_block.start] = mlil_bb

for bb in f.basic_blocks:
mlil_bb = None
if bb.start in mlil_lookup:
mlil_bb = mlil_lookup[bb.start]
mlil_bb = mlil_lookup.get(bb.start)

yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=(bb, mlil_bb))

Expand Down
5 changes: 3 additions & 2 deletions capa/features/extractors/binja/find_binja_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import subprocess
from pathlib import Path

# When the script gets executed as a standalone executable (via PyInstaller), `import binaryninja` does not work because
# we have excluded the binaryninja module in `pyinstaller.spec`. The trick here is to call the system Python and try
Expand All @@ -25,9 +26,9 @@
"""


def find_binja_path() -> str:
def find_binja_path() -> Path:
raw_output = subprocess.check_output(["python", "-c", code]).decode("ascii").strip()
return bytes.fromhex(raw_output).decode("utf8")
return Path(bytes.fromhex(raw_output).decode("utf8"))


if __name__ == "__main__":
Expand Down
15 changes: 5 additions & 10 deletions capa/features/extractors/binja/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index
for llil in func.get_llils_at(ih.address):
visit_llil_exprs(llil, llil_checker)

for result in results:
yield result
yield from results


def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
Expand Down Expand Up @@ -318,8 +317,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index
for llil in func.get_llils_at(ih.address):
visit_llil_exprs(llil, llil_checker)

for result in results:
yield result
yield from results


def is_nzxor_stack_cookie(f: Function, bb: BinjaBasicBlock, llil: LowLevelILInstruction) -> bool:
Expand Down Expand Up @@ -375,8 +373,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index
for llil in func.get_llils_at(ih.address):
visit_llil_exprs(llil, llil_checker)

for result in results:
yield result
yield from results


def extract_insn_mnemonic_features(
Expand Down Expand Up @@ -438,8 +435,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILOperation, index:
for llil in func.get_llils_at(ih.address):
visit_llil_exprs(llil, llil_checker)

for result in results:
yield result
yield from results


def extract_insn_segment_access_features(
Expand All @@ -466,8 +462,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index
for llil in func.get_llils_at(ih.address):
visit_llil_exprs(llil, llil_checker)

for result in results:
yield result
yield from results


def extract_insn_cross_section_cflow(
Expand Down
17 changes: 9 additions & 8 deletions capa/features/extractors/dnfile/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from __future__ import annotations

from typing import Dict, List, Tuple, Union, Iterator, Optional
from pathlib import Path

import dnfile
from dncil.cil.opcode import OpCodes
Expand Down Expand Up @@ -52,25 +53,25 @@ def __init__(self, pe: dnfile.dnPE):
self.types[type_.token] = type_

def get_import(self, token: int) -> Optional[Union[DnType, DnUnmanagedMethod]]:
return self.imports.get(token, None)
return self.imports.get(token)

def get_native_import(self, token: int) -> Optional[Union[DnType, DnUnmanagedMethod]]:
return self.native_imports.get(token, None)
return self.native_imports.get(token)

def get_method(self, token: int) -> Optional[Union[DnType, DnUnmanagedMethod]]:
return self.methods.get(token, None)
return self.methods.get(token)

def get_field(self, token: int) -> Optional[Union[DnType, DnUnmanagedMethod]]:
return self.fields.get(token, None)
return self.fields.get(token)

def get_type(self, token: int) -> Optional[Union[DnType, DnUnmanagedMethod]]:
return self.types.get(token, None)
return self.types.get(token)


class DnfileFeatureExtractor(FeatureExtractor):
def __init__(self, path: str):
def __init__(self, path: Path):
super().__init__()
self.pe: dnfile.dnPE = dnfile.dnPE(path)
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))

# pre-compute .NET token lookup tables; each .NET method has access to this cache for feature extraction
# most relevant at instruction scope
Expand Down Expand Up @@ -119,7 +120,7 @@ def get_functions(self) -> Iterator[FunctionHandle]:
address: DNTokenAddress = DNTokenAddress(insn.operand.value)

# record call to destination method; note: we only consider MethodDef methods for destinations
dest: Optional[FunctionHandle] = methods.get(address, None)
dest: Optional[FunctionHandle] = methods.get(address)
if dest is not None:
dest.ctx["calls_to"].add(fh.address)

Expand Down
4 changes: 2 additions & 2 deletions capa/features/extractors/dnfile/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Union[dnfile.base.MDT
return InvalidToken(token.value)
return user_string

table: Optional[dnfile.base.ClrMetaDataTable] = pe.net.mdtables.tables.get(token.table, None)
table: Optional[dnfile.base.ClrMetaDataTable] = pe.net.mdtables.tables.get(token.table)
if table is None:
# table index is not valid
return InvalidToken(token.value)
Expand Down Expand Up @@ -204,7 +204,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
continue

token: int = calculate_dotnet_token_value(method.table.number, method.row_index)
access: Optional[str] = accessor_map.get(token, None)
access: Optional[str] = accessor_map.get(token)

method_name: str = method.row.Name
if method_name.startswith(("get_", "set_")):
Expand Down
2 changes: 1 addition & 1 deletion capa/features/extractors/dnfile/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from typing import Optional


class DnType(object):
class DnType:
def __init__(self, token: int, class_: str, namespace: str = "", member: str = "", access: Optional[str] = None):
self.token: int = token
self.access: Optional[str] = access
Expand Down
7 changes: 4 additions & 3 deletions capa/features/extractors/dnfile_.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
from typing import Tuple, Iterator
from pathlib import Path

import dnfile
import pefile
Expand Down Expand Up @@ -74,10 +75,10 @@ def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]


class DnfileFeatureExtractor(FeatureExtractor):
def __init__(self, path: str):
def __init__(self, path: Path):
super().__init__()
self.path: str = path
self.pe: dnfile.dnPE = dnfile.dnPE(path)
self.path: Path = path
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))

def get_base_address(self) -> AbsoluteVirtualAddress:
return AbsoluteVirtualAddress(0x0)
Expand Down
7 changes: 4 additions & 3 deletions capa/features/extractors/dotnetfile.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
from typing import Tuple, Iterator
from pathlib import Path

import dnfile
import pefile
Expand Down Expand Up @@ -158,10 +159,10 @@ def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]


class DotnetFileFeatureExtractor(FeatureExtractor):
def __init__(self, path: str):
def __init__(self, path: Path):
super().__init__()
self.path: str = path
self.pe: dnfile.dnPE = dnfile.dnPE(path)
self.path: Path = path
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))

def get_base_address(self):
return NO_ADDRESS
Expand Down
3 changes: 1 addition & 2 deletions capa/features/extractors/elf.py
Original file line number Diff line number Diff line change
Expand Up @@ -706,8 +706,7 @@ def get_symbols(self) -> Iterator[Symbol]:
return a tuple: (name, value, size, info, other, shndx)
for each symbol contained in the symbol table
"""
for symbol in self.symbols:
yield symbol
yield from self.symbols

@classmethod
def from_Elf(cls, ElfBinary) -> Optional["SymTab"]:
Expand Down
14 changes: 6 additions & 8 deletions capa/features/extractors/elffile.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import io
import logging
from typing import Tuple, Iterator
from pathlib import Path

from elftools.elf.elffile import ELFFile, SymbolTableSection

Expand Down Expand Up @@ -107,11 +108,10 @@ def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature,


class ElfFeatureExtractor(FeatureExtractor):
def __init__(self, path: str):
def __init__(self, path: Path):
super().__init__()
self.path = path
with open(self.path, "rb") as f:
self.elf = ELFFile(io.BytesIO(f.read()))
self.path: Path = path
self.elf = ELFFile(io.BytesIO(path.read_bytes()))

def get_base_address(self):
# virtual address of the first segment with type LOAD
Expand All @@ -120,15 +120,13 @@ def get_base_address(self):
return AbsoluteVirtualAddress(segment.header.p_vaddr)

def extract_global_features(self):
with open(self.path, "rb") as f:
buf = f.read()
buf = self.path.read_bytes()

for feature, addr in extract_global_features(self.elf, buf):
yield feature, addr

def extract_file_features(self):
with open(self.path, "rb") as f:
buf = f.read()
buf = self.path.read_bytes()

for feature, addr in extract_file_features(self.elf, buf):
yield feature, addr
Expand Down
28 changes: 11 additions & 17 deletions capa/features/extractors/ida/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def get_file_externs() -> Dict[int, Tuple[str, str, int]]:
externs = {}

for seg in get_segments(skip_header_segments=True):
if not (seg.type == ida_segment.SEG_XTRN):
if seg.type != ida_segment.SEG_XTRN:
continue

for ea in idautils.Functions(seg.start_ea, seg.end_ea):
Expand Down Expand Up @@ -275,20 +275,18 @@ def is_op_offset(insn: idaapi.insn_t, op: idaapi.op_t) -> bool:

def is_sp_modified(insn: idaapi.insn_t) -> bool:
"""determine if instruction modifies SP, ESP, RSP"""
for op in get_insn_ops(insn, target_ops=(idaapi.o_reg,)):
if op.reg == idautils.procregs.sp.reg and is_op_write(insn, op):
# register is stack and written
return True
return False
return any(
op.reg == idautils.procregs.sp.reg and is_op_write(insn, op)
for op in get_insn_ops(insn, target_ops=(idaapi.o_reg,))
)


def is_bp_modified(insn: idaapi.insn_t) -> bool:
"""check if instruction modifies BP, EBP, RBP"""
for op in get_insn_ops(insn, target_ops=(idaapi.o_reg,)):
if op.reg == idautils.procregs.bp.reg and is_op_write(insn, op):
# register is base and written
return True
return False
return any(
op.reg == idautils.procregs.bp.reg and is_op_write(insn, op)
for op in get_insn_ops(insn, target_ops=(idaapi.o_reg,))
)


def is_frame_register(reg: int) -> bool:
Expand Down Expand Up @@ -334,10 +332,7 @@ def mask_op_val(op: idaapi.op_t) -> int:

def is_function_recursive(f: idaapi.func_t) -> bool:
"""check if function is recursive"""
for ref in idautils.CodeRefsTo(f.start_ea, True):
if f.contains(ref):
return True
return False
return any(f.contains(ref) for ref in idautils.CodeRefsTo(f.start_ea, True))


def is_basic_block_tight_loop(bb: idaapi.BasicBlock) -> bool:
Expand Down Expand Up @@ -386,8 +381,7 @@ def find_data_reference_from_insn(insn: idaapi.insn_t, max_depth: int = 10) -> i
def get_function_blocks(f: idaapi.func_t) -> Iterator[idaapi.BasicBlock]:
"""yield basic blocks contained in specified function"""
# leverage idaapi.FC_NOEXT flag to ignore useless external blocks referenced by the function
for block in idaapi.FlowChart(f, flags=(idaapi.FC_PREDS | idaapi.FC_NOEXT)):
yield block
yield from idaapi.FlowChart(f, flags=(idaapi.FC_PREDS | idaapi.FC_NOEXT))


def is_basic_block_return(bb: idaapi.BasicBlock) -> bool:
Expand Down
4 changes: 2 additions & 2 deletions capa/features/extractors/ida/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def extract_insn_offset_features(

p_info = capa.features.extractors.ida.helpers.get_op_phrase_info(op)

op_off = p_info.get("offset", None)
op_off = p_info.get("offset")
if op_off is None:
continue

Expand Down Expand Up @@ -447,7 +447,7 @@ def extract_insn_cross_section_cflow(
insn: idaapi.insn_t = ih.inner

for ref in idautils.CodeRefsFrom(insn.ea, False):
if ref in get_imports(fh.ctx).keys():
if ref in get_imports(fh.ctx):
# ignore API calls
continue
if not idaapi.getseg(ref):
Expand Down
Loading

0 comments on commit eeb0f78

Please sign in to comment.