Skip to content

Commit

Permalink
Merge branch 'master' into backend-ghidra
Browse files Browse the repository at this point in the history
  • Loading branch information
colton-gabertan committed Aug 2, 2023
2 parents 0e58ec5 + 149983d commit bab6c97
Show file tree
Hide file tree
Showing 7 changed files with 144 additions and 23 deletions.
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
## master (unreleased)

### New Features
- ELF: implement file import and export name extractor #1607 @Aayush-Goel-04

### Breaking Changes

### New Rules (1)
### New Rules (4)

- executable/pe/export/forwarded-export [email protected]
- host-interaction/bootloader/get-uefi-variable [email protected]
- host-interaction/bootloader/set-uefi-variable [email protected]
-

### Bug Fixes
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa)
[![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases)
[![Number of rules](https://img.shields.io/badge/rules-824-blue.svg)](https://github.com/mandiant/capa-rules)
[![Number of rules](https://img.shields.io/badge/rules-826-blue.svg)](https://github.com/mandiant/capa-rules)
[![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
[![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases)
[![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt)
Expand Down
77 changes: 62 additions & 15 deletions capa/features/extractors/elffile.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,19 @@
from pathlib import Path

from elftools.elf.elffile import ELFFile, SymbolTableSection
from elftools.elf.relocation import RelocationSection

import capa.features.extractors.common
from capa.features.file import Import, Section
from capa.features.file import Export, Import, Section
from capa.features.common import OS, FORMAT_ELF, Arch, Format, Feature
from capa.features.address import NO_ADDRESS, FileOffsetAddress, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import FeatureExtractor

logger = logging.getLogger(__name__)


def extract_file_import_names(elf, **kwargs):
# see https://github.com/eliben/pyelftools/blob/0664de05ed2db3d39041e2d51d19622a8ef4fb0f/scripts/readelf.py#L372
symbol_tables = [(idx, s) for idx, s in enumerate(elf.iter_sections()) if isinstance(s, SymbolTableSection)]

for _, section in symbol_tables:
def extract_file_export_names(elf: ELFFile, **kwargs):
for section in elf.iter_sections():
if not isinstance(section, SymbolTableSection):
continue

Expand All @@ -35,14 +33,64 @@ def extract_file_import_names(elf, **kwargs):

logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_symbols())

for symbol in section.iter_symbols():
# The following conditions are based on the following article
# http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
if not symbol.name:
continue
if symbol.entry.st_info.type not in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]:
continue
if symbol.entry.st_value == 0:
continue
if symbol.entry.st_shndx == "SHN_UNDEF":
continue

yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value)


def extract_file_import_names(elf: ELFFile, **kwargs):
    """
    Yield an Import feature for each relocation that refers to an undefined symbol.

    The first pass collects the names of undefined symbols from every symbol
    table. The second pass walks every relocation section and yields one feature
    per relocation whose symbol was collected, located at the relocation's
    `r_offset`.

    Symbol indices are only unique within a single symbol table (e.g. `.dynsym`
    and `.symtab` both start at 0), so names are keyed by
    (symbol table section index, symbol index) and each relocation is resolved
    against the symbol table referenced by its section's `sh_link` header field.

    args:
      elf: the parsed ELF file to inspect.
      **kwargs: accepted and ignored.

    yields:
      Tuple[Import, FileOffsetAddress]: the imported name and the relocation offset.
    """
    # (symbol table section index, symbol index) -> symbol name
    symbol_names = {}

    for section_index, section in enumerate(elf.iter_sections()):
        if not isinstance(section, SymbolTableSection):
            continue

        for symbol_index, symbol in enumerate(section.iter_symbols()):
            # The following conditions are based on the following article
            # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
            if not symbol.name:
                continue
            if symbol.entry.st_info.type not in ("STT_FUNC", "STT_OBJECT", "STT_IFUNC"):
                continue
            if symbol.entry.st_value != 0:
                continue
            if symbol.entry.st_shndx != "SHN_UNDEF":
                continue
            if symbol.entry.st_name == 0:
                continue

            symbol_names[(section_index, symbol_index)] = symbol.name

    for section in elf.iter_sections():
        if not isinstance(section, RelocationSection):
            continue

        if section["sh_entsize"] == 0:
            logger.debug("Relocation section '%s' has a sh_entsize of zero!", section.name)
            continue

        logger.debug("Relocation section '%s' contains %s entries:", section.name, section.num_relocations())

        # sh_link of a relocation section is the section index of its symbol table
        symbol_table_index = section["sh_link"]

        for relocation in section.iter_relocations():
            # resolve the relocation's symbol index within the linked symbol table only
            name = symbol_names.get((symbol_table_index, relocation["r_info_sym"]))
            if name is None:
                continue
            yield Import(name), FileOffsetAddress(relocation["r_offset"])


def extract_file_section_names(elf, **kwargs):
def extract_file_section_names(elf: ELFFile, **kwargs):
for section in elf.iter_sections():
if section.name:
yield Section(section.name), AbsoluteVirtualAddress(section.header.sh_addr)
Expand All @@ -54,7 +102,7 @@ def extract_file_strings(buf, **kwargs):
yield from capa.features.extractors.common.extract_file_strings(buf)


def extract_file_os(elf, buf, **kwargs):
def extract_file_os(elf: ELFFile, buf, **kwargs):
# our current approach does not always get an OS value, e.g. for packed samples
# for file limitation purposes, we're more lax here
try:
Expand All @@ -68,7 +116,7 @@ def extract_file_format(**kwargs):
yield Format(FORMAT_ELF), NO_ADDRESS


def extract_file_arch(elf, **kwargs):
def extract_file_arch(elf: ELFFile, **kwargs):
arch = elf.get_machine_arch()
if arch == "x86":
yield Arch("i386"), NO_ADDRESS
Expand All @@ -85,8 +133,7 @@ def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, i


FILE_HANDLERS = (
# TODO(williballenthin): implement extract_file_export_names
# https://github.com/mandiant/capa/issues/1607
extract_file_export_names,
extract_file_import_names,
extract_file_section_names,
extract_file_strings,
Expand Down
8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ classifiers = [
]
dependencies = [
"tqdm==4.65.0",
"pyyaml==6.0",
"pyyaml==6.0.1",
"tabulate==0.9.0",
"colorama==0.4.6",
"termcolor==2.3.0",
Expand Down Expand Up @@ -89,11 +89,11 @@ dev = [
"types-backports==0.1.3",
"types-colorama==0.4.15.11",
"types-PyYAML==6.0.8",
"types-tabulate==0.9.0.1",
"types-tabulate==0.9.0.3",
"types-termcolor==1.1.4",
"types-psutil==5.8.23",
"types_requests==2.31.0.1",
"types-protobuf==4.23.0.1",
"types_requests==2.31.0.2",
"types-protobuf==4.23.0.2",
]
build = [
"pyinstaller==5.10.1",
Expand Down
71 changes: 71 additions & 0 deletions tests/test_elffile_features.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import io
from pathlib import Path

from elftools.elf.elffile import ELFFile

from capa.features.extractors.elffile import extract_file_export_names, extract_file_import_names

CD = Path(__file__).resolve().parent
SAMPLE_PATH = CD / "data" / "055da8e6ccfe5a9380231ea04b850e18.elf_"


def test_elffile_import_features():
    """Check that every import expected for the sample ELF is reported by the import extractor."""
    elf = ELFFile(io.BytesIO(SAMPLE_PATH.read_bytes()))
    imports = list(extract_file_import_names(elf))

    # an empty result means extraction failed outright; fail before the per-name checks
    assert len(imports) > 0, "No imports were found."

    # each extracted item is indexable; element 0 is the feature, whose value is the symbol name
    found_names = [item[0].value for item in imports]

    for symbol_name in (
        "memfrob",
        "puts",
        "__libc_start_main",
        "malloc",
        "__cxa_finalize",
    ):
        assert symbol_name in found_names, f"Symbol '{symbol_name}' not found in imports."


def test_elffile_export_features():
    """Check that every export expected for the sample ELF is reported by the export extractor."""
    elf = ELFFile(io.BytesIO(SAMPLE_PATH.read_bytes()))
    exports = list(extract_file_export_names(elf))

    # an empty result means extraction failed outright; fail before the per-name checks
    assert len(exports) > 0, "No exports were found."

    # each extracted item is indexable; element 0 is the feature, whose value is the symbol name
    found_names = [item[0].value for item in exports]

    for symbol_name in (
        "deregister_tm_clones",
        "register_tm_clones",
        "__do_global_dtors_aux",
        "completed.8060",
        "__do_global_dtors_aux_fini_array_entry",
        "frame_dummy",
        "_init",
        "__libc_csu_fini",
        "_fini",
        "__dso_handle",
        "_IO_stdin_used",
        "__libc_csu_init",
    ):
        assert symbol_name in found_names, f"Symbol '{symbol_name}' not found in exports."

0 comments on commit bab6c97

Please sign in to comment.